I'm trying to compare GPU to CPU performance. For the NVIDIA GPU I've been using cudaEvent_t
events to get very precise timings.
For the CPU I've been using the following code:
// Timers: clock() readings taken immediately before and after the measured
// region, plus the resulting duration.
clock_t start, stop;
float elapsedTime = 0;
// Capture the start time. Note that clock() reports processor time consumed
// by the program, not wall-clock time.
start = clock();
// Do something here
.......
// Capture the stop time
stop = clock();
// Retrieve time elapsed in milliseconds: dividing the tick difference by
// CLOCKS_PER_SEC yields seconds, and the factor of 1000 converts to ms.
elapsedTime = (float)(stop - start) / (float)CLOCKS_PER_SEC * 1000.0f;
Apparently, that piece of code is only good if you're counting in seconds. Also, the results sometimes come out quite strange.
Does anyone know of some way to create a high resolution timer in Linux?
Check out clock_gettime, which is a POSIX interface to high-resolution timers.
If, having read the manpage, you're left wondering about the difference between CLOCK_REALTIME and CLOCK_MONOTONIC, see "Difference between CLOCK_REALTIME and CLOCK_MONOTONIC?"
See the following page for a complete example: http://www.guyrutenberg.com/2007/09/22/profiling-code-using-clock_gettime/
#include <iostream>
#include <time.h>
using namespace std;
timespec diff(timespec start, timespec end);
// Example: measure the CPU time this process spends in a busy loop using
// clock_gettime(CLOCK_PROCESS_CPUTIME_ID) and print it as "<sec>:<nsec>".
//
// Returns 0 on success, 1 if either clock reading fails.
int main()
{
    timespec time1, time2;

    // Initialized so the loop never reads an indeterminate value (and, starting
    // from zero, the doubling can never overflow). Declared volatile so the
    // optimizer cannot discard the loop that is being timed.
    volatile int temp = 0;

    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1) != 0) {
        std::cerr << "clock_gettime failed" << '\n';
        return 1;
    }

    // Workload under measurement.
    for (int i = 0; i < 242000000; i++)
        temp = temp + temp;

    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2) != 0) {
        std::cerr << "clock_gettime failed" << '\n';
        return 1;
    }

    // Compute the difference once and reuse it for both printed fields.
    const timespec elapsed = diff(time1, time2);
    std::cout << elapsed.tv_sec << ":" << elapsed.tv_nsec << '\n';
    return 0;
}
// Returns the interval from `start` to `end` as a timespec.
//
// The seconds and nanoseconds fields are subtracted independently; when the
// nanoseconds difference comes out negative, one second is borrowed so that
// tv_nsec ends up non-negative.
timespec diff(timespec start, timespec end)
{
    const long nanosPerSecond = 1000000000;

    timespec elapsed;
    elapsed.tv_sec = end.tv_sec - start.tv_sec;
    elapsed.tv_nsec = end.tv_nsec - start.tv_nsec;

    if (elapsed.tv_nsec < 0) {
        // Borrow one second to bring the nanoseconds field back into range.
        elapsed.tv_sec -= 1;
        elapsed.tv_nsec += nanosPerSecond;
    }
    return elapsed;
}