clock() is not accurate enough.
+5
A:
Use CUDA events to measure the execution time of kernels or other CUDA operations (memcpy, etc.):
// Prepare: create the two events that will bracket the timed region.
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
// Start record: enqueue the start event on stream 0 (the default stream).
cudaEventRecord(start, 0);
// Do something on GPU: this launch is the work being timed.
// MyKernel, dimGrid, dimBlock, input_data and output_data are placeholders
// supplied by the surrounding code.
MyKernel<<<dimGrid, dimBlock>>>(input_data, output_data);
// Stop event: enqueue the stop event on the same stream, after the launch.
cudaEventRecord(stop, 0);
// Block the host until the stop event has been reached; the elapsed time is
// only read after this wait.
cudaEventSynchronize(stop);
float elapsedTime;
cudaEventElapsedTime(&elapsedTime, start, stop); // time between the two events (milliseconds, per the CUDA Runtime API docs)
// NOTE(review): none of the CUDA return codes above are checked, and the
// launch is not followed by cudaGetLastError(); production code should
// check both.
// Clean up: release both events.
cudaEventDestroy(start);
cudaEventDestroy(stop);
See the CUDA Programming Guide, section 3.2.7.6.
KoppeKTop
2010-08-25 07:14:44