20 using namespace std::chrono;
22 std::pair<std::chrono::microseconds, std::chrono::microseconds> ret;
25 for (
int i = 0; i < 5; i++) {
26 auto start = steady_clock::now();
28 ret.first += duration_cast<microseconds>(steady_clock::now() - start);
29 start = steady_clock::now();
31 ret.second += duration_cast<microseconds>(steady_clock::now() - start);
virtual void * cudaMalloc(size_t)
virtual void memcpyToCPU(void *, const void *, size_t, cudaStream_t *=nullptr)
virtual void cudaFree(void *)
virtual void * cudaMallocHost(size_t)
virtual void memcpyToCUDA(void *, const void *, size_t, cudaStream_t *=nullptr)
virtual void cudaFreeHost(void *)
R__EXTERN RooBatchComputeInterface * dispatchCUDA
std::pair< std::chrono::microseconds, std::chrono::microseconds > memcpyBenchmark(std::size_t nBytes)
Measure the time for transferring data from host to device and vice versa.