36 std::pair<std::chrono::microseconds, std::chrono::microseconds>
39 using namespace std::chrono;
42 std::pair<std::chrono::microseconds, std::chrono::microseconds> ret;
45 constexpr std::size_t nIterations = 5;
46 for (std::size_t i = 0; i < nIterations; i++) {
47 auto start = steady_clock::now();
49 ret.first += duration_cast<microseconds>(steady_clock::now() - start);
50 start = steady_clock::now();
52 ret.second += duration_cast<microseconds>(steady_clock::now() - start);
54 ret.first /= nIterations;
55 ret.second /= nIterations;
A templated class for managing an array of data using a specified memory type.
std::pair< std::chrono::microseconds, std::chrono::microseconds > memcpyBenchmark(std::size_t nBytes)
Measure the time taken to perform memory copy operations between host and device memory.
void copyDeviceToHost(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the CUDA device to the host.
void copyHostToDevice(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the host to the CUDA device.