Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
CudaHelpers.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Jonas Rembser, CERN 2022
5 *
6 * Copyright (c) 2023, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13#include "CudaHelpers.h"
14
16
17/**
18 * @brief Measure the time taken to perform memory copy operations between host and device memory.
19 * @param nBytes The number of bytes to be copied between host and device memory.
20 * @return A pair of durations in microseconds: `first` is the average time of one host-to-device copy, `second` is the average time of one device-to-host copy.
21 *
22 * This function measures the time taken to copy data between host and device memory using the CUDA API.
23 * It performs five copies in each direction, times every copy with `std::chrono::steady_clock` (each sample is truncated to whole microseconds), and averages the totals per direction.
24 * The input parameter `nBytes` specifies the size of the data to be copied in bytes.
25 * The function returns a pair of durations, where the first duration represents the average time
26 * taken for host-to-device copies and the second duration represents the average time taken for
27 * device-to-host copies.
28 *
29 * Example usage:
30 * @code
31 * auto copyTimes = RooFit::Detail::CudaHelpers::memcpyBenchmark(1024 * 1024); // Measure copy time for 1 MB of data
32 * std::cout << "Average Host-to-Device Copy Time: " << copyTimes.first.count() << " microseconds" << std::endl;
33 * std::cout << "Average Device-to-Host Copy Time: " << copyTimes.second.count() << " microseconds" << std::endl;
34 * @endcode
35 */
36std::pair<std::chrono::microseconds, std::chrono::microseconds>
38{
39 using namespace std::chrono;
41
42 std::pair<std::chrono::microseconds, std::chrono::microseconds> ret;
45 constexpr std::size_t nIterations = 5;
46 for (std::size_t i = 0; i < nIterations; i++) {
47 auto start = steady_clock::now();
48 CudaInterface::copyHostToDevice(hostArr.data(), deviArr.data(), nBytes);
49 ret.first += duration_cast<microseconds>(steady_clock::now() - start);
50 start = steady_clock::now();
51 CudaInterface::copyDeviceToHost(deviArr.data(), hostArr.data(), nBytes);
52 ret.second += duration_cast<microseconds>(steady_clock::now() - start);
53 }
54 ret.first /= nIterations;
55 ret.second /= nIterations;
56 return ret;
57}
A templated class for managing an array of data using a specified memory type.
std::pair< std::chrono::microseconds, std::chrono::microseconds > memcpyBenchmark(std::size_t nBytes)
Measure the time taken to perform memory copy operations between host and device memory.
void copyDeviceToHost(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the CUDA device to the host.
void copyHostToDevice(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the host to the CUDA device.