Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
CudaInterface.h
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Author:
4 * Jonas Rembser, CERN 2023
5 *
6 * Copyright (c) 2023, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13#ifndef CudaInterface_h
14#define CudaInterface_h
15
16#include <cstddef>
17#include <memory>
18
19namespace RooBatchCompute {
20
21/*
22 * C++ interface around CUDA functionality.
23 *
24 * Generally, if the call to the underlying CUDA function does not return
25 * `cudaSuccess`, a `std::runtime_error` is thrown.
26 *
27 * \ingroup RooFitCuda
28 */
29namespace CudaInterface {
30
31/// \cond ROOFIT_INTERNAL
32
// Deleter functor type used by the `std::unique_ptr<void, ...>` members in
// this header. The template argument is the wrapper class that owns the
// pointer; the call operator is only declared here and is defined elsewhere.
template <typename T>
struct Deleter {
   void operator()(void *ptr);
};
37
38/// \endcond
39
40/*
41 * Wrapper around cudaEvent_t.
42 */
43class CudaEvent {
44public:
45 CudaEvent(bool forTiming);
46
47// When compiling with NVCC, we allow setting and getting the actual CUDA objects from the wrapper.
48#ifdef __CUDACC__
49 inline operator cudaEvent_t() { return *reinterpret_cast<cudaEvent_t *>(_ptr.get()); }
50#endif
51private:
52 std::unique_ptr<void, Deleter<CudaEvent>> _ptr;
53};
54
55/*
56 * Wrapper around cudaStream_t.
57 */
59public:
60 CudaStream();
61
62 bool isActive();
63 void waitForEvent(CudaEvent &);
64
65// When compiling with NVCC, we allow setting and getting the actual CUDA objects from the wrapper.
66#ifdef __CUDACC__
67 inline cudaStream_t *get() { return reinterpret_cast<cudaStream_t *&>(_ptr); }
68 inline operator cudaStream_t() { return *reinterpret_cast<cudaStream_t *>(_ptr.get()); }
69#endif
70private:
71 std::unique_ptr<void, Deleter<CudaStream>> _ptr;
72};
73
76
77/// \cond ROOFIT_INTERNAL
78void copyHostToDeviceImpl(const void *src, void *dest, std::size_t n, CudaStream * = nullptr);
79void copyDeviceToHostImpl(const void *src, void *dest, std::size_t n, CudaStream * = nullptr);
80void copyDeviceToDeviceImpl(const void *src, void *dest, std::size_t n, CudaStream * = nullptr);
81/// \endcond
82
83/**
84 * Copies data from the host to the CUDA device.
85 *
86 * @param[in] src Pointer to the source memory on the host.
87 * @param[in] dest Pointer to the destination memory on the device.
88 * @param[in] nBytes Number of bytes to copy.
89 * @param[in] stream CudaStream for asynchronous memory transfer (optional).
90 */
91template <class T>
92void copyHostToDevice(const T *src, T *dest, std::size_t n, CudaStream * = nullptr)
93{
94 copyHostToDeviceImpl(src, dest, sizeof(T) * n);
95}
96
97/**
98 * Copies data from the CUDA device to the host.
99 *
100 * @param[in] src Pointer to the source memory on the device.
101 * @param[in] dest Pointer to the destination memory on the host.
102 * @param[in] nBytes Number of bytes to copy.
103 * @param[in] stream CudaStream for asynchronous memory transfer (optional).
104 */
105template <class T>
106void copyDeviceToHost(const T *src, T *dest, std::size_t n, CudaStream * = nullptr)
107{
108 copyDeviceToHostImpl(src, dest, sizeof(T) * n);
109}
110
111/**
112 * Copies data from the CUDA device to the CUDA device.
113 *
114 * @param[in] src Pointer to the source memory on the device.
115 * @param[in] dest Pointer to the destination memory on the device.
116 * @param[in] nBytes Number of bytes to copy.
117 * @param[in] stream CudaStream for asynchronous memory transfer (optional).
118 */
119template <class T>
120void copyDeviceToDevice(const T *src, T *dest, std::size_t n, CudaStream * = nullptr)
121{
122 copyDeviceToDeviceImpl(src, dest, sizeof(T) * n);
123}
124
125/// \cond ROOFIT_INTERNAL
126
127// The user should not use these "Memory" classes directly, but instead the typed
128// "Array" classes. That's why we tell doxygen that this is internal.
129
130class DeviceMemory {
131public:
132 DeviceMemory(std::size_t n, std::size_t typeSize);
133
134 std::size_t size() const { return _size; }
135 void *data() { return _data.get(); }
136 void const *data() const { return _data.get(); }
137
138private:
139 std::unique_ptr<void, Deleter<DeviceMemory>> _data;
140 std::size_t _size = 0;
141};
142
143class PinnedHostMemory {
144public:
145 PinnedHostMemory(std::size_t n, std::size_t typeSize);
146
147 std::size_t size() const { return _size; }
148 void *data() { return _data.get(); }
149 void const *data() const { return _data.get(); }
150
151private:
152 std::unique_ptr<void, Deleter<PinnedHostMemory>> _data;
153 std::size_t _size = 0;
154};
155/// \endcond
156
/**
 * @class Array
 * @brief Typed view on top of an untyped memory class.
 *
 * Array derives from the given memory class and adds element-typed access to
 * the storage that the memory class provides (e.g., memory on the host or on
 * the device). Releasing the storage is left to the memory base class.
 *
 * @tparam Data_t The type of data elements to be stored in the array.
 * @tparam Memory_t The type of memory that provides storage for the array.
 *                  It must be constructible from `{n, sizeof(Data_t)}` and
 *                  offer `size()` and `data()`.
 */
template <class Data_t, class Memory_t>
class Array : public Memory_t {
public:
   /**
    * @brief Creates an array with room for `n` elements.
    * @param n The size of the array (number of elements).
    */
   Array(std::size_t n) : Memory_t{n, sizeof(Data_t)} {}

   // Re-declared on purpose so that doxygen lists it for this class.
   /**
    * @brief Number of elements in the array.
    * @return Whatever the memory base class reports as its size.
    */
   std::size_t size() const
   {
      return Memory_t::size();
   }

   /**
    * @brief Mutable access to the first element.
    * @return The untyped pointer of the memory base class, cast to `Data_t *`.
    */
   Data_t *data()
   {
      void *raw = Memory_t::data();
      return static_cast<Data_t *>(raw);
   }

   /**
    * @brief Read-only access to the first element.
    * @return The untyped pointer of the memory base class, cast to `const Data_t *`.
    */
   const Data_t *data() const
   {
      const void *raw = Memory_t::data();
      return static_cast<const Data_t *>(raw);
   }
};
204
205/**
206 * An array of specific type that is allocated on the device with `cudaMalloc` and freed with `cudaFree`.
207 */
208template <class T>
210
211/**
212 * A pinned array of specific type that is allocated on the host with `cudaMallocHost` and freed with `cudaFreeHost`.
213 * The memory is "pinned", i.e. page-locked and accessible to the device for fast copying.
214 * \see The documentation of `cudaMallocHost` on <a
215 * href="https://developer.download.nvidia.com/compute/DevZone/docs/html/C/doc/html/group__CUDART__HIGHLEVEL_ge439496de696b166ba457dab5dd4f356.html">developer.download.nvidia.com</a>.
216 */
217template <class T>
219
220} // namespace CudaInterface
221} // namespace RooBatchCompute
222
223#endif
RooAbsData * _data
Pointer to original input dataset.
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t dest
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
TRObject operator()(const T1 &t1) const
virtual const RooArgSet * get() const
Definition RooAbsData.h:101
A templated class for managing an array of data using a specified memory type.
Array(std::size_t n)
Constructor to create an Array object with a specified size.
Data_t * data()
Get a pointer to the start of the array.
Data_t const * data() const
Get a const pointer to the start of the array.
std::size_t size() const
Get the size of the array.
CudaEvent(bool forTiming)
Creates a new CUDA event.
std::unique_ptr< void, Deleter< CudaEvent > > _ptr
bool isActive()
Checks if a CUDA stream is currently active.
std::unique_ptr< void, Deleter< CudaStream > > _ptr
void waitForEvent(CudaEvent &)
Makes a CUDA stream wait for a CUDA event.
const Int_t n
Definition legend1.C:16
void copyDeviceToDevice(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the CUDA device to the CUDA device.
void cudaEventRecord(CudaEvent &event, CudaStream &stream)
Records a CUDA event.
void copyHostToDevice(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the host to the CUDA device.
void copyDeviceToHost(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the CUDA device to the host.
float cudaEventElapsedTime(CudaEvent &begin, CudaEvent &end)
Calculates the elapsed time between two CUDA events.
Namespace for dispatching RooFit computations to various backends.