Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
CudaInterface.h
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Author:
4 * Jonas Rembser, CERN 2023
5 *
6 * Copyright (c) 2023, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13#ifndef RooFit_Detail_CudaInterface_h
14#define RooFit_Detail_CudaInterface_h
15
16#include <cstddef>
17#include <memory>
18
19namespace RooFit {
20namespace Detail {
21/*
22 * C++ interface around CUDA functionality.
23 *
24 * Generally, if the call to the underlying CUDA function does not return
25 * `cudaSuccess`, a `std::runtime_error` is thrown.
26 *
27 * \ingroup RooFitCuda
28 */
29namespace CudaInterface {
30
31/// \cond ROOFIT_INTERNAL
32
// Deleter functor used by the `std::unique_ptr<void, ...>` handles in this
// header. The template parameter is not used in the declaration itself; each
// wrapper class instantiates the deleter with its own type. Only the call
// operator's declaration is visible here.
template <typename T>
struct Deleter {
   void operator()(void *ptr);
};
37
38/// \endcond
39
40/*
41 * Wrapper around cudaEvent_t.
42 */
43class CudaEvent {
44public:
45 CudaEvent(bool /*forTiming*/);
46
47// When compiling with NVCC, we allow setting and getting the actual CUDA objects from the wrapper.
48#ifdef __CUDACC__
49 inline operator cudaEvent_t() { return *reinterpret_cast<cudaEvent_t *>(_ptr.get()); }
50#endif
51private:
52 std::unique_ptr<void, Deleter<CudaEvent>> _ptr;
53};
54
55/*
56 * Wrapper around cudaStream_t.
57 */
59public:
60 CudaStream();
61
62 bool isActive();
63 void waitForEvent(CudaEvent &);
64
65// When compiling with NVCC, we allow setting and getting the actual CUDA objects from the wrapper.
66#ifdef __CUDACC__
67 inline cudaStream_t *get() { return reinterpret_cast<cudaStream_t *&>(_ptr); }
68 inline operator cudaStream_t() { return *reinterpret_cast<cudaStream_t *>(_ptr.get()); }
69#endif
70private:
71 std::unique_ptr<void, Deleter<CudaStream>> _ptr;
72};
73
76
77/// \cond ROOFIT_INTERNAL
78void copyHostToDeviceImpl(const void *src, void *dest, std::size_t n, CudaStream * = nullptr);
79void copyDeviceToHostImpl(const void *src, void *dest, std::size_t n, CudaStream * = nullptr);
80/// \endcond
81
82/**
83 * Copies data from the host to the CUDA device.
84 *
85 * @param[in] src Pointer to the source memory on the host.
86 * @param[in] dest Pointer to the destination memory on the device.
87 * @param[in] nBytes Number of bytes to copy.
88 * @param[in] stream CudaStream for asynchronous memory transfer (optional).
89 */
90template <class T>
91void copyHostToDevice(const T *src, T *dest, std::size_t n, CudaStream * = nullptr)
92{
93 copyHostToDeviceImpl(src, dest, sizeof(T) * n);
94}
95
96/**
97 * Copies data from the CUDA device to the host.
98 *
99 * @param[in] src Pointer to the source memory on the device.
100 * @param[in] dest Pointer to the destination memory on the host.
101 * @param[in] nBytes Number of bytes to copy.
102 * @param[in] stream CudaStream for asynchronous memory transfer (optional).
103 */
104template <class T>
105void copyDeviceToHost(const T *src, T *dest, std::size_t n, CudaStream * = nullptr)
106{
107 copyDeviceToHostImpl(src, dest, sizeof(T) * n);
108}
109
110/// \cond ROOFIT_INTERNAL
111
112// The user should not use these "Memory" classes directly, but instead the typed
113// "Array" classes. That's why we tell doxygen that this is internal.
114
115class DeviceMemory {
116public:
117 DeviceMemory(std::size_t n, std::size_t typeSize);
118
119 std::size_t size() const { return _size; }
120 void *data() { return _data.get(); }
121 void const *data() const { return _data.get(); }
122
123private:
124 std::unique_ptr<void, Deleter<DeviceMemory>> _data;
125 std::size_t _size = 0;
126};
127
128class PinnedHostMemory {
129public:
130 PinnedHostMemory(std::size_t n, std::size_t typeSize);
131
132 std::size_t size() const { return _size; }
133 void *data() { return _data.get(); }
134 void const *data() const { return _data.get(); }
135
136private:
137 std::unique_ptr<void, Deleter<PinnedHostMemory>> _data;
138 std::size_t _size = 0;
139};
140/// \endcond
141
142
/**
 * @class Array
 * @brief Typed view on top of an untyped memory provider.
 *
 * Array derives from the memory class it is given and adds element-typed
 * accessors on top of that class's `size()` and `data()`. Storage is whatever
 * the memory class provides (e.g., memory on the host or on the device) and
 * is released by the memory class at the end of the object's lifetime.
 *
 * @tparam Data_t The type of data elements to be stored in the array.
 * @tparam Memory_t The type of memory that provides storage for the array.
 */
template <class Data_t, class Memory_t>
class Array : public Memory_t {
public:
   /**
    * @brief Create an array holding a given number of elements.
    * @param n The size of the array (number of elements).
    *
    * The memory class is constructed from `n` and `sizeof(Data_t)`.
    */
   Array(std::size_t n) : Memory_t{n, sizeof(Data_t)} {}

   // Re-declared here (instead of only being inherited) so that doxygen lists it.
   /**
    * @brief Number of elements in the array.
    * @return The size reported by the underlying memory class.
    */
   std::size_t size() const { return this->Memory_t::size(); }

   /**
    * @brief Mutable pointer to the first element.
    * @return The underlying memory, typed as `Data_t *`.
    *
    * Gives direct, writable access to the array elements.
    */
   Data_t *data()
   {
      void *raw = this->Memory_t::data();
      return static_cast<Data_t *>(raw);
   }

   /**
    * @brief Read-only pointer to the first element.
    * @return The underlying memory, typed as `const Data_t *`.
    *
    * Gives read-only access to the array elements.
    */
   const Data_t *data() const
   {
      const void *raw = this->Memory_t::data();
      return static_cast<const Data_t *>(raw);
   }
};
190
191/**
192 * An array of specific type that is allocated on the device with `cudaMalloc` and freed with `cudaFree`.
193 */
194template <class T>
196
197/**
198 * A pinned array of specific type that is allocated on the host with `cudaMallocHost` and freed with `cudaFreeHost`.
199 * The memory is "pinned", i.e. page-locked and accessible to the device for fast copying.
200 * \see The documentation of `cudaMallocHost` on <a
201 * href="https://developer.download.nvidia.com/compute/DevZone/docs/html/C/doc/html/group__CUDART__HIGHLEVEL_ge439496de696b166ba457dab5dd4f356.html">developer.download.nvidia.com</a>.
202 */
203template <class T>
205
206} // namespace CudaInterface
207} // namespace Detail
208} // namespace RooFit
209
210#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
TRObject operator()(const T1 &t1) const
A templated class for managing an array of data using a specified memory type.
Data_t const * data() const
Get a const pointer to the start of the array.
Data_t * data()
Get a pointer to the start of the array.
std::size_t size() const
Get the size of the array.
Array(std::size_t n)
Constructor to create an Array object with a specified size.
std::unique_ptr< void, Deleter< CudaEvent > > _ptr
CudaStream()
Creates a new CUDA stream.
std::unique_ptr< void, Deleter< CudaStream > > _ptr
void waitForEvent(CudaEvent &)
Makes a CUDA stream wait for a CUDA event.
bool isActive()
Checks if a CUDA stream is currently active.
const Int_t n
Definition legend1.C:16
void cudaEventRecord(CudaEvent &, CudaStream &)
Records a CUDA event.
void copyDeviceToHost(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the CUDA device to the host.
void copyHostToDevice(const T *src, T *dest, std::size_t n, CudaStream *=nullptr)
Copies data from the host to the CUDA device.
float cudaEventElapsedTime(CudaEvent &, CudaEvent &)
Calculates the elapsed time between two CUDA events.
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition JSONIO.h:26
#define dest(otri, vertexptr)
Definition triangle.c:1041