Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
CudaInterface.cu
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Author:
4 * Jonas Rembser, CERN 2023
5 *
6 * Copyright (c) 2023, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
14
15#include <stdexcept>
16#include <sstream>
17#include <string>
18
19#define ERRCHECK(err) __checkCudaErrors((err), __func__, __FILE__, __LINE__)
20inline static void __checkCudaErrors(cudaError_t error, std::string func, std::string file, int line)
21{
22 if (error != cudaSuccess) {
23 std::stringstream errMsg;
24 errMsg << func << "(), " << file + ":" << std::to_string(line) << " : " << cudaGetErrorString(error);
25 throw std::runtime_error(errMsg.str());
26 }
27}
28
29namespace RooFit {
30namespace Detail {
31namespace CudaInterface {
32
33DeviceMemory::DeviceMemory(std::size_t n, std::size_t typeSize) : _size{n}
34{
35 void *ret;
36 ERRCHECK(::cudaMalloc(&ret, n * typeSize));
37 _data.reset(ret);
38}
39PinnedHostMemory::PinnedHostMemory(std::size_t n, std::size_t typeSize) : _size{n}
40{
41 void *ret;
42 ERRCHECK(::cudaMallocHost(&ret, n * typeSize));
43 _data.reset(ret);
44}
45
46template <>
47void Deleter<DeviceMemory>::operator()(void *ptr)
48{
49 ERRCHECK(::cudaFree(ptr));
50 ptr = nullptr;
51}
52template <>
53void Deleter<PinnedHostMemory>::operator()(void *ptr)
54{
55 ERRCHECK(::cudaFreeHost(ptr));
56 ptr = nullptr;
57}
58
59/**
60 * Creates a new CUDA event.
61 *
62 * @param[in] forTiming Set to true if the event is intended for timing purposes.
63 * If `false`, the `cudaEventDisableTiming` is passed to CUDA.
64 * @return CudaEvent object representing the new event.
65 */
66CudaEvent::CudaEvent(bool forTiming)
67{
68 auto event = new cudaEvent_t;
69 ERRCHECK(cudaEventCreateWithFlags(event, forTiming ? 0 : cudaEventDisableTiming));
70 _ptr.reset(event);
71}
72
73template <>
74void Deleter<CudaEvent>::operator()(void *ptr)
75{
76 auto event = reinterpret_cast<cudaEvent_t *>(ptr);
77 ERRCHECK(cudaEventDestroy(*event));
78 delete event;
79 ptr = nullptr;
80}
81
82template <>
83void Deleter<CudaStream>::operator()(void *ptr)
84{
85 auto stream = reinterpret_cast<cudaStream_t *>(ptr);
86 ERRCHECK(cudaStreamDestroy(*stream));
87 delete stream;
88 ptr = nullptr;
89}
90
91/**
92 * Records a CUDA event.
93 *
94 * @param[in] event CudaEvent object representing the event to be recorded.
95 * @param[in] stream CudaStream in which to record the event.
96 */
98{
99 ERRCHECK(::cudaEventRecord(event, stream));
100}
101
102/**
103 * Creates a new CUDA stream.
104 *
105 * @return CudaStream object representing the new stream.
106 */
107CudaStream::CudaStream()
108{
109 auto stream = new cudaStream_t;
110 ERRCHECK(cudaStreamCreate(stream));
111 _ptr.reset(stream);
112}
113
114/**
115 * Checks if a CUDA stream is currently active.
116 *
117 * @return True if the stream is active, false otherwise.
118 */
119bool CudaStream::isActive()
120{
121 cudaError_t err = cudaStreamQuery(*this);
122 if (err == cudaErrorNotReady)
123 return true;
124 else if (err == cudaSuccess)
125 return false;
126 ERRCHECK(err);
127 return false;
128}
129
130/**
131 * Makes a CUDA stream wait for a CUDA event.
132 *
133 * @param[in] event CudaEvent object representing the event to wait for.
134 */
135void CudaStream::waitForEvent(CudaEvent &event)
136{
137 ERRCHECK(::cudaStreamWaitEvent(*this, event, 0));
138}
139
140/**
141 * Calculates the elapsed time between two CUDA events.
142 *
143 * @param[in] begin CudaEvent representing the start event.
144 * @param[in] end CudaEvent representing the end event.
145 * @return Elapsed time in milliseconds.
146 */
148{
149 float ret;
150 ERRCHECK(::cudaEventElapsedTime(&ret, begin, end));
151 return ret;
152}
153
154/// \cond ROOFIT_INTERNAL
155
156void copyHostToDeviceImpl(const void *src, void *dest, size_t nBytes, CudaStream *stream)
157{
158 if (stream)
159 ERRCHECK(cudaMemcpyAsync(dest, src, nBytes, cudaMemcpyHostToDevice, *stream));
160 else
161 ERRCHECK(cudaMemcpy(dest, src, nBytes, cudaMemcpyHostToDevice));
162}
163
164void copyDeviceToHostImpl(const void *src, void *dest, size_t nBytes, CudaStream *stream)
165{
166 if (stream)
167 ERRCHECK(cudaMemcpyAsync(dest, src, nBytes, cudaMemcpyDeviceToHost, *stream));
168 else
169 ERRCHECK(cudaMemcpy(dest, src, nBytes, cudaMemcpyDeviceToHost));
170}
171
172/// \endcond
173
174} // namespace CudaInterface
175} // namespace Detail
176} // namespace RooFit
#define ERRCHECK(err)
static void __checkCudaErrors(cudaError_t error, std::string func, std::string file, int line)
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
TLine * line
const Int_t n
Definition legend1.C:16
void cudaEventRecord(CudaEvent &, CudaStream &)
Records a CUDA event.
float cudaEventElapsedTime(CudaEvent &, CudaEvent &)
Calculates the elapsed time between two CUDA events.
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition JSONIO.h:26
Definition file.py:1
#define dest(otri, vertexptr)
Definition triangle.c:1041