19#define ERRCHECK(err) __checkCudaErrors((err), __func__, __FILE__, __LINE__)
22 if (error != cudaSuccess) {
23 std::stringstream errMsg;
24 errMsg << func <<
"(), " << file +
":" << std::to_string(
line) <<
" : " << cudaGetErrorString(error);
25 throw std::runtime_error(errMsg.str());
32DeviceMemory::DeviceMemory(std::size_t
n, std::size_t typeSize) : _size{
n}
38PinnedHostMemory::PinnedHostMemory(std::size_t
n, std::size_t typeSize) : _size{
n}
46void Deleter<DeviceMemory>::operator()(
void *ptr)
52void Deleter<PinnedHostMemory>::operator()(
void *ptr)
67 auto event =
new cudaEvent_t;
68 ERRCHECK(cudaEventCreateWithFlags(event, forTiming ? 0 : cudaEventDisableTiming));
73void Deleter<CudaEvent>::operator()(
void *ptr)
75 auto event =
reinterpret_cast<cudaEvent_t *
>(ptr);
82void Deleter<CudaStream>::operator()(
void *ptr)
84 auto stream =
reinterpret_cast<cudaStream_t *
>(ptr);
85 ERRCHECK(cudaStreamDestroy(*stream));
108 auto stream =
new cudaStream_t;
120 cudaError_t
err = cudaStreamQuery(*
this);
121 if (
err == cudaErrorNotReady)
123 else if (
err == cudaSuccess)
136 ERRCHECK(::cudaStreamWaitEvent(*
this, event, 0));
155void copyHostToDeviceImpl(
const void *src,
void *dest,
size_t nBytes, CudaStream *stream)
158 ERRCHECK(cudaMemcpyAsync(dest, src, nBytes, cudaMemcpyHostToDevice, *stream));
160 ERRCHECK(cudaMemcpy(dest, src, nBytes, cudaMemcpyHostToDevice));
163void copyDeviceToHostImpl(
const void *src,
void *dest,
size_t nBytes, CudaStream *stream)
166 ERRCHECK(cudaMemcpyAsync(dest, src, nBytes, cudaMemcpyDeviceToHost, *stream));
168 ERRCHECK(cudaMemcpy(dest, src, nBytes, cudaMemcpyDeviceToHost));
171void copyDeviceToDeviceImpl(
const void *src,
void *dest,
size_t nBytes, CudaStream *stream)
174 ERRCHECK(cudaMemcpyAsync(dest, src, nBytes, cudaMemcpyDeviceToDevice, *stream));
176 ERRCHECK(cudaMemcpy(dest, src, nBytes, cudaMemcpyDeviceToDevice));
static void __checkCudaErrors(cudaError_t error, std::string func, std::string file, int line)
CudaEvent(bool forTiming)
Creates a new CUDA event.
std::unique_ptr< void, Deleter< CudaEvent > > _ptr
CudaStream()
Creates a new CUDA stream.
bool isActive()
Checks if a CUDA stream is currently active.
std::unique_ptr< void, Deleter< CudaStream > > _ptr
void waitForEvent(CudaEvent &)
Makes a CUDA stream wait for a CUDA event.
void cudaEventRecord(CudaEvent &event, CudaStream &stream)
Records a CUDA event.
float cudaEventElapsedTime(CudaEvent &begin, CudaEvent &end)
Calculates the elapsed time between two CUDA events.
Namespace for dispatching RooFit computations to various backends.