Logo ROOT   6.16/01
Reference Guide
CudaBuffers.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 07/08/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12////////////////////////////////////////////////////
13// Device and host buffer for CUDA architectures. //
14////////////////////////////////////////////////////
15
16#ifndef TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
17#define TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
18
19#include "cuda.h"
20#include "cuda_runtime.h"
21
22#include <memory>
23
24namespace TMVA {
25namespace DNN {
26
27template<typename AFloat>
28class TCudaDeviceBuffer;
29
30/** TCudaHostBuffer
31 *
32 * Wrapper class for pinned memory buffers on the host. Uses
33 * std::shared_ptr with a custom deleter (TDestructor) to ensure consistent
34 * memory management and allow for easy copying/moving of the
35 * buffers. Copying is asynchronous and will set the cudaStream of the
36 * device buffer so that subsequent computations on the device buffer
37 * can be performed on the same stream.
38 *
39 * \tparam AFloat The floating point type to be stored in the buffers.
40 */
41template<typename AFloat>
43{
44private:
45
46 size_t fOffset; ///< Offset for sub-buffers
47 size_t fSize; ///< Size of the buffer, as returned by GetSize()
48 mutable cudaStream_t fComputeStream; ///< cudaStream for data transfer
49 std::shared_ptr<AFloat *> fHostPointer; ///< Pointer to the buffer data
50
51 // Custom deleter for fHostPointer: pinned host memory is released with cudaFreeHost (cudaFree is for device memory).
53 {
54 TDestructor() = default;
55 TDestructor(const TDestructor &) = default;
56 TDestructor( TDestructor &&) = default;
57 TDestructor & operator=(const TDestructor &) = default;
59 void operator()(AFloat ** devicePointer);
61
63
64public:
65
66 TCudaHostBuffer(size_t size);
67 TCudaHostBuffer(AFloat *);
68 TCudaHostBuffer() = default;
69 TCudaHostBuffer(const TCudaHostBuffer &) = default;
73
74 /** Return sub-buffer of the current buffer. */
75 TCudaHostBuffer GetSubBuffer(size_t offset, size_t size);
76
77 operator AFloat * () const;
78
79 inline AFloat & operator[](size_t index); ///< Mutable access to element \p index, counted from fOffset
80 inline AFloat operator[](size_t index) const; ///< Element \p index by value, counted from fOffset
81
82 size_t GetSize() const {return fSize;}
83
84};
85
86/** TCudaDeviceBuffer
87 *
88 * Service class for on-device memory buffers. Uses
89 * std::shared_ptr with a custom deleter (TDestructor) to ensure consistent
90 * memory management and allow for easy copying/moving. A device
91 * buffer has an associated CUDA compute stream, which is used for
92 * implicit synchronization of data transfers.
93 *
94 * \tparam AFloat The floating point type to be stored in the buffers.
95 */
96template<typename AFloat>
98{
99private:
100
101 size_t fOffset; ///< Offset for sub-buffers
102 size_t fSize;
103 cudaStream_t fComputeStream; ///< cudaStream for data transfer
104 std::shared_ptr<AFloat *> fDevicePointer; ///< Pointer to the buffer data
105
106 // Custom deleter for fDevicePointer: required to free the device memory using cudaFree.
108 {
109 TDestructor() = default;
110 TDestructor(const TDestructor &) = default;
111 TDestructor( TDestructor &&) = default;
112 TDestructor & operator=(const TDestructor &) = default;
114 void operator()(AFloat ** devicePointer);
117
118public:
119
120 TCudaDeviceBuffer(size_t size);
121 TCudaDeviceBuffer(size_t size, cudaStream_t stream);
122 TCudaDeviceBuffer(AFloat *, size_t size, cudaStream_t stream);
123 TCudaDeviceBuffer() = default;
128
129 /** Return sub-buffer of the current buffer. */
130 TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size);
131 /** Convert to raw device data pointer.*/
132 operator AFloat * () const;
133
134 void CopyFrom(const TCudaHostBuffer<AFloat> &) const;
135 void CopyTo(const TCudaHostBuffer<AFloat> &) const;
136
137 cudaStream_t GetComputeStream() const {return fComputeStream;}
138 void SetComputeStream(cudaStream_t stream) {fComputeStream = stream;}
139
140};
141
142//
143// Inline Functions.
144//______________________________________________________________________________
145
146template<typename AFloat>
148{
149 return (*fHostPointer + fOffset)[index]; // base pointer advanced by the sub-buffer offset, then indexed
150}
151
152template<typename AFloat>
153AFloat TCudaHostBuffer<AFloat>::operator[](size_t index) const // const overload: returns the element by value
154{
155 return (*fHostPointer + fOffset)[index]; // base pointer advanced by the sub-buffer offset, then indexed
156}
157
158
159} // namespace DNN
160} // namespace TMVA
161#endif
TCudaDeviceBuffer.
Definition: CudaBuffers.h:98
size_t fOffset
Offset for sub-buffers.
Definition: CudaBuffers.h:101
void SetComputeStream(cudaStream_t stream)
Definition: CudaBuffers.h:138
void CopyFrom(const TCudaHostBuffer< AFloat > &) const
void CopyTo(const TCudaHostBuffer< AFloat > &) const
struct TMVA::DNN::TCudaDeviceBuffer::TDestructor fDestructor
TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
TCudaDeviceBuffer & operator=(TCudaDeviceBuffer &&)=default
cudaStream_t fComputeStream
cudaStream for data transfer
Definition: CudaBuffers.h:103
TCudaDeviceBuffer & operator=(const TCudaDeviceBuffer &)=default
std::shared_ptr< AFloat * > fDevicePointer
Pointer to the buffer data.
Definition: CudaBuffers.h:104
TCudaDeviceBuffer(TCudaDeviceBuffer &&)=default
cudaStream_t GetComputeStream() const
Definition: CudaBuffers.h:137
TCudaDeviceBuffer(const TCudaDeviceBuffer &)=default
TCudaHostBuffer.
Definition: CudaBuffers.h:43
AFloat & operator[](size_t index)
Definition: CudaBuffers.h:147
struct TMVA::DNN::TCudaHostBuffer::TDestructor fDestructor
size_t GetSize() const
Definition: CudaBuffers.h:82
size_t fOffset
Offset for sub-buffers.
Definition: CudaBuffers.h:46
TCudaHostBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
Definition: CudaBuffers.cxx:57
TCudaHostBuffer & operator=(const TCudaHostBuffer &)=default
TCudaHostBuffer(TCudaHostBuffer &&)=default
cudaStream_t fComputeStream
cudaStream for data transfer
Definition: CudaBuffers.h:48
TCudaHostBuffer(const TCudaHostBuffer &)=default
std::shared_ptr< AFloat * > fHostPointer
Pointer to the buffer data.
Definition: CudaBuffers.h:49
TCudaHostBuffer & operator=(TCudaHostBuffer &&)=default
Abstract ClassifierFactory template that handles arbitrary types.
TDestructor(const TDestructor &)=default
void operator()(AFloat **devicePointer)
Definition: CudaBuffers.cxx:69
TDestructor & operator=(TDestructor &&)=default
TDestructor & operator=(const TDestructor &)=default
void operator()(AFloat **devicePointer)
Definition: CudaBuffers.cxx:33
TDestructor(const TDestructor &)=default
TDestructor(TDestructor &&)=default
TDestructor & operator=(TDestructor &&)=default
TDestructor & operator=(const TDestructor &)=default