Logo ROOT   6.12/07
Reference Guide
CudaBuffers.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 07/08/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ////////////////////////////////////////////////////
13 // Device and host buffer for CUDA architectures. //
14 ////////////////////////////////////////////////////
15 
16 #ifndef TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
17 #define TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
18 
19 #include "cuda.h"
20 #include "cuda_runtime.h"
21 #include <memory>
22 
23 namespace TMVA {
24 namespace DNN {
25 
26 template<typename AFloat>
27 class TCudaDeviceBuffer; // forward declaration; the full definition follows TCudaHostBuffer
28 
29 /** TCudaHostBuffer
30  *
31  * Wrapper class for pinned memory buffers on the host. Uses
32  * std::shared_ptr with custom destructor to ensure consistent
33  * memory management and allow for easy copying/moving of the
34  * buffers. Copying is asynchronous and will set the cudaStream of the
35  * device buffer so that subsequent computations on the device buffer
36  * can be performed on the same stream.
37  *
38  * \tparam AFloat The floating point type to be stored in the buffers.
39  */
40 template<typename AFloat>
41 class TCudaHostBuffer
42 {
43 private:
44 
45  size_t fOffset; ///< Offset for sub-buffers
46  size_t fSize; ///< Size of the buffer, as reported by GetSize()
47  mutable cudaStream_t fComputeStream; ///< cudaStream for data transfer
48  std::shared_ptr<AFloat *> fHostPointer; ///< Pointer to the buffer data
49 
50  // Custom deleter that releases the pinned host memory via the CUDA runtime (presumably cudaFreeHost; confirm in CudaBuffers.cxx).
51  struct TDestructor
52  {
53  TDestructor() = default;
54  TDestructor(const TDestructor &) = default;
55  TDestructor( TDestructor &&) = default;
56  TDestructor & operator=(const TDestructor &) = default;
57  TDestructor & operator=( TDestructor &&) = default;
58  void operator()(AFloat ** devicePointer); // invoked as the deleter; defined out of line
59  } fDestructor;
60 
62 
63 public:
64 
65  TCudaHostBuffer(size_t size);
66  TCudaHostBuffer(AFloat *);
67  TCudaHostBuffer() = default;
68  TCudaHostBuffer(const TCudaHostBuffer &) = default;
69  TCudaHostBuffer( TCudaHostBuffer &&) = default;
70  TCudaHostBuffer & operator=(const TCudaHostBuffer &) = default;
71  TCudaHostBuffer & operator=( TCudaHostBuffer &&) = default;
72 
73  /** Return sub-buffer of the current buffer. */
74  TCudaHostBuffer GetSubBuffer(size_t offset, size_t size);
75 
76  operator AFloat * () const;
77 
78  inline AFloat & operator[](size_t index); // mutable element access, relative to fOffset
79  inline AFloat operator[](size_t index) const; // read-only element access, returns a copy
80 
81  size_t GetSize() const {return fSize;}
82 
83 };
84 
85 /** TCudaDeviceBuffer
86  *
87  * Service class for on-device memory buffers. Uses
88  * std::shared_ptr with custom destructor to ensure consistent
89  * memory management and allow for easy copying/moving. A device
90  * buffer has an associated CUDA compute stream, which is used for
91  * implicit synchronization of data transfers.
92  *
93  * \tparam AFloat The floating point type to be stored in the buffers.
94  */
95 template<typename AFloat>
96 class TCudaDeviceBuffer
97 {
98 private:
99 
100  size_t fOffset; ///< Offset for sub-buffers
101  size_t fSize; ///< Size of the buffer
102  cudaStream_t fComputeStream; ///< cudaStream for data transfer
103  std::shared_ptr<AFloat *> fDevicePointer; ///< Pointer to the buffer data
104 
105  // Custom destructor required to free the device memory using cudaFree.
106  struct TDestructor
107  {
108  TDestructor() = default;
109  TDestructor(const TDestructor &) = default;
110  TDestructor( TDestructor &&) = default;
111  TDestructor & operator=(const TDestructor &) = default;
112  TDestructor & operator=( TDestructor &&) = default;
113  void operator()(AFloat ** devicePointer); // invoked as the deleter; defined out of line
115  } fDestructor;
116 
117 public:
118 
119  TCudaDeviceBuffer(size_t size);
120  TCudaDeviceBuffer(size_t size, cudaStream_t stream);
121  TCudaDeviceBuffer(AFloat *, size_t size, cudaStream_t stream);
122  TCudaDeviceBuffer() = default;
123  TCudaDeviceBuffer(const TCudaDeviceBuffer &) = default;
124  TCudaDeviceBuffer( TCudaDeviceBuffer &&) = default;
125  TCudaDeviceBuffer & operator=(const TCudaDeviceBuffer &) = default;
126  TCudaDeviceBuffer & operator=( TCudaDeviceBuffer &&) = default;
127 
128  /** Return sub-buffer of the current buffer. */
129  TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size);
130  /** Convert to raw device data pointer.*/
131  operator AFloat * () const;
132 
133  void CopyFrom(const TCudaHostBuffer<AFloat> &) const;
134  void CopyTo(const TCudaHostBuffer<AFloat> &) const;
135 
136  cudaStream_t GetComputeStream() const {return fComputeStream;}
137  void SetComputeStream(cudaStream_t stream) {fComputeStream = stream;}
138 
139 };
140 
141 //
142 // Inline Functions.
143 //______________________________________________________________________________
144 
145 template<typename AFloat>
146 AFloat & TCudaHostBuffer<AFloat>::operator[](size_t index)
147 {
148  return (*fHostPointer + fOffset)[index]; // element `index`, counted from the sub-buffer offset
149 }
150 
151 template<typename AFloat>
152 AFloat TCudaHostBuffer<AFloat>::operator[](size_t index) const
153 {
154  return *(*fHostPointer + fOffset + index); // by-value read of element `index` past the sub-buffer offset
155 }
156 
157 
158 } // namespace DNN
159 } // namespace TMVA
160 #endif
TDestructor & operator=(const TDestructor &)=default
std::shared_ptr< AFloat * > fHostPointer
Pointer to the buffer data.
Definition: CudaBuffers.h:48
void SetComputeStream(cudaStream_t stream)
Definition: CudaBuffers.h:137
TCudaDeviceBuffer.
Definition: CudaBuffers.h:27
size_t GetSize() const
Definition: CudaBuffers.h:81
cudaStream_t GetComputeStream() const
Definition: CudaBuffers.h:136
size_t fOffset
Offset for sub-buffers.
Definition: CudaBuffers.h:100
AFloat & operator[](size_t index)
Definition: CudaBuffers.h:146
struct TMVA::DNN::TCudaHostBuffer::TDestructor fDestructor
cudaStream_t fComputeStream
cudaStream for data transfer
Definition: CudaBuffers.h:47
TCudaHostBuffer.
Definition: CudaBuffers.h:41
void operator()(AFloat **devicePointer)
Definition: CudaBuffers.cxx:30
std::shared_ptr< AFloat * > fDevicePointer
Pointer to the buffer data.
Definition: CudaBuffers.h:103
Abstract ClassifierFactory template that handles arbitrary types.
TCudaHostBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
Definition: CudaBuffers.cxx:55
size_t fOffset
Offset for sub-buffers.
Definition: CudaBuffers.h:45
cudaStream_t fComputeStream
cudaStream for data transfer
Definition: CudaBuffers.h:102