Logo ROOT   6.07/09
Reference Guide
CudaBuffers.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 07/08/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ////////////////////////////////////////////////////
13 // Device and host buffer for CUDA architectures. //
14 ////////////////////////////////////////////////////
15 
16 #ifndef TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
17 #define TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
18 
19 #include "cuda.h"
20 #include "cuda_runtime.h"
21 #include <memory>
22 
23 namespace TMVA {
24 namespace DNN {
25 
26 template<typename AFloat>
28 
29 /** TCudaHostBuffer
30  *
31  * Wrapper class for pinned memory buffers on the host. Uses
32  * std::shared_ptr with custom destructor to ensure consistent
33  * memory management and allow for easy copying/moving of the
34  * buffers. Copying is asynchronous and will set the cudaStream of the
35  * device buffer so that subsequent computations on the device buffer
36  * can be performed on the same stream.
37  *
38  * \tparam AFloat The floating point type to be stored in the buffers.
39  */
40 template<typename AFloat>
42 {
43 private:
44 
45  size_t fOffset; ///< Offset for sub-buffers
46  mutable cudaStream_t fComputeStream; ///< cudaStream for data transfer
47  std::shared_ptr<AFloat *> fHostPointer; ///< Pointer to the buffer data
48 
49  // Custom destructor required to free pinned host memory using cudaFreeHost.
50  struct TDestructor
51  {
52  TDestructor() = default;
53  TDestructor(const TDestructor &) = default;
54  TDestructor( TDestructor &&) = default;
55  TDestructor & operator=(const TDestructor &) = default;
56  TDestructor & operator=( TDestructor &&) = default;
57  void operator()(AFloat ** devicePointer);
58  } fDestructor;
59 
61 
62 public:
63 
64  TCudaHostBuffer(size_t size);
65  TCudaHostBuffer(AFloat *);
66  TCudaHostBuffer() = default;
67  TCudaHostBuffer(const TCudaHostBuffer &) = default;
68  TCudaHostBuffer( TCudaHostBuffer &&) = default;
69  TCudaHostBuffer & operator=(const TCudaHostBuffer &) = default;
70  TCudaHostBuffer & operator=( TCudaHostBuffer &&) = default;
71 
72  /** Return sub-buffer of the current buffer. */
73  TCudaHostBuffer GetSubBuffer(size_t offset, size_t size);
74 
75  operator AFloat * () const;
76 
77  inline AFloat & operator[](size_t index);
78  inline AFloat operator[](size_t index) const;
79 
80 };
81 
82 /** TCudaDeviceBuffer
83  *
84  * Service class for on-device memory buffers. Uses
85  * std::shared_ptr with custom destructor to ensure consistent
86  * memory management and allow for easy copying/moving. A device
87  * buffer has an associated CUDA compute stream, which is used for
88  * implicit synchronization of data transfers.
89  *
90  * \tparam AFloat The floating point type to be stored in the buffers.
91  */
92 template<typename AFloat>
94 {
95 private:
96 
97  size_t fOffset; ///< Offset for sub-buffers
98  size_t fSize;
99  cudaStream_t fComputeStream; ///< cudaStream for data transfer
100  std::shared_ptr<AFloat *> fDevicePointer; ///< Pointer to the buffer data
101 
102  // Custom destructor required to free device memory using cudaFree.
103  struct TDestructor
104  {
105  TDestructor() = default;
106  TDestructor(const TDestructor &) = default;
107  TDestructor( TDestructor &&) = default;
108  TDestructor & operator=(const TDestructor &) = default;
109  TDestructor & operator=( TDestructor &&) = default;
110  void operator()(AFloat ** devicePointer);
112  } fDestructor;
113 
114 public:
115 
116  TCudaDeviceBuffer(size_t size);
117  TCudaDeviceBuffer(size_t size, cudaStream_t stream);
118  TCudaDeviceBuffer(AFloat *, size_t size, cudaStream_t stream);
119  TCudaDeviceBuffer() = default;
120  TCudaDeviceBuffer(const TCudaDeviceBuffer &) = default;
121  TCudaDeviceBuffer( TCudaDeviceBuffer &&) = default;
122  TCudaDeviceBuffer & operator=(const TCudaDeviceBuffer &) = default;
124 
125  /** Return sub-buffer of the current buffer. */
126  TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size);
127  /** Convert to raw device data pointer.*/
128  operator AFloat * () const;
129 
130  void CopyFrom(const TCudaHostBuffer<AFloat> &) const;
131  void CopyTo(const TCudaHostBuffer<AFloat> &) const;
132 
133  cudaStream_t GetComputeStream() const {return fComputeStream;}
134  void SetComputeStream(cudaStream_t stream) {fComputeStream = stream;}
135 
136 };
137 
138 //
139 // Inline Functions.
140 //______________________________________________________________________________
141 
142 template<typename AFloat>
144 {
145  return (*fHostPointer + fOffset)[index];
146 }
147 
148 template<typename AFloat>
149 AFloat TCudaHostBuffer<AFloat>::operator[](size_t index) const
150 {
151  return (*fHostPointer + fOffset)[index]; // by-value read of element `index`, counted from the sub-buffer start (*fHostPointer + fOffset); no bounds check is performed
152 }
153 
154 
155 } // namespace DNN
156 } // namespace TMVA
157 #endif
TDestructor & operator=(const TDestructor &)=default
std::shared_ptr< AFloat * > fHostPointer
Pointer to the buffer data.
Definition: CudaBuffers.h:47
void SetComputeStream(cudaStream_t stream)
Definition: CudaBuffers.h:134
TCudaDeviceBuffer.
Definition: CudaBuffers.h:27
size_t fOffset
Offset for sub-buffers.
Definition: CudaBuffers.h:97
AFloat & operator[](size_t index)
Definition: CudaBuffers.h:143
struct TMVA::DNN::TCudaHostBuffer::TDestructor fDestructor
cudaStream_t fComputeStream
cudaStream for data transfer
Definition: CudaBuffers.h:46
cudaStream_t GetComputeStream() const
Definition: CudaBuffers.h:133
TCudaHostBuffer.
Definition: CudaBuffers.h:41
void operator()(AFloat **devicePointer)
Definition: CudaBuffers.cxx:29
std::shared_ptr< AFloat * > fDevicePointer
Pointer to the buffer data.
Definition: CudaBuffers.h:100
Abstract ClassifierFactory template that handles arbitrary types.
TCudaHostBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
Definition: CudaBuffers.cxx:54
size_t fOffset
Offset for sub-buffers.
Definition: CudaBuffers.h:45
cudaStream_t fComputeStream
cudaStream for data transfer
Definition: CudaBuffers.h:99