Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
Arithmetic.cu
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 13/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12///////////////////////////////////////////////////////////////////
13// Contains additional arithmetic functions required by the CUDA //
14// neural network implementation. //
15///////////////////////////////////////////////////////////////////
16
19//#include "TMVA/DNN/Architectures/Cuda/CudaMatrix.h"
20//#include "TMVA/DNN/Architectures/Cuda/Device.h"
21//#include "../Cuda/Kernels.cuh"
22
23namespace TMVA
24{
25namespace DNN
26{
27
28//____________________________________________________________________________
29// FIXME: This is elementwise multiplication
30/*template<>
31void TCudnn<float>::Multiply(TCudaTensor<float> &C,
32 const TCudaTensor<float> &A,
33 const TCudaTensor<float> &B,
34 const float alpha,
35 const float beta,
36 const float gamma)
37{
38
39 // Descriptor for the Tensor Operation
40 cudnnOpTensorDescriptor_t opTensorDescr;
41 CUDNNCHECK(cudnnCreateOpTensorDescriptor(&opTensorDescr));
42
43 CUDNNCHECK(cudnnSetOpTensorDescriptor(opTensorDescr,
44 CUDNN_OP_TENSOR_MUL,
45 CUDNN_DATA_FLOAT,
46 CUDNN_PROPAGATE_NAN)); // NaN will be propagated
47
48 // C = MUL(alpha*A, beta*B) + gamma*C
49 cudnnStatus_t status = cudnnOpTensor(A.GetCudnnHandle(),
50 opTensorDescr,
51 &alpha,
52 A.GetTensorDescriptor(),
53 A.GetDataPointer(),
54 &beta,
55 B.GetTensorDescriptor(),
56 B.GetDataPointer(),
57 &gamma, // gamma = 0: Don't add C
58 C.GetTensorDescriptor(),
59 C.GetDataPointer());
60
61 CUDNNCHECK(cudnnDestroyOpTensorDescriptor(opTensorDescr));
62}
63
64//____________________________________________________________________________
65template<>
66void TCudnn<double>::Multiply(TCudaTensor<double> &C,
67 const TCudaTensor<double> &A,
68 const TCudaTensor<double> &B,
69 const double alpha,
70 const double beta,
71 const double gamma)
72{
73 // Descriptor for the Tensor Operation
74 cudnnOpTensorDescriptor_t opTensorDescr;
75 CUDNNCHECK(cudnnCreateOpTensorDescriptor(&opTensorDescr));
76
77 CUDNNCHECK(cudnnSetOpTensorDescriptor(opTensorDescr,
78 CUDNN_OP_TENSOR_MUL,
79 CUDNN_DATA_DOUBLE,
80 CUDNN_PROPAGATE_NAN)); // NaN will be propagated
81
82 // C = MUL(alpha*A, beta*B) + gamma*C
83 CUDNNCHECK(cudnnOpTensor(A.GetCudnnHandle(),
84 opTensorDescr,
85 &alpha,
86 A.GetTensorDescriptor(),
87 A.GetDataPointer(),
88 &beta,
89 B.GetTensorDescriptor(),
90 B.GetDataPointer(),
91 &gamma, // gamma = 0: Don't add C
92 C.GetTensorDescriptor(),
93 C.GetDataPointer()));
94
95 CUDNNCHECK(cudnnDestroyOpTensorDescriptor(opTensorDescr));
96}*/
97
98//____________________________________________________________________________
99/*template<>
100void TCudnn<float>::TransposeMultiply(TCudaTensor<float> & C,
101 const TCudaTensor<float> & A,
102 const TCudaTensor<float> & B)
103{
104
105}*/
106//____________________________________________________________________________
107/*template<>
108void TCudnn<double>::TransposeMultiply(TCudaTensor<double> & C,
109 const TCudaTensor<double> & A,
110 const TCudaTensor<double> & B)
111{
112
113}*/
114
115//____________________________________________________________________________
116/*template<typename AFloat>
117void TCudnn<AFloat>::Hadamard(TCudaTensor<AFloat> & B,
118 const TCudaTensor<AFloat> &A)
119{
120
121}*/
122
123//____________________________________________________________________________
////////////////////////////////////////////////////////////////////////////////
/// \brief Reduce all elements of the tensor A to a single scalar sum.
///
/// Performs C = alpha * reduce_add(A) + beta * C via cudnnReduceTensor, where
/// C is a freshly created 1x1x1x1 tensor, and returns the value copied back to
/// the host.
/// NOTE(review): the initial content of C comes from the just-created host
/// buffer — presumably zero, but not visible here; callers should pass
/// beta = 0 unless that initial value is confirmed.
///
/// \tparam AFloat Floating point type (float or double).
/// \param A     Tensor to be summed.
/// \param alpha Scale factor applied to the reduction result.
/// \param beta  Scale factor applied to the previous content of the output.
/// \return The reduced scalar value.
template<typename AFloat>
AFloat TCudnn<AFloat>::Sum(const TCudaTensor<AFloat> & A, const AFloat alpha, const AFloat beta)
{
   cudnnHandle_t cudnnHandle = A.GetCudnnHandle();

   // Map the template type onto the cuDNN data type enum. Initialize to a
   // valid value so the enum is never read uninitialized if an unexpected
   // AFloat is instantiated.
   cudnnDataType_t cudnnDataType = CUDNN_DATA_FLOAT;
   if (std::is_same<AFloat, double>::value) cudnnDataType = CUDNN_DATA_DOUBLE;

   // The output tensor C, which holds the single reduced number.
   TCudaHostBuffer<AFloat> hostBuffer(1);
   const std::vector<size_t> shapeVec {1, 1, 1, 1};
   // This constructor copies the data automatically to the device.
   TCudaTensor<AFloat> C(hostBuffer, shapeVec);

   // Descriptor for the tensor reduction.
   cudnnReduceTensorDescriptor_t reduceTensorDescr;
   CUDNNCHECK(cudnnCreateReduceTensorDescriptor(&reduceTensorDescr));
   // cuDNN supports index computation only for MIN/MAX reductions; an ADD
   // reduction must use CUDNN_REDUCE_TENSOR_NO_INDICES, which also avoids
   // allocating a useless index buffer.
   CUDNNCHECK(cudnnSetReduceTensorDescriptor(reduceTensorDescr,
                                             CUDNN_REDUCE_TENSOR_ADD,
                                             cudnnDataType,
                                             CUDNN_PROPAGATE_NAN,            // NaN will be propagated
                                             CUDNN_REDUCE_TENSOR_NO_INDICES, // no indices for ADD
                                             CUDNN_32BIT_INDICES));          // type of the (unused) indices

   // Query the minimum sizes of the index and workspace buffers and allocate
   // them only when cuDNN actually needs a non-empty buffer.
   size_t indiceSizeInBytes = 0;
   void * indices = nullptr;
   CUDNNCHECK(cudnnGetReductionIndicesSize(cudnnHandle,
                                           reduceTensorDescr,
                                           A.GetTensorDescriptor(),
                                           C.GetTensorDescriptor(),
                                           &indiceSizeInBytes));
   if (indiceSizeInBytes > 0) cudaMalloc(&indices, indiceSizeInBytes);

   size_t workspaceSizeInBytes = 0;
   void * workspace = nullptr;
   CUDNNCHECK(cudnnGetReductionWorkspaceSize(cudnnHandle,
                                             reduceTensorDescr,
                                             A.GetTensorDescriptor(),
                                             C.GetTensorDescriptor(),
                                             &workspaceSizeInBytes));
   if (workspaceSizeInBytes > 0) cudaMalloc(&workspace, workspaceSizeInBytes);

   // Tensor reduction to the 1x1x1x1 output tensor set up above:
   // C = alpha * reduce_add(A) + beta * C
   CUDNNCHECK(cudnnReduceTensor(cudnnHandle,
                                reduceTensorDescr,
                                indices,
                                indiceSizeInBytes,
                                workspace,
                                workspaceSizeInBytes,
                                &alpha,
                                A.GetTensorDescriptor(),
                                A.GetDataPointer(),
                                &beta,
                                C.GetTensorDescriptor(),
                                C.GetDataPointer()));

   // Copy the reduced scalar back from the device to the host buffer.
   TCudaDeviceBuffer<AFloat> & resultDeviceBuffer = C.GetDeviceBuffer();
   resultDeviceBuffer.CopyTo(hostBuffer);

   // cudaFree(nullptr) is a documented no-op, so unconditional frees are safe.
   cudaFree(indices);
   cudaFree(workspace);
   CUDNNCHECK(cudnnDestroyReduceTensorDescriptor(reduceTensorDescr));

   return *hostBuffer;
}
194
195//____________________________________________________________________________
196/*template<>
197void TCudnn<float>::SumColumns(TCudaTensor<float> & B,
198 const TCudaTensor<float> & A)
199{
200
201}*/
202
203//____________________________________________________________________________
204/*template<>
205void TCudnn<double>::SumColumns(TCudaTensor<double> & B,
206 const TCudaTensor<double> & A)
207{
208
209}
210
211template<>
212void TCudnn<float>::SumRows(TCudaTensor<float> & B,
213 const TCudaTensor<float> & A)
214{
215
216}*/
217
218//____________________________________________________________________________
219/*template<>
220void TCudnn<double>::SumRows(TCudaTensor<double> & B,
221 const TCudaTensor<double> & A)
222{
223
224}*/
225
226////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
227/// \brief Checks two matrices for element-wise equality.
228/// \tparam AFloat An architecture-specific floating point number type.
229/// \param A The first matrix.
230/// \param B The second matrix.
231/// \param epsilon Equality tolerance, needed to address floating point arithmetic.
232/// \return Whether the two matrices can be considered equal element-wise
233////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
234/*template<typename AFloat>
235bool TCudnn<AFloat>::AlmostEquals(const TCudaTensor<AFloat> &A, const TCudaTensor<AFloat> &B, double epsilon)
236{
237
238}*/
239
240//____________________________________________________________________________
////////////////////////////////////////////////////////////////////////////////
/// \brief In-place scaled addition: B = alpha * A + beta * B (cudnnAddTensor).
///
/// The two tensors must have the same rank; every dimension of A must either
/// match the corresponding dimension of B or be 1 (cudnnAddTensor broadcast).
/// On a shape violation both tensors are printed and the program aborts via
/// assert.
///
/// \tparam AFloat Floating point type.
/// \param B     Destination tensor, updated in place.
/// \param A     Source tensor added to B.
/// \param alpha Scale factor for A.
/// \param beta  Scale factor for the previous content of B.
template<typename AFloat>
void TCudnn<AFloat>::ScaleAdd(TCudaTensor<AFloat> & B,
                              const TCudaTensor<AFloat> & A,
                              const AFloat alpha,
                              const AFloat beta)
{
   const auto & shapeB = B.GetShape();
   const auto & shapeA = A.GetShape();

   // Ranks must agree before comparing individual dimensions.
   assert(shapeB.size() == shapeA.size());
   for (size_t dim = 0; dim < shapeB.size(); ++dim) {
      // A dimension of A may differ from B only when it is 1 (broadcast).
      const bool incompatible = (shapeB[dim] != shapeA[dim]) && (shapeA[dim] != 1);
      if (incompatible) {
         // Dump both tensors to aid debugging, then abort.
         PrintTensor(A);
         PrintTensor(B);
         assert(false);
      }
   }

   // B = alpha * A + beta * B, with B as the destination tensor.
   CUDNNCHECK(cudnnAddTensor(A.GetCudnnHandle(),
                             &alpha,
                             A.GetTensorDescriptor(),
                             A.GetDataPointer(),
                             &beta,
                             B.GetTensorDescriptor(), // Destination Tensor
                             B.GetDataPointer()));
}
267
268#if 0 // we need to test these functions
269//____________________________________________________________________________
////////////////////////////////////////////////////////////////////////////////
/// \brief Adds the scalar beta to every element of tensor A in place.
///
/// Implemented by filling a temporary tensor of the same shape with the
/// constant value and delegating the element-wise addition to ScaleAdd.
/// NOTE(review): this code sits inside an "#if 0" block and is untested; it
/// relies on ScaleAdd's default alpha/beta arguments declared in the class
/// header — confirm those defaults produce A = A + C before enabling.
///
/// \tparam AFloat Floating point type.
/// \param A    Tensor updated in place.
/// \param beta Scalar constant added to each element.
template<typename AFloat>
void TCudnn<AFloat>::ConstAdd(TCudaTensor<AFloat> &A, const AFloat beta)
{
   // tmp tensor that does the addition: copy A's geometry, then overwrite
   // its contents with the constant value beta.
   TCudaTensor<AFloat> C (A);
   C.SetConstVal(beta);

   ScaleAdd(A, C);
}
279
280//____________________________________________________________________________
////////////////////////////////////////////////////////////////////////////////
/// \brief Multiplies every element of tensor A in place by the scalar beta.
///
/// Uses cudnnScaleTensor, which scales the tensor given by the descriptor and
/// data pointer by the scalar passed by address as the last argument.
/// NOTE(review): this code sits inside an "#if 0" block and is untested.
///
/// \tparam AFloat Floating point type.
/// \param A    Tensor updated in place.
/// \param beta Scalar multiplier applied to each element.
template<typename AFloat>
void TCudnn<AFloat>::ConstMult(TCudaTensor<AFloat> &A, const AFloat beta)
{
   CUDNNCHECK(cudnnScaleTensor(A.GetCudnnHandle(),
                               A.GetTensorDescriptor(),
                               A.GetDataPointer(),
                               &beta));
}
289#endif
290//____________________________________________________________________________
291/*template<typename AFloat>
292void TCudnn<AFloat>::ReciprocalElementWise(TCudaTensor<AFloat> &A)
293{
294
295}*/
296
297//____________________________________________________________________________
298/*template<typename AFloat>
299void TCudnn<AFloat>::SquareElementWise(TCudaTensor<AFloat> &A)
300{
301
302}*/
303
304#if 0 // to check
305//____________________________________________________________________________
////////////////////////////////////////////////////////////////////////////////
/// \brief Element-wise square root of tensor A, computed in place via a cuDNN
///        CUDNN_OP_TENSOR_SQRT tensor operation.
///
/// A is passed as both operands and as the destination of cudnnOpTensor, so
/// the result overwrites A.
/// NOTE(review): this code sits inside an "#if 0" block and is untested.
/// Presumably the operation computes A = sqrt(alpha*A) + gamma*A per the
/// cuDNN OpTensor semantics (the SQRT op ignores the second operand) — verify
/// against the cuDNN documentation before enabling.
/// NOTE(review): cudnnDataType is left uninitialized when AFloat is neither
/// float nor double — fix before enabling this block.
///
/// \tparam AFloat Floating point type (float or double).
/// \param A     Tensor updated in place.
/// \param alpha Scale factor for the first operand.
/// \param beta  Scale factor for the second operand (unused by SQRT).
/// \param gamma Scale factor for the previous content of the destination.
template<typename AFloat>
void TCudnn<AFloat>::SqrtElementWise(TCudaTensor<AFloat> &A, const AFloat alpha, const AFloat beta, const AFloat gamma)
{
   // Map the template type onto the cuDNN data type enum.
   cudnnDataType_t cudnnDataType;
   if (std::is_same<AFloat, double>::value) { cudnnDataType = CUDNN_DATA_DOUBLE;}
   else if (std::is_same<AFloat, float>::value) { cudnnDataType = CUDNN_DATA_FLOAT;}

   // Descriptor for the Tensor Operation
   cudnnOpTensorDescriptor_t opTensorDescr;
   CUDNNCHECK(cudnnCreateOpTensorDescriptor(&opTensorDescr));

   CUDNNCHECK(cudnnSetOpTensorDescriptor(opTensorDescr,
                                         CUDNN_OP_TENSOR_SQRT,
                                         cudnnDataType,
                                         CUDNN_PROPAGATE_NAN)); // NaN will be propagated

   // C = MUL(alpha*A, beta*B) + gamma*C
   // (here A plays the role of A, B, and C, so the result lands in A)
   CUDNNCHECK(cudnnOpTensor(A.GetCudnnHandle(),
                            opTensorDescr,
                            &alpha,
                            A.GetTensorDescriptor(),
                            A.GetDataPointer(),
                            &beta,
                            A.GetTensorDescriptor(),
                            A.GetDataPointer(),
                            &gamma,
                            A.GetTensorDescriptor(), // Save result in A
                            A.GetDataPointer()));

   CUDNNCHECK(cudnnDestroyOpTensorDescriptor(opTensorDescr));
}
337
338#endif
339
340
341/// Adam updates
342//____________________________________________________________________________
343/*template<typename AFloat>
344void TCudnn<AFloat>::AdamUpdate(TCudaTensor<AFloat> &A, const TCudaTensor<AFloat> & M, const TCudaTensor<AFloat> & V, AFloat alpha, AFloat eps)
345{
346
347}*/
348
349//____________________________________________________________________________
350/*template<typename AFloat>
351void TCudnn<AFloat>::AdamUpdateFirstMom(TCudaTensor<AFloat> &A, const TCudaTensor<AFloat> & B, AFloat beta)
352{
353
354}*/
355
356//____________________________________________________________________________
357/*template<typename AFloat>
358void TCudnn<AFloat>::AdamUpdateSecondMom(TCudaTensor<AFloat> &A, const TCudaTensor<AFloat> & B, AFloat beta)
359{
360
361}*/
362
363} // DNN
364} // TMVA
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
void PrintTensor(RTensor< T > &t)
create variable transformations
constexpr Double_t C()
Velocity of light in .
Definition TMath.h:114