ROOT Reference Guide
TCudnn.h
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Joana Niermann 23/07/19
3
4/*************************************************************************
5 * Copyright (C) 2019, Joana Niermann *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12///////////////////////////////////////////////////////////////////
13// Definition of the TCudnn architecture class, which provides //
14// a wrapping of the low-level functionality for neural networks //
15// in the cuDNN library. //
16///////////////////////////////////////////////////////////////////
17
18#ifndef TMVA_DNN_ARCHITECTURES_CUDNN
19#define TMVA_DNN_ARCHITECTURES_CUDNN
20
21#include "RConfigure.h" // for definition of R__HAS_CUDNN
22
23#ifndef R__HAS_CUDNN
24#error This file can be compiled only when cudnn is available in ROOT
25#else
26
27#include "TMVA/DNN/Functions.h"
29//#include "TMVA/DNN/CNN/Descriptors.h"
36
37#include "cudnn.h"
38#include "Cuda/CudaBuffers.h"
39#include "Cuda/CudaTensor.h"
41#include <utility>
42#include <vector>
43
45
46class TRandom;
47
48namespace TMVA
49{
50namespace DNN
51{
52
53struct TCudnnEmptyDescriptor {};
54
55
56/** The TCudnn architecture class.
57 *
58 * Low-level interface class for CUDA computing architectures using the cuDNN
59 * library as backend. Contains, as public types, the declarations of the scalar,
60 * matrix and buffer types for this architecture, as well as the functions of
61 * the low-level interface in the form of static members.
62 */
63template<typename AFloat = Float_t>
64class TCudnn
65{
66private:
67 static TRandom * fgRandomGen;
68public:
69
70 using Scalar_t = AFloat;
71 using Matrix_t = TCudaTensor<AFloat>;
72 using Tensor_t = TCudaTensor<AFloat>;
73 using DeviceBuffer_t = TCudaDeviceBuffer<AFloat>;
74 using HostBuffer_t = TCudaHostBuffer<AFloat>;
75
76 // The descriptors for the (tensor) data are held by the data classes (CudaTensor)
77 using ActivationDescriptor_t = cudnnActivationDescriptor_t;
78 using ConvolutionDescriptor_t = cudnnConvolutionDescriptor_t;
79 using DropoutDescriptor_t = cudnnDropoutDescriptor_t;
80 using FilterDescriptor_t = cudnnFilterDescriptor_t;
81 //using OpTensorDescriptor_t = cudnnOpTensorDescriptor_t;
82 using PoolingDescriptor_t = cudnnPoolingDescriptor_t;
83 //using ReductionDescriptor_t = cudnnReduceTensorDescriptor_t;
84 using AlgorithmForward_t = cudnnConvolutionFwdAlgo_t;
85 using AlgorithmBackward_t = cudnnConvolutionBwdDataAlgo_t;
86 using AlgorithmHelper_t = cudnnConvolutionBwdFilterAlgo_t;
87 using AlgorithmDataType_t = cudnnDataType_t;
88 using ReduceTensorDescriptor_t = cudnnReduceTensorDescriptor_t;
89 using TensorDescriptor_t = cudnnTensorDescriptor_t;
90 using RecurrentDescriptor_t = cudnnRNNDescriptor_t;
91
92 using EmptyDescriptor_t = TCudnnEmptyDescriptor; // Used if a descriptor is not needed in a class
93
94 using BNormLayer_t = TBatchNormLayer<TCudnn<AFloat>>;
95 using BNormDescriptors_t = TDNNGenDescriptors<BNormLayer_t>;
96 //using BNormWorkspace_t = CNN::TCNNWorkspace<BNormLayer_t>;
97 using ConvLayer_t = CNN::TConvLayer<TCudnn<AFloat>>;
98 using ConvDescriptors_t = CNN::TCNNDescriptors<ConvLayer_t>;
99 using ConvWorkspace_t = CNN::TCNNWorkspace<ConvLayer_t>;
100 using PoolingLayer_t = CNN::TMaxPoolLayer<TCudnn<AFloat>>;
101 using PoolingDescriptors_t = CNN::TCNNDescriptors<PoolingLayer_t>;
102 using PoolingWorkspace_t = CNN::TCNNWorkspace<PoolingLayer_t>;
103
104 using RNNLayer_t = RNN::TBasicRNNLayer<TCudnn<AFloat>>;
105 using RNNDescriptors_t = RNN::TRNNDescriptors<TCudnn<AFloat>>;
106 using RNNWorkspace_t = RNN::TRNNWorkspace<TCudnn<AFloat>>;
107
108 using LSTMLayer_t = RNN::TBasicLSTMLayer<TCudnn<AFloat>>;
109 // using LSTMDescriptors_t = RNN::TRNNDescriptors<LSTMLayer_t>;
110 // using LSTMWorkspace_t = RNN::TRNNWorkspace<LSTMLayer_t>;
111
112 using GRULayer_t = RNN::TBasicGRULayer<TCudnn<AFloat>>;
113 // using GRUDescriptors_t = RNN::TRNNDescriptors<GRULayer_t>;
114 // using GRUWorkspace_t = RNN::TRNNWorkspace<GRULayer_t>;
115
116 // template <typename AFloat>
117 // using ConvDescriptors_t = CNN::TCNNDescriptors<CNN::TConvLayer<TCudnn<AFloat>>>;
118
119 // convolution options
120 // default is -1 (left to cudnn)
121 struct CNNOptions {
122
123 static int ConvFwdAlgorithm;
124 static int ConvBwdDataAlgorithm;
125 static int ConvBwdFilterAlgorithm;
126 // default is 0 (left to cudnn : a value -1 will indicate to not use any space)
127 static Long_t ConvMaxWorkspaceSize;
128 }; // struct CNNOptions
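// Illustrative sketch, not part of the original header: the static CNNOptions members can be
// set before building a network to override cuDNN's automatic algorithm selection, using the
// numeric codes listed in the comments near the end of this file. The values below are example
// choices, not defaults.
//
//    TCudnn<float>::CNNOptions::ConvFwdAlgorithm     = 1;   // CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
//    TCudnn<float>::CNNOptions::ConvBwdDataAlgorithm = 1;   // CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
//    TCudnn<float>::CNNOptions::ConvMaxWorkspaceSize = 256 * 1024 * 1024;   // limit workspace to 256 MB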
129
131
132
133 static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w) {
134 return Tensor_t( {n,c,h,w}, GetTensorLayout(), 0, 0);
135 }
136
137 static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w) {
138 return Tensor_t( buffer, {n,c,h,w}, GetTensorLayout(), 0, 0);
139 }
140
141 static Tensor_t CreateTensor(size_t n, size_t c, size_t w)
142 {
143 return Tensor_t({n, c, w}, GetTensorLayout(), 0, 0);
144 }
145
146 static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t w)
147 {
148 return Tensor_t(buffer, {n, c, w}, GetTensorLayout(), 0, 0);
149 }
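// Illustrative sketch, not part of the original header: creating a 4d activation tensor for a
// batch of 32 three-channel 32x32 images with the architecture's default (cuDNN) layout; the
// shape values are arbitrary examples.
//
//    using Arch_t = TMVA::DNN::TCudnn<float>;
//    Arch_t::Tensor_t activations = Arch_t::CreateTensor(32, 3, 32, 32);   // shape {n, c, h, w}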
150
151 static bool IsCudnn() { return true; }
152
153 // create a weight tensor/matrix vector from another tensor/weight vector using the given tensor shapes
154 // this function is used by the optimizers to store intermediate weight representations
155 static void CreateWeightTensors( std::vector<Matrix_t> & newWeights, const std::vector<Matrix_t> & weights) {
156 if (!newWeights.empty()) newWeights.clear();
157 size_t n = weights.size();
158 for (size_t i = 0; i < n; ++i)
159 newWeights.emplace_back( weights[i].GetShape(), weights[i].GetLayout(), 0, 0);
160 }
161 //____________________________________________________________________________
162 //
163 // Architecture Initialization
164 //____________________________________________________________________________
165
166 static void InitializeBNormDescriptors(TDescriptors * & descriptors,
167 BNormLayer_t *L = nullptr);
168
169 static void InitializeConvDescriptors(TDescriptors * & descriptors,
170 ConvLayer_t *L = nullptr);
171
172 static void InitializePoolDescriptors(TDescriptors * & descriptors,
173 PoolingLayer_t *L = nullptr);
174
175 static void InitializeRNNDescriptors(TDescriptors *&descriptors, RNNLayer_t *layer)
176 {
177 InitializeRecurrentDescriptors<RNNLayer_t>(descriptors, layer);
178 }
179 static void InitializeLSTMDescriptors(TDescriptors *&descriptors, LSTMLayer_t *layer) {
180 InitializeRecurrentDescriptors<LSTMLayer_t>(descriptors, layer);
181 }
182 static void InitializeGRUDescriptors(TDescriptors *&descriptors, GRULayer_t *layer) {
183 InitializeRecurrentDescriptors<GRULayer_t>(descriptors, layer);
184 }
185 template<typename RNNLayer>
186 static void InitializeRecurrentDescriptors(TDescriptors *&descriptors, RNNLayer *L);
187 // static void InitializeRNNDescriptors(TDescriptors *&descriptors, LSTMLayer_t *L = nullptr);
188 // static void InitializeRNNDescriptors(TDescriptors *&descriptors, GRULayer_t *L = nullptr);
189
190 static void InitializeActivationDescriptor(ActivationDescriptor_t & descriptors, EActivationFunction activFunc, double coef = 0.0);
191
192 static void ReleaseConvDescriptors(TDescriptors * descriptors );
193 static void ReleasePoolDescriptors(TDescriptors * descriptors );
194 static void ReleaseRNNDescriptors(TDescriptors *descriptors);
195 static void ReleaseBNormDescriptors(TDescriptors * descriptors );
196 static void ReleaseDescriptor(EmptyDescriptor_t & emptyDescr) {} // Does nothing
197 static void ReleaseDescriptor(ActivationDescriptor_t & activationDescr);
198 static void ReleaseDescriptor(ConvolutionDescriptor_t & convolutionDescr);
199 static void ReleaseDescriptor(DropoutDescriptor_t & dropoutDescr);
200 static void ReleaseDescriptor(FilterDescriptor_t & filterDescr);
201 static void ReleaseDescriptor(PoolingDescriptor_t & poolingDescr);
202 static void ReleaseDescriptor(TensorDescriptor_t & tensorDescr);
203
204
205 static void InitializeConvWorkspace(TWorkspace * & workspace,
206 TDescriptors * & descriptors,
207 const DNN::CNN::TConvParams & params,
208 ConvLayer_t *L = nullptr);
209 static void InitializePoolDropoutWorkspace(TWorkspace * & workspace,
210 TDescriptors * & descriptors,
211 const DNN::CNN::TConvParams & params,
212 PoolingLayer_t *L = nullptr);
213
214 static void InitializeRNNWorkspace(TWorkspace *&workspace, TDescriptors *&descriptors, RNNLayer_t *layer)
215 {
216 InitializeRecurrentWorkspace<RNNLayer_t>(workspace, descriptors, layer);
217 }
218 static void InitializeLSTMWorkspace(TWorkspace *&workspace, TDescriptors *&descriptors, LSTMLayer_t *layer)
219 {
220 InitializeRecurrentWorkspace<LSTMLayer_t>(workspace, descriptors, layer);
221 }
222 static void InitializeGRUWorkspace(TWorkspace *&workspace, TDescriptors *&descriptors, GRULayer_t *layer)
223 {
224 InitializeRecurrentWorkspace<GRULayer_t>(workspace, descriptors, layer);
225 }
226 template<typename RNNLayer>
227 static void InitializeRecurrentWorkspace(TWorkspace *&workspace, TDescriptors *&descriptors,
228 RNNLayer *layer);
229
230 static void FreeConvWorkspace(TWorkspace * workspace);
231 static void FreePoolDropoutWorkspace(TWorkspace * workspace);
232 static void FreeRNNWorkspace(TWorkspace *workspace);
233
234 // tensor initialization for recurrent networks
235 static void InitializeRNNTensors(RNNLayer_t *layer) { InitializeRecurrentTensors<RNNLayer_t>(layer); }
236 static void InitializeLSTMTensors(LSTMLayer_t *layer) { InitializeRecurrentTensors<LSTMLayer_t>(layer); }
237 static void InitializeGRUTensors(GRULayer_t *layer) { InitializeRecurrentTensors<GRULayer_t>(layer); }
238 template <typename RNNLayer>
239 static void InitializeRecurrentTensors(RNNLayer *layer);
240
241 //____________________________________________________________________________
242 //
243 // Propagation
244 //____________________________________________________________________________
245
246 /** @name Forward Propagation
247 * Low-level functions required for the forward propagation of activations
248 * through the network.
249 */
250 ///@{
251 /** Matrix-multiply \p input with the transpose of \p weights and
252 * write the results into \p output. */
253 static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights);
254
255 /** Add the vector \p biases row-wise to the matrix \p output. */
256 static void AddRowWise(Tensor_t &output, const Matrix_t &biases);
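// Illustrative sketch, not part of the original header: a dense-layer forward pass combines the
// two calls above (output, input, weights and biases are example tensors of the appropriate
// shapes). Assuming input of shape (batchSize x nInputs) and weights of shape (nOutputs x nInputs),
// output ends up with shape (batchSize x nOutputs).
//
//    TCudnn<float>::MultiplyTranspose(output, input, weights);   // output = input * weights^T
//    TCudnn<float>::AddRowWise(output, biases);                  // add the bias vector to every row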
257
258 /** @name Backward Propagation (Dense Layers)
259 * Low-level functions required for the backward propagation of gradients
260 * through the network.
261 */
262 ///@{
263 /** Perform the complete backward propagation step. If the provided
264 * \p activationGradientsBackward matrix is not empty, compute the
265 * gradients of the objective function with respect to the activations
266 * of the previous layer (backward direction).
267 * Also compute the weight and the bias gradients. Modifies the values
268 * in \p df and thus produces a valid result only if it is applied for
269 * the first time after the corresponding forward propagation has been
270 * performed. */
271 static void Backward(Tensor_t & activationGradientsBackward,
272 Matrix_t & weightGradients,
273 Matrix_t & biasGradients,
274 Tensor_t & df,
275 const Tensor_t & activationGradients,
276 const Matrix_t & weights,
277 const Tensor_t & activationBackward);
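// For reference, a sketch of the standard dense-layer gradients this step computes (written here
// as an assumption about the conventions, not taken from this file):
//
//    activationGradientsBackward = df * weights                  // gradient w.r.t. previous activations
//    weightGradients             = df^T * activationBackward     // gradient w.r.t. weights
//    biasGradients               = column-wise sum of df         // gradient w.r.t. biases
//
// where df holds the activation-function derivative combined with the incoming activation gradients.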
278
279 /** Element-wise scaled addition of tensor \p B to tensor \p A (tensor version of the matrix functions above). */
280 static void ScaleAdd(Tensor_t & A, const Tensor_t & B,
281 Scalar_t alpha = 1.0,
282 Scalar_t beta = 1.0);
283
284 /** Deep copy from B to A. */
285 static void Copy(Tensor_t & A, const Tensor_t & B);
286
287 // copy from another tensor
288 template<typename ATensor_t>
289 static void CopyDiffArch(Tensor_t & A,
290 const ATensor_t & B);
291
292 template <typename ATensor_t>
293 static void CopyWeightsDiffArch(Tensor_t &A, const ATensor_t &B);
294
295 //template<>
296 static void CopyDiffArch(Tensor_t A, const Tensor_t & B ) { Copy(A,B); }
297
298 // copy from vector of matrices of different types
299 template<typename AMatrix_t>
300 static void CopyDiffArch(std::vector<Tensor_t> & A,
301 const std::vector<AMatrix_t> & B);
302
303
304 //____________________________________________________________________________
305 //
306 // Activation Functions
307 //____________________________________________________________________________
308
309 /** @name Activation Functions
310 * For each activation function, the low-level interface contains two routines.
311 * One that applies the activation function to a matrix and one that evaluates
312 * the derivatives of the activation function at the elements of a given matrix
313 * and writes the results into the result matrix.
314 */
315 ///@{
316 static void Identity(Tensor_t & X) {}
317 static void IdentityDerivative(Tensor_t & dX, Tensor_t& X,
318 Tensor_t & Y, Tensor_t & dY,
319 ActivationDescriptor_t activationDescr,
320 const AFloat alpha = 1,
321 const AFloat beta = 1) {}
322
323 static void ActivationFunctionForward(Tensor_t & X, EActivationFunction activFunct,
324 const ActivationDescriptor_t activationDescr,
325 const double coef = 0.0, const AFloat alpha = 1,
326 const AFloat beta = 0);
327
328 // same as above but using different input/output tensors
329 static void ActivationFunctionForward(Tensor_t &Y, const Tensor_t & X, EActivationFunction activFunct,
330 const ActivationDescriptor_t activationDescr, const double coef = 0.0,
331 const AFloat alpha = 1, const AFloat beta = 0);
332
333 /** Computes the gradient of the activation function */
334 static void ActivationFunctionBackward(Tensor_t & dX, const Tensor_t & Y,
335 const Tensor_t & dY, const Tensor_t & X,
336 EActivationFunction activFunct,
337 const ActivationDescriptor_t activationDescr,
338 const AFloat alpha = 1,
339 const AFloat beta = 0);
340
341 //
342 // No cudnn implementation for the following activation functions
343 //
344 //static void SymmetricRelu(Tensor_t & B);
345
346 // implementations not used by cuDNN
347 static void Relu(Tensor_t &) {}
348 static void Sigmoid(Tensor_t &) {}
349 static void Tanh(Tensor_t &) {}
350 static void FastTanh(Tensor_t &) {}
351 static void SymmetricRelu(Tensor_t &) {}
352 static void SoftSign(Tensor_t &) {}
353 static void Gauss(Tensor_t &) {}
354
355 static void IdentityDerivative(Tensor_t &, const Tensor_t &) {}
356 static void ReluDerivative(Tensor_t &, const Tensor_t &) {}
357 static void SigmoidDerivative(Tensor_t &, const Tensor_t &) {}
358 static void TanhDerivative(Tensor_t &, const Tensor_t &) {}
359 static void FastTanhDerivative(Tensor_t &, const Tensor_t &) {}
360 static void SymmetricReluDerivative(Tensor_t & , const Tensor_t & ) {}
361 static void SoftSignDerivative(Tensor_t & , const Tensor_t & ) {}
362 static void GaussDerivative(Tensor_t & , const Tensor_t & ) {}
363 ///@}
364
365 //____________________________________________________________________________
366 //
367 // Loss Functions
368 //____________________________________________________________________________
369
370 /** @name Loss Functions
371 * Loss functions compute a scalar value given the \p output of the network
372 * for a given training input and the expected network prediction \p Y that
373 * quantifies the quality of the prediction. For each function a routine
374 * that computes the gradients (suffixed by Gradients) must also be provided
375 * to start the backpropagation algorithm.
376 */
377 ///@{
378
379 static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output,
380 const Matrix_t &weights);
381 static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y,
382 const Matrix_t &output, const Matrix_t &weights);
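// For reference, the standard mean-squared-error definitions (the exact normalization used by the
// implementation is an assumption here):
//
//    MSE(Y, output) = (1/N) * sum_i w_i * (Y_i - output_i)^2
//    dY_i           = (2/N) * w_i * (output_i - Y_i)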
383
384 /** Sigmoid transformation is implicitly applied, thus \p output should
385 * hold the linear activations of the last layer in the net. */
386 static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output,
387 const Matrix_t &weights);
388
389 static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
390 const Matrix_t &output, const Matrix_t &weights);
391
392 /** Softmax transformation is implicitly applied, thus \p output should
393 * hold the linear activations of the last layer in the net. */
394 static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output,
395 const Matrix_t &weights);
396 static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
397 const Matrix_t &output, const Matrix_t &weights);
398 ///@}
399
400 //____________________________________________________________________________
401 //
402 // Output Functions
403 //____________________________________________________________________________
404
405 /** @name Output Functions
406 * Output functions transform the activations \p output of the
407 * output layer in the network to a valid prediction \p YHat for
408 * the desired usage of the network, e.g. the identity function
409 * for regression or the sigmoid transformation for two-class
410 * classification.
411 */
412 ///@{
413 static void Sigmoid(Matrix_t &YHat,
414 const Matrix_t & );
415 static void Softmax(Matrix_t &YHat,
416 const Matrix_t & );
417 ///@}
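// For reference, the standard output transformations (element-wise):
//
//    sigmoid:  YHat_ij = 1 / (1 + exp(-output_ij))
//    softmax:  YHat_ij = exp(output_ij) / sum_k exp(output_ik)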
418
419
420
421 //____________________________________________________________________________
422 //
423 // Dropout
424 //____________________________________________________________________________
425
426 /** @name Dropout
427 */
428 ///@{
429
430 /** Apply dropout with activation probability \p p to the given
431 * tensor \p A and scale the result by reciprocal of \p p. */
432 static void DropoutForward(Tensor_t & A,
433 TDescriptors * descriptors,
434 TWorkspace * workspace,
435 Scalar_t p);
436
437 static void DropoutBackward(Tensor_t & A,
438 TDescriptors * descriptors,
439 TWorkspace * workspace);
440
441 ///@}
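// For reference (a sketch of the usual "inverted dropout" convention, assumed here): during training
// each element is kept with probability p and the kept elements are scaled by 1/p, so
//
//    E[DropoutForward(x)] = p * (x / p) + (1 - p) * 0 = x
//
// and no rescaling is needed at inference time.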
442
443 //____________________________________________________________________________
444 //
445 // Batch Normalization
446 //____________________________________________________________________________
447
448 /** @name Batch Normalization Layer Propagation
449 */
450 ///@{
451
452 /** The inputs from each batch are normalized during training to have zero mean and unit variance,
453 * and they are then scaled by two parameters, different for each input variable:
454 * - a scale factor \p gamma
455 * - an offset \p beta */
456
457 static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta,
458 Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans,
459 Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum,
460 Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor);
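// For reference, the standard batch-normalization transformation described above (the exact
// running-statistics update convention is an assumption):
//
//    xhat = (x - batchMean) / sqrt(batchVariance + epsilon)
//    y    = gamma * xhat + beta
//    runningMeans = momentum * runningMeans + (1 - momentum) * batchMean   (and similarly for runningVars)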
461
462 /** During inference the inputs are not normalized using the batch mean but the previously
463 * computed running mean and variance. */
464
465 static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta,
466 Tensor_t &y, const Matrix_t &runningMeans,
467 const Matrix_t &runningVars, Scalar_t epsilon,
468 const TensorDescriptor_t &);
469
470 static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx,
471 Matrix_t &gamma, // Matrix_t &beta, (not needed)
472 Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance,
473 const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &);
474
475 //____________________________________________________________________________
476 //
477 // Regularization
478 //____________________________________________________________________________
479
480 /** @name Regularization
481 * For each regularization type two functions are required, one named
482 * <tt><Type>Regularization</tt> that evaluates the corresponding
483 * regularization functional for a given weight matrix and the
484 * <tt>Add<Type>RegularizationGradients</tt>, that adds the regularization
485 * component in the gradients to the provided matrix.
486 */
487
488 static Scalar_t L1Regularization(const Matrix_t &W)
489 {
490 TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
491 return TCuda<AFloat>::L1Regularization(mW);
492 }
493 static void AddL1RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
494 {
495 TCudaMatrix<AFloat> mA(A.GetDeviceBuffer(), A.GetSize(), 1);
496 TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
497 TCuda<AFloat>::AddL1RegularizationGradients(mA, mW, weightDecay);
498 }
499
500 static Scalar_t L2Regularization(const Matrix_t &W)
501 {
502 TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
503 return TCuda<AFloat>::L2Regularization(mW);
504 }
505 static void AddL2RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
506 {
507 TCudaMatrix<AFloat> mA(A.GetDeviceBuffer(), A.GetSize(), 1);
508 TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
509 TCuda<AFloat>::AddL2RegularizationGradients(mA, mW, weightDecay);
510 }
511 ///@}
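// For reference, the standard regularization functionals these wrappers evaluate (sketch):
//
//    L1(W) = sum_ij |W_ij|       gradient term added:  weightDecay * sign(W_ij)
//    L2(W) = sum_ij W_ij^2       gradient term added:  2 * weightDecay * W_ij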
512
513 //____________________________________________________________________________
514 //
515 // Initialization
516 //____________________________________________________________________________
517
518 /** @name Initialization
519 * For each initialization method, one function in the low-level interface
520 * is provided. The naming scheme is <p>Initialize<Type></p> for a given
521 * initialization method Type.
522 */
523 ///@{
524
525 static void InitializeGauss(Matrix_t &A);
526 static void InitializeUniform(Matrix_t &A);
527 static void InitializeIdentity(Matrix_t &A);
528 static void InitializeZero(Matrix_t &A);
529 static void InitializeGlorotNormal(Matrix_t &A);
530 static void InitializeGlorotUniform(Matrix_t &A);
531
532 // return static instance of random generator used for initialization
533 // if generator does not exist it is created the first time with a random seed (e.g. seed = 0)
534 static TRandom &GetRandomGenerator();
535 // set the random seed for the static generator
536 // if the static generator does not exist it is created
537 static void SetRandomSeed(size_t seed);
538 ///@}
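// For reference, the usual Glorot/Xavier schemes behind InitializeGlorotUniform and
// InitializeGlorotNormal (the exact convention used here is an assumption):
//
//    glorot-uniform:  W_ij ~ U(-limit, limit)  with  limit = sqrt(6 / (fanIn + fanOut))
//    glorot-normal:   W_ij ~ N(0, sigma^2)     with  sigma = sqrt(2 / (fanIn + fanOut))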
539
540 //____________________________________________________________________________
541 //
542 // Dropout
543 //____________________________________________________________________________
544
545 /** @name Dropout
546 */
547 ///@{
548
549 /** Apply dropout with activation probability \p p to the given
550 * tensor \p A and scale the result by reciprocal of \p p. */
551 static void Dropout(Tensor_t &A, Scalar_t p) {}
552
553 ///@}
554
555 //____________________________________________________________________________
556 //
557 // Convolutional Layer Propagation
558 //____________________________________________________________________________
559
560 /** @name Forward Propagation in Convolutional Layer
561 */
562 ///@{
563
564 /** Add the biases in the Convolutional Layer. */
565 static void AddConvBiases(Matrix_t &output, const Matrix_t &biases);
566 ///@}
567
568 /** Dummy placeholder - preparation is currently only required for the CUDA architecture. */
569 static void PrepareInternals(Tensor_t &) {}
570
571 /** Forward propagation in the Convolutional layer */
572 static void ConvLayerForward(Tensor_t &output,
573 Tensor_t &inputActivationFunc, // this is output conv w/o activ func.
574 const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases,
575 const DNN::CNN::TConvParams &params, EActivationFunction activFunc,
576 Tensor_t & /* inputPrime */, const ConvDescriptors_t &descriptors,
577 ConvWorkspace_t &workspace);
578 // const AFloat alpha = 1,
579 // const AFloat beta = 1);
580
581 /** @name Backward Propagation in Convolutional Layer
582 */
583 ///@{
584
585 /** Perform the complete backward propagation step in a Convolutional Layer.
586 * If the provided \p activationGradientsBackward matrix is not empty, compute the
587 * gradients of the objective function with respect to the activations
588 * of the previous layer (backward direction).
589 * Also compute the weight and the bias gradients. Modifies the values
590 * in \p df and thus produces a valid result only if it is applied for
591 * the first time after the corresponding forward propagation has been
592 * performed. */
593 static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients,
594 Matrix_t &biasGradients, Tensor_t &inputActivation, Tensor_t &activationGradients,
595 const Matrix_t &weights, const Tensor_t &activationBackward,
596 const Tensor_t &outputTensor, EActivationFunction activFunc,
597 const ConvDescriptors_t &descriptors, ConvWorkspace_t &workspace, size_t /*batchSize*/,
598 size_t /*inputHeight*/, size_t /*inputWidth*/, size_t /*depth*/, size_t /*height*/,
599 size_t /*width*/, size_t /*filterDepth*/, size_t /*filterHeight*/,
600 size_t /*filterWidth*/, size_t /*nLocalViews*/);
601
602 ///@}
603
604 //____________________________________________________________________________
605 //
606 // Max Pooling Layer Propagation
607 //____________________________________________________________________________
608 /** @name Forward Propagation in Max Pooling Layer
609 */
610 ///@{
611
612 /** Downsample the matrix \p C to the matrix \p A, using the max
613 * operation, such that the winning indices are stored in matrix
614 * \p B. No winning indices needed for cuDNN. */
615 static void Downsample(Tensor_t &A, Tensor_t & /*B*/, const Tensor_t &C, const PoolingDescriptors_t &descriptors,
616 PoolingWorkspace_t &workspace, size_t imgHeight, size_t imgWidth, size_t fltHeight,
617 size_t fltWidth, size_t strideRows, size_t strideCols);
618
619 ///@}
620
621 /** @name Backward Propagation in Max Pooling Layer
622 */
623 ///@{
624 /** Perform the complete backward propagation step in a Pooling Layer. Based on the
625 * input to and output from the MaxPoolLayer, the gradients for the winning pixels
626 * are computed. */
627 static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients,
628 const Tensor_t & /*indexMatrix*/, const Tensor_t &inputActivation,
629 const Tensor_t &outputTensor, const PoolingDescriptors_t &descriptors,
630 PoolingWorkspace_t &workspace, size_t imgHeight, size_t imgWidth, size_t fltHeight,
631 size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews);
632
633 ///@}
634
635 //____________________________________________________________________________
636 //
637 // Reshape Layer Propagation
638 //____________________________________________________________________________
639 /** @name Forward and Backward Propagation in Reshape Layer
640 */
641 ///@{
642
643 /** Transform the matrix \p B to a matrix with different dimensions \p A */
644 // static void Reshape(Matrix_t &A, const Matrix_t &B);
645
646 /** Flattens the tensor \p B, such that each matrix is stretched into
647 * one row, resulting in the matrix \p A. */
648 static void Flatten(Tensor_t &A, const Tensor_t &B);
649
650 /** Transforms each row of \p B to a matrix and stores it in the
651 * tensor \p A. */
652 static void Deflatten(Tensor_t &A, const Tensor_t &B); // size_t index, size_t nRows,size_t nCols);
653
654 /** Rearrange data according to time: fill the B x T x D output with the T x B x D input matrix. */
655 static void Rearrange(Tensor_t &out, const Tensor_t &in);
656
657 // RNN functions
658 static void RNNForward(const Tensor_t &x, const Tensor_t &hx, const Tensor_t &cx, const Tensor_t &weights,
659 Tensor_t &y, Tensor_t &hy, Tensor_t &cy, const RNNDescriptors_t &descr,
660 RNNWorkspace_t &workspace, bool isTraining);
661
662 static void RNNBackward(const Tensor_t &x, const Tensor_t &hx, const Tensor_t &cx, const Tensor_t &y, const Tensor_t &dy,
663 const Tensor_t &dhy, const Tensor_t &dcy, const Tensor_t &weights, Tensor_t &dx, Tensor_t &dhx,
664 Tensor_t &dcx, Tensor_t &dw, const RNNDescriptors_t &desc, RNNWorkspace_t &workspace);
665
666
667 // Backward pass functions for recurrent networks, used by other architectures
668 //******************************************************************************************
669 static Matrix_t &RecurrentLayerBackward(Matrix_t &state_gradients_backward, // BxH
670 Matrix_t & /* input_weight_gradients */,
671 Matrix_t & /* state_weight_gradients */, Matrix_t & /* bias_gradients */,
672 Matrix_t & /* df */, // DxH
673 const Matrix_t & /* state */, // BxH
674 const Matrix_t & /* weights_input */, // HxD
675 const Matrix_t & /* weights_state */, // HxH
676 const Matrix_t & /* input */, // BxD
677 Matrix_t & /* input_gradient */)
678 {
679 return state_gradients_backward;
680 }
681 static Matrix_t &LSTMLayerBackward(
682 Matrix_t & state_gradients_backward , Matrix_t & /*cell_gradients_backward*/,
683 Matrix_t & /*input_weight_gradients*/, Matrix_t & /*forget_weight_gradients*/,
684 Matrix_t & /*candidate_weight_gradients*/, Matrix_t & /*output_weight_gradients*/,
685 Matrix_t & /*input_state_weight_gradients*/, Matrix_t & /*forget_state_weight_gradients*/,
686 Matrix_t & /*candidate_state_weight_gradients*/,
687 Matrix_t & /*output_state_weight_gradients*/, Matrix_t & /*input_bias_gradients*/,
688 Matrix_t & /*forget_bias_gradients*/, Matrix_t & /*candidate_bias_gradients*/,
689 Matrix_t & /*output_bias_gradients*/, Matrix_t & /*di*/, Matrix_t & /*df*/,
690 Matrix_t & /*dc*/, Matrix_t & /*dout*/,
691 const Matrix_t & /*precStateActivations*/, const Matrix_t & /*precCellActivations*/,
692 const Matrix_t & /*fInput*/, const Matrix_t & /*fForget*/,
693 const Matrix_t & /*fCandidate*/, const Matrix_t & /*fOutput*/,
694 const Matrix_t & /*weights_input*/, const Matrix_t & /*weights_forget*/,
695 const Matrix_t & /*weights_candidate*/, const Matrix_t & /*weights_output*/,
696 const Matrix_t & /*weights_input_state*/, const Matrix_t & /*weights_forget_state*/,
697 const Matrix_t & /*weights_candidate_state*/, const Matrix_t & /*weights_output_state*/,
698 const Matrix_t & /*input*/, Matrix_t & /*input_gradient*/,
699 Matrix_t & /*cell_gradient*/, Matrix_t & /*cell_tanh*/)
700 {
701 return state_gradients_backward;
702 }
703
704 /** Backward pass for GRU Network */
705 static Matrix_t &GRULayerBackward(
706 Matrix_t & state_gradients_backward, Matrix_t & /*reset_weight_gradients*/,
707 Matrix_t & /*update_weight_gradients*/, Matrix_t & /*candidate_weight_gradients*/,
708 Matrix_t & /*reset_state_weight_gradients*/, Matrix_t & /*update_state_weight_gradients*/,
709 Matrix_t & /*candidate_state_weight_gradients*/, Matrix_t & /*reset_bias_gradients*/,
710 Matrix_t & /*update_bias_gradients*/, Matrix_t & /*candidate_bias_gradients*/,
711 Matrix_t & /*dr*/, Matrix_t & /*du*/, Matrix_t & /*dc*/,
712 const Matrix_t & /*precStateActivations*/, const Matrix_t & /*fReset*/,
713 const Matrix_t & /*fUpdate*/, const Matrix_t & /*fCandidate*/,
714 const Matrix_t & /*weights_reset*/, const Matrix_t & /*weights_update*/,
715 const Matrix_t & /*weights_candidate*/, const Matrix_t & /*weights_reset_state*/,
716 const Matrix_t & /*weights_update_state*/, const Matrix_t & /*weights_candidate_state*/,
717 const Matrix_t & /*input*/, Matrix_t & /*input_gradient*/, bool)
718 {
719 return state_gradients_backward;
720 }
721
722 ///@}
723
724 //____________________________________________________________________________
725 //
726 // Additional Arithmetic Functions
727 //____________________________________________________________________________
728
729 /** @name Additional Arithmetic Functions
730 *
731 * Additional arithmetic on CUDA matrices used to implement the low-level
732 * interface.
733 */
734
735 /** In-place Hadamard (element-wise) product of matrices \p A and \p B
736 * with the result being written into \p A.
737 */
738 static void Hadamard(Tensor_t &A, const Tensor_t &B)
739 {
740 TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), 1, A.GetSize());
741 TCudaMatrix<AFloat> tmpB(B.GetDeviceBuffer(), 1, B.GetSize());
742 assert(A.GetSize() == B.GetSize());
743 TCuda<AFloat>::Hadamard(tmpA, tmpB);
744 }
745 // static void Hadamard(Matrix_t &A,
746 // const Matrix_t &B);*/
747 // {
748 // Tensor_t tA(A);
749 // Hadamard( tA, Tensor_t(B));
750 // }
751
752
753 /** Compute the sum of all elements in \p A */
754 static Scalar_t Sum(const Matrix_t &A, Scalar_t alpha = 1.0, Scalar_t beta = 0.0);
755
756 /** Check two matrices for equality, taking floating point arithmetic errors into account. */
757 //static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon = 0.1);
758
759 /** Add the constant \p beta to all the elements of matrix \p A and write the
760 * result into \p A.
761 */
762 static void ConstAdd(Matrix_t &A, Scalar_t beta) {
763 TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
764 TCuda<AFloat>::ConstAdd(tmp, beta);
765 }
766
767 /** Multiply the constant \p beta to all the elements of matrix \p A and write the
768 * result into \p A.
769 */
770 static void ConstMult(Matrix_t &A, Scalar_t beta) {
771 TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
772 TCuda<AFloat>::ConstMult(tmp, beta);
773 }
774
775 /** Reciprocal each element of the matrix \p A and write the result into
776 * \p A
777 */
778 static void ReciprocalElementWise(Matrix_t &A) {
779 TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
780 TCuda<AFloat>::ReciprocalElementWise(tmp);
781 }
782
783 /** Square each element of the matrix \p A and write the result into
784 * \p A
785 */
786 static void SquareElementWise(Matrix_t &A) {
787 TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
788 TCuda<AFloat>::SquareElementWise(tmp);
789 }
790
791 /** Square root each element of the matrix \p A and write the result into
792 * \p A
793 */
794 //static void SqrtElementWise(Matrix_t &A, Scalar_t alpha = 1, Scalar_t beta = 0, Scalar_t gamma = 0) {
795 static void SqrtElementWise(Matrix_t &A) {
796 TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
797 TCuda<AFloat>::SqrtElementWise(tmp);
798 }
799
800 // optimizer functions
801 static void AdamUpdate(Matrix_t & A, const Matrix_t & M, const Matrix_t & V, Scalar_t alpha, Scalar_t eps) {
802 TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), A.GetSize(),1);
803 TCudaMatrix<AFloat> tmpM(M.GetDeviceBuffer(), M.GetSize(),1);
804 TCudaMatrix<AFloat> tmpV(V.GetDeviceBuffer(), V.GetSize(),1);
805 TCuda<AFloat>::AdamUpdate(tmpA, tmpM, tmpV,alpha, eps);
806 }
807 static void AdamUpdateFirstMom(Matrix_t & A, const Matrix_t & B, Scalar_t beta) {
808 TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), A.GetSize(),1);
809 TCudaMatrix<AFloat> tmpB(B.GetDeviceBuffer(), B.GetSize(),1);
810 TCuda<AFloat>::AdamUpdateFirstMom(tmpA, tmpB, beta);
811 }
812 static void AdamUpdateSecondMom(Matrix_t & A, const Matrix_t & B, Scalar_t beta) {
813 TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), A.GetSize(),1);
814 TCudaMatrix<AFloat> tmpB(B.GetDeviceBuffer(), B.GetSize(),1);
815 TCuda<AFloat>::AdamUpdateSecondMom(tmpA, tmpB, beta);
816 }
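// For reference, the Adam update these three helpers implement together (standard formulas, shown
// as a sketch; beta denotes the corresponding decay rate beta1 or beta2):
//
//    M = beta1 * M + (1 - beta1) * g             // AdamUpdateFirstMom
//    V = beta2 * V + (1 - beta2) * g * g         // AdamUpdateSecondMom (element-wise)
//    A = A - alpha * M / (sqrt(V) + eps)         // AdamUpdate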
817
818 // printing of tensor
819 static void PrintTensor( const Tensor_t & A, const std::string name = "tensor", bool = false);
820
821 static void PrintTensor4dDescriptor(TensorDescriptor_t descriptor);
822 static void PrintTensorNdDescriptor(TensorDescriptor_t descriptor, int n = 10);
823
824 ///////////////////////////////////////////////////////////////////////////////
825 /// extra functions defined only for CPU architecture !!!
826 //////////////////////////////////////////////////////////////////////////////
827
828 /** Sum rows of (m x n) matrix \p A and write the results into the first
829 * m elements in \p B.
830 */
831 static void SumRows(Matrix_t &B, const Matrix_t &A);
832};
833
834
835//____________________________________________________________________________
836template <typename AFloat>
837template <typename ATensor>
838void TCudnn<AFloat>::CopyDiffArch(TCudaTensor<AFloat> &B,
839 const ATensor &A)
840{
841
842 // should add a static assert that A is not the same type as B
843 
844 // this copies tensors from different architectures
845 if (B.GetLayout() == GetTensorLayout()) {
846 if ( B.GetShape().size() == 4) {
847 assert(B.GetShape().size() == 4);
848 size_t firstSize = (A.GetLayout() == GetTensorLayout()) ? A.GetShape()[0] : A.GetShape().back();
849 for (size_t i = 0; i < firstSize; ++i) {
850 TMatrixT<AFloat> matIn = A.At(i).GetMatrix(); // this converts the tensor (B,D,HW) at index i into a (D,HW) matrix
851 // TMatrixT has the correct layout (row-major), no need to transpose in this case
852 TCudaTensor<AFloat> tmpOut = B.At(i); // matrix (D,HW)
853 // copy will copy the buffer
854 TCudaTensor<AFloat> tmpIn(matIn.GetMatrixArray(), tmpOut.GetShape(), tmpOut.GetLayout());
855 Copy(tmpOut, tmpIn);
856 }
857 }
858 else {
859 // for RNN weights
860 TMatrixT<AFloat> tmp = A;
861 TCudaMatrix<AFloat> tmp2(tmp);
862 TCudaTensor<AFloat> tA(tmp2);
863 Copy(B, tA);
864 }
865 } else {
866 // case of column-major layout
867 TMatrixT<AFloat> tmp = A;
868 TCudaMatrix<AFloat> tmp2(tmp);
869 TCudaTensor<AFloat> tA(tmp2);
870 Copy(B, tA);
871 }
872}
873
874//____________________________________________________________________________
875template <typename AFloat>
876template <typename AMatrix>
877void TCudnn<AFloat>::CopyWeightsDiffArch(TCudaTensor<AFloat> &B, const AMatrix &A)
878{
879 // copy from another architecture using the reference one
880 // this is not very efficient since it creates temporary objects
881 TMatrixT<AFloat> tmp = A; // .GetMatrix();
882 // we need to transpose for a different layout
883 if (B.GetLayout() == GetTensorLayout() ) {
884 // this is for CNN weights that are in row-major formats
885 //assert(B.GetShape().size() == 4); // weights shape should be 4
886 tmp.T();
887 }
888 TCudaMatrix<AFloat> tmp2(tmp);
889 TCudaTensor<AFloat> tA(tmp2);
890 Copy(B, tA);
891}
892
893//____________________________________________________________________________
894template <typename AFloat>
895template <typename AMatrix_t>
896void TCudnn<AFloat>::CopyDiffArch(std::vector<Tensor_t> &B,
897 const std::vector<AMatrix_t> &A)
898{
899 for (size_t i = 0; i < B.size(); ++i) {
900 CopyWeightsDiffArch(B[i], A[i]);
901 }
902}
903
904template <typename AFloat>
905void TCudnn<AFloat>::PrintTensor(const typename TCudnn<AFloat>::Tensor_t & A, const std::string name, bool truncate )
906{
907 std::cout << name << " size = " << A.GetSize() << " shape = { ";
908 auto shape = A.GetShape();
909 for (size_t k = 0; k < shape.size()-1; ++k)
910 std::cout << shape[k] << " , ";
911 std::cout << shape.back() << " } ";
912 std::cout << " strides = { ";
913 auto strides = A.GetStrides();
914 for (size_t k = 0; k < strides.size()-1; ++k)
915 std::cout << strides[k] << " , ";
916 std::cout << strides.back() << " }\n ";
917
918 if (A.GetShape().size() == 2 ) {
919 for (size_t i = 0; i < A.GetShape()[0]; ++i) {
920 std::cout << "{ ";
921 size_t n = A.GetShape()[1];
922 if (truncate) n = std::min(n,size_t(10));
923 for (size_t j = 0; j < n; ++j) {
924 std::cout << A(i,j) << " ";
925
926 }
927 if (truncate && n < A.GetShape()[1]) std::cout << " ...... ";
928 std::cout << " } " << std::endl;
929 }
930 } else if (A.GetShape().size() == 3 ) {
931 for (size_t i = 0; i < A.GetFirstSize(); ++i) {
932 std::cout << "{ ";
933 for (size_t j = 0; j < A.GetHSize(); ++j) {
934 std::cout << "{ ";
935 size_t n = A.GetWSize();
936 if (truncate) n = std::min(n,size_t(10));
937 for (size_t k = 0; k < n; ++k) {
938 std::cout << A(i,j,k) << " ";
939 }
940 if (truncate && n < A.GetWSize()) std::cout << " ...... ";
941 std::cout << " } " << std::endl;
942 }
943 std::cout << " } " << std::endl;
944 }
945 } else if (A.GetShape().size() == 4 ) {
946 for (size_t i = 0; i < A.GetShape()[0]; ++i) {
947 std::cout << "{ ";
948 for (size_t j = 0; j < A.GetShape()[1]; ++j) {
949 std::cout << "{ ";
950 for (size_t k = 0; k < A.GetShape()[2]; ++k) {
951 size_t n = A.GetShape()[3];
952 if (truncate) n = std::min(n,size_t(10));
953 for (size_t l = 0; l < n; ++l) {
954 std::cout << A(i,j,k,l) << " ";
955 }
956 if (truncate && n < A.GetShape()[3]) std::cout << " ...... ";
957 std::cout << " } " << std::endl;
958 }
959 std::cout << " } " << std::endl;
960 }
961 std::cout << " } " << std::endl;
962 }
963 }
964 else {
965 for (size_t l = 0; l < A.GetSize(); ++l) {
966 std::cout << A.GetData()[l] << " ";
967 }
968 std::cout << "\n";
969 }
970}
971
972template <typename AFloat>
973void TCudnn<AFloat>::PrintTensor4dDescriptor(TensorDescriptor_t descriptor) {
974 int n, c, h, w = 0;
975 int s1, s2, s3, s4 = 0;
976 cudnnDataType_t dataType;
977 cudnnGetTensor4dDescriptor(descriptor, &dataType, &n, &c, &h, &w, &s1, &s2, &s3, &s4);
978 std::cout << "Descriptor for 4d tensor of shape { " << n << " , " << c << " , " << h << " , " << w << " }"
979 << " and strides { " << s1 << " , " << s2 << " , " << s3 << " , " << s4 << " }" << std::endl;
980}
981template <typename AFloat>
982void TCudnn<AFloat>::PrintTensorNdDescriptor(TensorDescriptor_t descriptor, int ndim)
983{
984 int n = 0;
985 std::vector<int> dims(ndim);
986 std::vector<int> strides(ndim);
987 cudnnDataType_t dataType;
988 cudnnGetTensorNdDescriptor(descriptor, ndim, &dataType, &n, dims.data(), strides.data());
989 dims.resize(n);
990 strides.resize(n);
991 std::cout << "Descriptor for Nd tensor of dim = " << n << " shape { ";
992 for (auto d : dims)
993 std::cout << d << " , ";
994 std::cout << "} and strides { ";
995 for (auto s : strides)
996 std::cout << s << " , ";
997 std::cout << " }" << std::endl;
998}
999
1000// initialize the CNN options
1001// possible options for forward (from 0 to 7)
1002//
1003// 0 : CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
1004// 1 : CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
1005// 6 : CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD;
1006// 7 : CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED; (lots of memory)
1007
1008// for backward data (from 0 to 5)
1009// 1 : CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
1010 // 5 : CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED;
1011
1012template <typename AFloat>
1013int TCudnn<AFloat>::CNNOptions::ConvFwdAlgorithm = -1;
1014template <typename AFloat>
1015int TCudnn<AFloat>::CNNOptions::ConvBwdDataAlgorithm = -1;
1016template <typename AFloat>
1017int TCudnn<AFloat>::CNNOptions::ConvBwdFilterAlgorithm = -1;
1018template <typename AFloat>
1019 Long_t TCudnn<AFloat>::CNNOptions::ConvMaxWorkspaceSize = -1; // -1 lets cuDNN use its defaults
1020
1021} // namespace DNN
1022} // namespace TMVA
1023
1024#endif
1025#endif