Logo ROOT  
Reference Guide
Cpu.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 05/07/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12 //////////////////////////////////////////////////////////////////
13 // Definition of the TCpu architecture, which provides a //
14 // multi-threaded CPU implementation of the low-level interface //
15 // for neural networks on CPUs using BLAS and ROOT's TThreadExecutor //
16 //////////////////////////////////////////////////////////////////
17
18#ifndef TMVA_DNN_ARCHITECTURES_CPU
19#define TMVA_DNN_ARCHITECTURES_CPU
20
21#include "TMVA/DNN/Functions.h"
23//#include "TMVA/DNN/CNN/Descriptors.h"
29
33
34#include <vector>
35#include <string>
36
37class TRandom;
38
39namespace TMVA
40{
41namespace DNN
42{
43 //class EActivationFunction;
44 struct DummyDescriptor {};
52 struct DummyDataType {};
53
55
56/** The TCpu architecture class.
57 *
58 * Low-level interface class for multi-threaded CPU architectures. Contains as
59 * public types the declaration of the scalar, matrix and data loader types
60 * for this architecture as well as the remaining functions in the low-level
61 * interface in the form of static members.
62 */
63template<typename AReal = Float_t>
64class TCpu
65{
66private:
68public:
69 using Scalar_t = AReal;
74
81
88
89 using EmptyDescriptor_t = DummyDescriptor; // Used if a descriptor is not needed in a class
90
94
101
104
105
107
108 static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w) {
109 return Tensor_t( {c,h*w,n}, GetTensorLayout());
110 }
111 static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w) {
112 return Tensor_t( buffer, {c,h*w,n}, GetTensorLayout());
113 }
114 static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
115 {
116 return Tensor_t({t, w, b}, GetTensorLayout());
117 }
118 static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
119 {
120 return Tensor_t(buffer, {t, w, b}, GetTensorLayout());
121 }
122 // create a weight tensor/matrix vector from another tensor/weight vector using the given tensor shapes
123 // this function is used by the optimizers to stgore intermidiate weights representations
124 static void CreateWeightTensors( std::vector<Matrix_t> & newWeights, const std::vector<Matrix_t> & weights) {
125 if (!newWeights.empty()) newWeights.clear();
126 size_t n = weights.size();
127 for (size_t i = 0; i < n; ++i)
128 newWeights.emplace_back( weights[i].GetNrows(), weights[i].GetNcols());
129 }
130
131 static bool IsCudnn() { return false; }
132 //____________________________________________________________________________
133 //
134 // Architecture Initialization
135 //____________________________________________________________________________
136
137 /** Initialize CNN data/operator descriptors. Not used at the moment.*/
138
139 static void InitializeBNormDescriptors(TDescriptors * & /*descriptors*/,
140 BNormLayer_t * /*L = nullptr*/) {}
141
142 static void InitializeConvDescriptors(TDescriptors * & /*descriptors*/,
143 ConvLayer_t * /*L = nullptr*/) {}
144 static void InitializePoolDescriptors(TDescriptors * & /*descriptors*/,
145 PoolingLayer_t * /*L = nullptr*/) {}
146 static void InitializeRNNDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
147 static void InitializeLSTMDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
148 static void InitializeGRUDescriptors(TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
149
150 static void InitializeActivationDescriptor(ActivationDescriptor_t &/*descriptors*/, EActivationFunction /*activFunc */ , double /*coef*/ = 0.0) {}
151
152 /** Release CNN data/operator descriptors. Not used at the moment.*/
153 static void ReleaseConvDescriptors(TDescriptors * & /*descriptors*/) {}
154 static void ReleasePoolDescriptors(TDescriptors * & /*descriptors*/) {}
155 static void ReleaseBNormDescriptors(TDescriptors * & /*descriptors*/) {}
156 static void ReleaseRNNDescriptors(TDescriptors *& /*descriptors*/) {}
157
158 static void InitializeConvWorkspace(TWorkspace * & /*workspace*/,
159 TDescriptors * & /*descriptors*/,
160 const DNN::CNN::TConvParams & /*params*/,
161 ConvLayer_t * /*L = nullptr*/) {}
162 static void InitializePoolDropoutWorkspace(TWorkspace * & /*workspace*/,
163 TDescriptors * & /*descriptors*/,
164 const DNN::CNN::TConvParams & /*params*/,
165 PoolingLayer_t * /*L = nullptr*/) {}
166 static void InitializeRNNWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/) {}
167 static void InitializeLSTMWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/){}
168 static void InitializeGRUWorkspace(TWorkspace *& /*workspace*/, TDescriptors *& /*descriptors*/, GenLayer_t * /*L*/){}
169
170 static void FreeConvWorkspace(TWorkspace * & /*workspace*/) {} ///< Only used for certain cudnn on-device memory
171 static void FreePoolDropoutWorkspace(TWorkspace * & /*workspace*/) {}
172 static void FreeRNNWorkspace(TWorkspace *& /*workspace*/) {}
173
174 static void ReleaseDescriptor(ActivationDescriptor_t & /* activationDescr */) {}
175
176 static void InitializeRNNTensors(GenLayer_t * /*layer*/) {}
177 static void InitializeLSTMTensors(GenLayer_t * /*layer*/) {}
178 static void InitializeGRUTensors(GenLayer_t * /*layer*/) {}
179
180 //____________________________________________________________________________
181 //
182 // Propagation
183 //____________________________________________________________________________
184
185 /** @name Forward Propagation
186 * Low-level functions required for the forward propagation of activations
187 * through the network.
188 */
189 ///@{
190 /** Matrix-multiply \p input with the transpose of \p weights and
191 * write the results into \p output. */
192 static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights);
193
194 static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights) {
195 Matrix_t output_matrix = output.GetMatrix();
196 MultiplyTranspose( output_matrix, input.GetMatrix(), weights);
197 //ensor_t::MatrixToTensor(output_matrix, output); // this maybe is not needed
198 }
199
200 /** Add the vectors biases row-wise to the matrix output */
201 static void AddRowWise(Matrix_t &output,const Matrix_t &biases);
202
203 static void AddRowWise(Tensor_t &output, const Matrix_t &biases) {
204 Matrix_t output_matrix = output.GetMatrix();
205 AddRowWise(output_matrix, biases);
206 //Tensor_t::MatrixToTensor(output_matrix, output); // this maybe is not needed
207 }
208
209 /** @name Backward Propagation (Dense Layers)
210 * Low-level functions required for the backward propagation of gradients
211 * through the network.
212 */
213 ///@{
214 /** Perform the complete backward propagation step. If the provided
215 * \p activationGradientsBackward matrix is not empty, compute the
216 * gradients of the objective function with respect to the activations
217 * of the previous layer (backward direction).
218 * Also compute the weight and the bias gradients. Modifies the values
219 * in \p df and thus produces only a valid result, if it is applied the
220 * first time after the corresponding forward propagation has been per-
221 * formed. */
222 static void Backward(Tensor_t & activationGradientsBackward,
223 Matrix_t & weightGradients,
224 Matrix_t & biasGradients,
225 const Tensor_t & df,
226 const Tensor_t & activationGradients,
227 const Matrix_t & weights,
228 const Tensor_t & activationBackward);
229
230
231 /** Adds the elements in matrix \p B scaled by \p beta to the elements in
232 * the matrix \p A. This is required for the weight update in the gradient
233 * descent step.*/
234 static void ScaleAdd(Matrix_t & A,
235 const Matrix_t & B,
236 Scalar_t beta = 1.0);
237
238 static void Copy(Matrix_t & B,
239 const Matrix_t & A);
240
241 // copy from another type of matrix
242 template<typename AMatrix_t>
243 static void CopyDiffArch(Matrix_t & B, const AMatrix_t & A);
244
245
246 /** Above functions extended to vectors */
247 static void ScaleAdd(Tensor_t & A,
248 const Tensor_t & B,
249 Scalar_t beta = 1.0);
250
251 static void Copy(Tensor_t & A,
252 const Tensor_t & B);
253
254 // copy from another tensor
255 template<typename ATensor_t>
256 static void CopyDiffArch(Tensor_t & A,
257 const ATensor_t & B);
258
259 // copy from vector of matrices of different types
260 template<typename AMatrix_t>
261 static void CopyDiffArch(std::vector<Matrix_t> & A,
262 const std::vector<AMatrix_t> & B);
263
264 ///@}
265
266 //____________________________________________________________________________
267 //
268 // Activation Functions
269 //____________________________________________________________________________
270
271 /** @name Activation Functions
272 * For each activation function, the low-level interface contains two routines.
273 * One that applies the activation function to a matrix and one that evaluates
274 * the derivatives of the activation function at the elements of a given matrix
275 * and writes the results into the result matrix.
276 */
277 ///@{
278 /* impl using Matrix */
279 /*inline void evaluate(Matrix_t &A, EActivationFunction f)
280 {
281 Tensor_t tA(A);
282 evaluate<TCpu<AReal>>(tA,f);
283 }*/
284
285 static void ActivationFunctionForward(Tensor_t & X, EActivationFunction activFunct,
286 const ActivationDescriptor_t activationDescr,
287 const double coef = 0.0, const Scalar_t alpha = 1,
288 const Scalar_t beta = 0);
289
290 /** Computes the gradient of the activation function */
291 static void ActivationFunctionBackward(Tensor_t & dX, const Tensor_t & Y,
292 const Tensor_t & dY, const Tensor_t & X,
293 EActivationFunction activFunct,
294 const ActivationDescriptor_t activationDescr,
295 const Scalar_t alpha = 1,
296 const Scalar_t beta = 0);
297
298 static void IdentityDerivative(Tensor_t & B,
299 const Tensor_t &A);
300
301 static void Relu(Tensor_t & B);
302 static void ReluDerivative(Tensor_t & B,
303 const Tensor_t & A);
304
305 static void Sigmoid(Tensor_t & B);
306 static void SigmoidDerivative(Tensor_t & B,
307 const Tensor_t & A);
308
309 static void Tanh(Tensor_t & B);
310 static void TanhDerivative(Tensor_t & B,
311 const Tensor_t & A);
312
313 // fast tanh (only when VDT is available)
314 static void FastTanh(Tensor_t &B);
315 static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A);
316
317 static void SymmetricRelu(Tensor_t & B);
318 static void SymmetricReluDerivative(Tensor_t & B,
319 const Tensor_t & A);
320
321 static void SoftSign(Tensor_t & B);
322 static void SoftSignDerivative(Tensor_t & B,
323 const Tensor_t & A);
324
325 static void Gauss(Tensor_t & B);
326 static void GaussDerivative(Tensor_t & B,
327 const Tensor_t & A);
328 ///@}
329
330 //____________________________________________________________________________
331 //
332 // Loss Functions
333 //____________________________________________________________________________
334
335 /** @name Loss Functions
336 * Loss functions compute a scalar value given the \p output of the network
337 * for a given training input and the expected network prediction \p Y that
338 * quantifies the quality of the prediction. For each function also a routine
339 * that computes the gradients (suffixed by Gradients) must be provided for
340 * the starting of the backpropagation algorithm.
341 */
342 ///@{
343
344 static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output,
345 const Matrix_t &weights);
346 static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y,
347 const Matrix_t &output, const Matrix_t &weights);
348
349 /** Sigmoid transformation is implicitly applied, thus \p output should
350 * hold the linear activations of the last layer in the net. */
351 static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output,
352 const Matrix_t &weights);
353
354 static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
355 const Matrix_t &output, const Matrix_t &weights);
356
357 /** Softmax transformation is implicitly applied, thus \p output should
358 * hold the linear activations of the last layer in the net. */
359 static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output,
360 const Matrix_t &weights);
361 static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y,
362 const Matrix_t &output, const Matrix_t &weights);
363 ///@}
364
365 //____________________________________________________________________________
366 //
367 // Output Functions
368 //____________________________________________________________________________
369
370 /** @name Output Functions
371 * Output functions transform the activations \p output of the
372 * output layer in the network to a valid prediction \p YHat for
373 * the desired usage of the network, e.g. the identity function
374 * for regression or the sigmoid transformation for two-class
375 * classification.
376 */
377 ///@{
378 static void Sigmoid(Matrix_t &YHat,
379 const Matrix_t & );
380 static void Softmax(Matrix_t &YHat,
381 const Matrix_t & );
382 ///@}
383
384 //____________________________________________________________________________
385 //
386 // Regularization
387 //____________________________________________________________________________
388
389 /** @name Regularization
390 * For each regularization type two functions are required, one named
391 * <tt><Type>Regularization</tt> that evaluates the corresponding
392 * regularization functional for a given weight matrix and the
393 * <tt>Add<Type>RegularizationGradients</tt>, that adds the regularization
394 * component in the gradients to the provided matrix.
395 */
396 ///@{
397
398 static Scalar_t L1Regularization(const Matrix_t & W);
400 const Matrix_t & W,
402
403 static Scalar_t L2Regularization(const Matrix_t & W);
405 const Matrix_t & W,
407 ///@}
408
409 //____________________________________________________________________________
410 //
411 // Initialization
412 //____________________________________________________________________________
413
414 /** @name Initialization
415 * For each initialization method, one function in the low-level interface
416 * is provided. The naming scheme is <p>Initialize<Type></p> for a given
417 * initialization method Type.
418 */
419 ///@{
420
421 static void InitializeGauss(Matrix_t & A);
422 static void InitializeUniform(Matrix_t & A);
423 static void InitializeIdentity(Matrix_t & A);
424 static void InitializeZero(Matrix_t & A);
425 static void InitializeZero(Tensor_t &A);
426 static void InitializeGlorotNormal(Matrix_t & A);
427 static void InitializeGlorotUniform(Matrix_t & A);
428
429 // return static instance of random generator used for initialization
430 // if generator does not exist it is created the first time with a random seed (e.g. seed = 0)
431 static TRandom & GetRandomGenerator();
432 // set random seed for the static generator
433 // if the static generator does not exist it is created
434 static void SetRandomSeed(size_t seed);
435 ///@}
436
437 //____________________________________________________________________________
438 //
439 // Dropout
440 //____________________________________________________________________________
441
442 /** @name Dropout
443 */
444 ///@{
445
446 /** Apply dropout with activation probability \p p to the given
447 * tensor \p A and scale the result by reciprocal of \p p. */
448 static void DropoutForward(Tensor_t & A,
449 TDescriptors * descriptors,
450 TWorkspace * workspace,
451 Scalar_t p);
452
453 static void DropoutForward(Matrix_t & A, Scalar_t p) {
454 Tensor_t tA(A);
455 DropoutForward( tA, static_cast<TDescriptors *> (nullptr), static_cast<TWorkspace *> (nullptr), p );
456 }
457
458 // Only needed for cuDNN
459 static void DropoutBackward(Tensor_t & /*A */,
460 TDescriptors * /*descriptors */,
461 TWorkspace * /*workspace*/) {}
462 ///@}
463
464 //____________________________________________________________________________
465 //
466 // Batch Normalization
467 //____________________________________________________________________________
468
469 /** @name Batch Normalization Layer Propagation
470 */
471 ///@{
472
473 /** The inputs from each batch are normalized during training to have zero mean and unit variance
474 * and they are then scaled by two parameters, different for each input variable:
475 * - a scale factor \p gamma
476 * - an offset \p beta */
477 static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta,
478 Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans,
479 Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum,
480 Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor);
481
482
483 /** During inference the inputs are not normalized using the batch mean but using the previously
484 * computed running mean and variance */
485 static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta,
486 Tensor_t &y, const Matrix_t &runningMeans,
487 const Matrix_t &runningVars, Scalar_t epsilon,
488 const TensorDescriptor_t &);
489
490 /**
491 * */
492 static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx,
493 Matrix_t &gamma, // Matrix_t &beta, (not needed)
494 Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance,
495 const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &);
496
497 // helper function for BNorm layer
498 static Tensor_t BatchNormLayerReshapeTensor(int axis, const Tensor_t &x);
499
500 ///@}
501
502 //____________________________________________________________________________
503 //
504 // Convolutional Layer Propagation
505 //____________________________________________________________________________
506
507 /** @name Forward Propagation in Convolutional Layer
508 */
509 ///@{
510
511 /** Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperparameters.
512 */
513 static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride);
514
515 /** Transform the matrix B in local view format, suitable for
516 * convolution, and store it in matrix A */
517 static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight,
518 size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight,
519 size_t zeroPaddingWidth);
520
521 static void Im2colIndices(std::vector<int> &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight,
522 size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
523 size_t zeroPaddingHeight, size_t zeroPaddingWidth);
524 static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector<int> &V);
525
526 /** Rotates the matrix \p B, which represents the weights,
527 * and stores them in the matrix \p A. */
528 static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight,
529 size_t filterWidth, size_t numFilters);
530
531 /** Add the biases in the Convolutional Layer. */
532 static void AddConvBiases(Matrix_t &output, const Matrix_t &biases);
533 ///@}
534
535 /** Dummy placeholder - preparation is currently only required for the CUDA architecture. */
536 static void PrepareInternals(Tensor_t &) {}
537
538 /** Forward propagation in the Convolutional layer */
539 static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input,
540 const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params,
541 EActivationFunction activFunc, Tensor_t & /* inputPrime */,
542 const ConvDescriptors_t & /*descriptors*/, // Empty struct for cuda architecture
543 ConvWorkspace_t & /*workspace*/); // Empty struct for cuda architecture
544 // void * cudnnWorkspace = nullptr); // Remains nullptr for cuda architecture
545
546 /** @name Backward Propagation in Convolutional Layer
547 */
548 ///@{
549
550 /** Perform the complete backward propagation step in a Convolutional Layer.
551 * If the provided \p activationGradientsBackward matrix is not empty, compute the
552 * gradients of the objective function with respect to the activations
553 * of the previous layer (backward direction).
554 * Also compute the weight and the bias gradients. Modifies the values
555 * in \p df and thus produces only a valid result, if it is applied the
556 * first time after the corresponding forward propagation has been per-
557 * formed. */
558 static void
559 ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients,
560 Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights,
561 const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc,
562 const ConvDescriptors_t & /*descriptors*/, ConvWorkspace_t & /*workspace*/, size_t batchSize,
563 size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width,
564 size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews);
565
566 /** Utility function for calculating the activation gradients of the layer
567 * before the convolutional layer. */
568 static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df,
569 const Matrix_t &weights, size_t batchSize, size_t inputHeight,
570 size_t inputWidth, size_t depth, size_t height, size_t width,
571 size_t filterDepth, size_t filterHeight, size_t filterWidth);
572
573 /** Utility function for calculating the weight gradients of the convolutional
574 * layer. */
575 static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df,
576 const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight,
577 size_t inputWidth, size_t depth, size_t height, size_t width,
578 size_t filterDepth, size_t filterHeight, size_t filterWidth,
579 size_t nLocalViews);
580
581 /** Utility function for calculating the bias gradients of the convolutional
582 * layer */
583 static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth,
584 size_t nLocalViews);
585 ///@}
586
587 //____________________________________________________________________________
588 //
589 // Max Pooling Layer Propagation
590 //____________________________________________________________________________
591 /** @name Forward Propagation in Max Pooling Layer
592 */
593 ///@{
594
595 /** Downsample the matrix \p C to the matrix \p A, using max
596 * operation, such that the winning indices are stored in matrix
597 * \p B. */
598 static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t & /*descriptors*/,
599 PoolingWorkspace_t & /*workspace*/, size_t imgHeight, size_t imgWidth, size_t fltHeight,
600 size_t fltWidth, size_t strideRows, size_t strideCols);
601
602 ///@}
603
604 /** @name Backward Propagation in Max Pooling Layer
605 */
606 ///@{
607 /** Perform the complete backward propagation step in a Pooling Layer. Based on the
608 * winning indices stored in the index matrix, it just forwards the activation
609 * gradients to the previous layer. */
610 static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients,
611 const Tensor_t &indexMatrix, const Tensor_t & /*inputActivation*/,
612 const Tensor_t & /*outputTensor*/, const PoolingDescriptors_t & /*descriptors*/,
613 PoolingWorkspace_t & /*workspace*/, size_t imgHeight, size_t imgWidth,
614 size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
615 size_t nLocalViews);
616
617 //// Recurrent Network Functions
618
619 /** Backward pass for Recurrent Networks */
620 static Matrix_t &RecurrentLayerBackward(Matrix_t &state_gradients_backward, // BxH
621 Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients,
622 Matrix_t &bias_gradients,
623 Matrix_t &df, // DxH
624 const Matrix_t &state, // BxH
625 const Matrix_t &weights_input, // HxD
626 const Matrix_t &weights_state, // HxH
627 const Matrix_t &input, // BxD
628 Matrix_t &input_gradient);
629
630 // dummy RNN functions
631 static void RNNForward(const Tensor_t & /* x */, const Matrix_t & /* hx */, const Matrix_t & /* cx */,
632 const Matrix_t & /* weights */, Tensor_t & /* y */, Matrix_t & /* hy */, Matrix_t & /* cy */,
633 const RNNDescriptors_t & /* descr */, RNNWorkspace_t & /* workspace */, bool /* isTraining */)
634 {
635 }
636
637 static void RNNBackward(const Tensor_t & /* x */, const Matrix_t & /* hx */, const Matrix_t & /* cx */,
638 const Tensor_t & /* y */, const Tensor_t & /* dy */, const Matrix_t & /* dhy */,
639 const Matrix_t & /* dcy */, const Tensor_t & /* weights */, Tensor_t & /* dx */,
640 Matrix_t & /* dhx */, Matrix_t & /* dcx */, Tensor_t & /* dw */,
641 const RNNDescriptors_t & /* desc */, RNNWorkspace_t & /* workspace */)
642 {
643 }
644
645 /** Backward pass for LSTM Network */
646 static Matrix_t & LSTMLayerBackward(TCpuMatrix<Scalar_t> & state_gradients_backward,
647 TCpuMatrix<Scalar_t> & cell_gradients_backward,
648 TCpuMatrix<Scalar_t> & input_weight_gradients,
649 TCpuMatrix<Scalar_t> & forget_weight_gradients,
650 TCpuMatrix<Scalar_t> & candidate_weight_gradients,
651 TCpuMatrix<Scalar_t> & output_weight_gradients,
652 TCpuMatrix<Scalar_t> & input_state_weight_gradients,
653 TCpuMatrix<Scalar_t> & forget_state_weight_gradients,
654 TCpuMatrix<Scalar_t> & candidate_state_weight_gradients,
655 TCpuMatrix<Scalar_t> & output_state_weight_gradients,
656 TCpuMatrix<Scalar_t> & input_bias_gradients,
657 TCpuMatrix<Scalar_t> & forget_bias_gradients,
658 TCpuMatrix<Scalar_t> & candidate_bias_gradients,
659 TCpuMatrix<Scalar_t> & output_bias_gradients,
664 const TCpuMatrix<Scalar_t> & precStateActivations,
665 const TCpuMatrix<Scalar_t> & precCellActivations,
666 const TCpuMatrix<Scalar_t> & fInput,
667 const TCpuMatrix<Scalar_t> & fForget,
668 const TCpuMatrix<Scalar_t> & fCandidate,
669 const TCpuMatrix<Scalar_t> & fOutput,
670 const TCpuMatrix<Scalar_t> & weights_input,
671 const TCpuMatrix<Scalar_t> & weights_forget,
672 const TCpuMatrix<Scalar_t> & weights_candidate,
673 const TCpuMatrix<Scalar_t> & weights_output,
674 const TCpuMatrix<Scalar_t> & weights_input_state,
675 const TCpuMatrix<Scalar_t> & weights_forget_state,
676 const TCpuMatrix<Scalar_t> & weights_candidate_state,
677 const TCpuMatrix<Scalar_t> & weights_output_state,
678 const TCpuMatrix<Scalar_t> & input,
679 TCpuMatrix<Scalar_t> & input_gradient,
680 TCpuMatrix<Scalar_t> & cell_gradient,
681 TCpuMatrix<Scalar_t> & cell_tanh);
682
683
684 /** Backward pass for GRU Network */
685 static Matrix_t & GRULayerBackward(TCpuMatrix<Scalar_t> & state_gradients_backward,
686 TCpuMatrix<Scalar_t> & reset_weight_gradients,
687 TCpuMatrix<Scalar_t> & update_weight_gradients,
688 TCpuMatrix<Scalar_t> & candidate_weight_gradients,
689 TCpuMatrix<Scalar_t> & reset_state_weight_gradients,
690 TCpuMatrix<Scalar_t> & update_state_weight_gradients,
691 TCpuMatrix<Scalar_t> & candidate_state_weight_gradients,
692 TCpuMatrix<Scalar_t> & reset_bias_gradients,
693 TCpuMatrix<Scalar_t> & update_bias_gradients,
694 TCpuMatrix<Scalar_t> & candidate_bias_gradients,
698 const TCpuMatrix<Scalar_t> & precStateActivations,
699 const TCpuMatrix<Scalar_t> & fReset,
700 const TCpuMatrix<Scalar_t> & fUpdate,
701 const TCpuMatrix<Scalar_t> & fCandidate,
702 const TCpuMatrix<Scalar_t> & weights_reset,
703 const TCpuMatrix<Scalar_t> & weights_update,
704 const TCpuMatrix<Scalar_t> & weights_candidate,
705 const TCpuMatrix<Scalar_t> & weights_reset_state,
706 const TCpuMatrix<Scalar_t> & weights_update_state,
707 const TCpuMatrix<Scalar_t> & weights_candidate_state,
708 const TCpuMatrix<Scalar_t> & input,
709 TCpuMatrix<Scalar_t> & input_gradient,
710 bool resetGateAfter);
711
712
713 ///@}
714
715 //____________________________________________________________________________
716 //
717 // Reshape Layer Propagation
718 //____________________________________________________________________________
719 /** @name Forward and Backward Propagation in Reshape Layer
720 */
721 ///@{
722
723 /** Transform the matrix \p B to a matrix with different dimensions \p A */
724 static void Reshape(Matrix_t &A, const Matrix_t &B);
725
726 /** Flattens the tensor \p B, such that each matrix is stretched into
727 * one row, resulting in a matrix \p A. */
728 static void Flatten(Tensor_t &A, const Tensor_t &B); // size_t size, size_t nRows, size_t nCols);
729
730 /** Transforms each row of \p B to a matrix and stores it in the
731 * tensor \p A. */
732 static void Deflatten(Tensor_t &A, const Tensor_t &B); // size_t index, size_t nRows,size_t nCols);
733
734 /** Rearrange data according to time: fill the B x T x D tensor \p out from the T x B x D tensor \p in */
735 static void Rearrange(Tensor_t &out, const Tensor_t &in);
736
737
738 ///@}
739
740 //____________________________________________________________________________
741 //
742 // Additional Arithmetic Functions
743 //____________________________________________________________________________
744
745 /** @name Additional Arithmetic Functions
746 *
747 * Additional arithmetic on CPU matrices used to implement the low-level
748 * interface.
749 */
750 ///@{
751
752 /** Standard multiplication of two matrices \p A and \p B with the result being
753 * written into C.
754 */
755 static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B);
756 /** Matrix multiplication of two matrices \p A and \p B^T (transposed) with the
757 * result being written into C.
758 */
759 static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha = 1.0,
760 Scalar_t beta = 0.);
761 /** In-place Hadamard (element-wise) product of matrices \p A and \p B
762 * with the result being written into \p A.
763 */
764 static void Hadamard(Tensor_t &A, const Tensor_t &B);
765 static void Hadamard(Matrix_t &A, const Matrix_t &B);
766 // {
767 // Tensor_t tA(A);
768 // Hadamard( tA, Tensor_t(B));
769 // }
770
771 /** Sum columns of (m x n) matrix \p A and write the results into the first
772 * m elements in \p B.
773 */
774 static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha = 1.0, Scalar_t beta = 0.);
775
776 /** Compute the sum of all elements in \p A */
777 static Scalar_t Sum(const Matrix_t &A);
778
779 /** Check two matrices for equality, taking floating point arithmetic errors into account. */
780 static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon = 0.1);
781
782 /** Add the constant \p beta to all the elements of matrix \p A and write the
783 * result into \p A.
784 */
785 static void ConstAdd(Matrix_t &A, Scalar_t beta);
786
787 /** Multiply the constant \p beta to all the elements of matrix \p A and write the
788 * result into \p A.
789 */
790 static void ConstMult(Matrix_t &A, Scalar_t beta);
791
792 /** Reciprocal each element of the matrix \p A and write the result into
793 * \p A
794 */
795 static void ReciprocalElementWise(Matrix_t &A);
796
797 /** Square each element of the matrix \p A and write the result into
798 * \p A
799 */
800 static void SquareElementWise(Matrix_t &A);
801
802 /** Square root each element of the matrix \p A and write the result into
803 * \p A
804 */
805 static void SqrtElementWise(Matrix_t &A);
806
807 // optimizer functions
808 static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps);
809 static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta);
810 static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta);
811
812 // printing of tensor
813 static void PrintTensor(const Tensor_t &A, const std::string name = "Cpu-tensor", bool truncate = false);
814
815};
816
817//____________________________________________________________________________
818template <typename AReal>
819template <typename AMatrix_t>
821 const AMatrix_t &A)
822{
823 // copy from another architecture using the reference one
824 // this is not very efficient since creates temporary objects
825 TMatrixT<AReal> tmp = A; // this works also if A is a tensor
826 Copy(B, TCpuMatrix<AReal>(tmp) );
827}
828
829//____________________________________________________________________________
830template <typename AReal>
831template <typename ATensor_t>
833 const ATensor_t &A)
834{
835
836 R__ASSERT(A.GetSize() == B.GetSize());
837 // suppose A is of (B,D,H.W) and we want to convert to B,HW,D or (D,HW,B) in ColumnMajor format
838 for (size_t i = 0; i < A.GetFirstSize(); ++i) {
839 TMatrixT<AReal> tmpIn = A.At(i); // this convert tensor (B,D,H,W) in (D,H,W)i -> (D,HW)i
840
841 TCpuMatrix<AReal> tmpOut = B.At(i).GetMatrix(); // matrix (D,HW)
842 Copy(tmpOut, TCpuMatrix<AReal>(tmpIn));
843 }
844
845 // ATensor_t tmpIn = A.Reshape({A.GetNrows(), A.GetNcols()});
846 // auto tmpOut = B.Reshape({A.GetNrows(), A.GetNcols()});
847 // Matrix_t mOut = tmpOut.GetMatrix();
848 // CopyDiffArch(mOut, tmpIn.GetMatrix());
849}
850
851// Implementation using vector of matrices for the weights
852template <typename AReal>
853template <typename AMatrix_t>
854void TCpu<AReal>::CopyDiffArch(std::vector<TCpuMatrix<AReal>> &A, const std::vector<AMatrix_t> &B)
855{
856 for (size_t i = 0; i < A.size(); ++i) {
857 CopyDiffArch(A[i], B[i]);
858 }
859}
860
861template <typename AReal>
862void TCpu<AReal>::PrintTensor(const typename TCpu<AReal>::Tensor_t & A, const std::string name, bool truncate )
863{
864 std::cout << name << " size = " << A.GetSize() << " shape = { ";
865 auto shape = A.GetShape();
866 for (size_t k = 0; k < shape.size()-1; ++k)
867 std::cout << shape[k] << " , ";
868 std::cout << shape.back() << " } ";
869
870 // print elements
871 // need to find way to nice printing all elements
872 std::cout << " tensor count " << A.GetBufferUseCount() << std::endl;
873 if (A.GetShape().size() == 2 ) {
874 for (size_t i = 0; i < A.GetShape()[0]; ++i) {
875 std::cout << "{ ";
876 size_t n = A.GetShape()[1];
877 if (truncate) n = std::min(n,size_t(10));
878 for (size_t j = 0; j < n; ++j) {
879 std::cout << A(i,j) << " ";
880 }
881 if (truncate && n < A.GetShape()[1]) std::cout << " ...... ";
882 std::cout << " } " << std::endl;
883 }
884 } else if (A.GetShape().size() == 3 ) {
885 for (size_t i = 0; i < A.GetFirstSize(); ++i) {
886 std::cout << "{ ";
887 for (size_t j = 0; j < A.GetHSize(); ++j) {
888 std::cout << "{ ";
889 size_t n = A.GetWSize();
890 if (truncate) n = std::min(n,size_t(10));
891 for (size_t k = 0; k < n; ++k) {
892 std::cout << A(i,j,k) << " ";
893 }
894 if (truncate && n < A.GetWSize()) std::cout << " ...... ";
895 std::cout << " } " << std::endl;
896 }
897 std::cout << " } " << std::endl;
898 }
899 }
900 else {
901 for (size_t l = 0; l < A.GetSize(); ++l) {
902 std::cout << A.GetData()[l] << " ";
903 }
904 std::cout << "\n";
905 }
906}
907
908
909
910
911} // namespace DNN
912} // namespace TMVA
913
914#endif
#define b(i)
Definition: RSha256.hxx:100
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:118
char name[80]
Definition: TGX11.cxx:110
Generic Max Pooling Layer class.
Definition: MaxPoolLayer.h:59
Layer implementing Batch Normalization.
The TCpuMatrix class.
Definition: CpuMatrix.h:86
TCpuMatrix< AFloat > GetMatrix() const
Definition: CpuTensor.h:197
The TCpu architecture class.
Definition: Cpu.h:65
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
static void FastTanh(Tensor_t &B)
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
Definition: Arithmetic.hxx:77
static TRandom * fgRandomGen
Definition: Cpu.h:67
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t n, size_t c, size_t h, size_t w)
Definition: Cpu.h:111
static void Gauss(Tensor_t &B)
CNN::TCNNWorkspace< PoolingLayer_t > PoolingWorkspace_t
Definition: Cpu.h:100
static Scalar_t L1Regularization(const Matrix_t &W)
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
Definition: Arithmetic.hxx:248
AReal Scalar_t
Definition: Cpu.h:69
static void AddL1RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static void InitializeLSTMTensors(GenLayer_t *)
Definition: Cpu.h:177
static void AddRowWise(Tensor_t &output, const Matrix_t &biases)
Definition: Cpu.h:203
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.hxx:302
DummyDescriptor TensorDescriptor_t
Definition: Cpu.h:80
CNN::TCNNDescriptors< PoolingLayer_t > PoolingDescriptors_t
Definition: Cpu.h:99
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements in B.
Definition: Arithmetic.hxx:212
static void Sigmoid(Tensor_t &B)
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static Tensor_t CreateTensor(DeviceBuffer_t buffer, size_t b, size_t t, size_t w)
Definition: Cpu.h:118
static void DropoutBackward(Tensor_t &, TDescriptors *, TWorkspace *)
Definition: Cpu.h:459
static Scalar_t Sum(const Matrix_t &A)
Compute the sum of all elements in A.
static void InitializeLSTMWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition: Cpu.h:167
CNN::TCNNWorkspace< ConvLayer_t > ConvWorkspace_t
Definition: Cpu.h:97
static void SoftSign(Tensor_t &B)
static void Sigmoid(Matrix_t &YHat, const Matrix_t &)
CNN::TCNNDescriptors< ConvLayer_t > ConvDescriptors_t
Definition: Cpu.h:96
TCpuTensor< AReal > Tensor_t
Definition: Cpu.h:70
static void SymmetricReluDerivative(Tensor_t &B, const Tensor_t &A)
static void InitializeBNormDescriptors(TDescriptors *&, BNormLayer_t *)
Initialize CNN data/operator descriptors.
Definition: Cpu.h:139
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
Definition: Arithmetic.hxx:194
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
Definition: Arithmetic.hxx:152
static void InitializeIdentity(Matrix_t &A)
static void ReleasePoolDescriptors(TDescriptors *&)
Definition: Cpu.h:154
static void InitializePoolDropoutWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, PoolingLayer_t *)
Definition: Cpu.h:162
static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector< int > &V)
static void SqrtElementWise(Matrix_t &A)
Square root each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:334
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void InitializeGRUDescriptors(TDescriptors *&, GenLayer_t *)
Definition: Cpu.h:148
static void SymmetricRelu(Tensor_t &B)
static void PrintTensor(const Tensor_t &A, const std::string name="Cpu-tensor", bool truncate=false)
Definition: Cpu.h:862
static TRandom & GetRandomGenerator()
static void MultiplyTranspose(Tensor_t &output, const Tensor_t &input, const Matrix_t &weights)
Definition: Cpu.h:194
static void RNNForward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Matrix_t &, Tensor_t &, Matrix_t &, Matrix_t &, const RNNDescriptors_t &, RNNWorkspace_t &, bool)
Definition: Cpu.h:631
static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
Apply dropout with activation probability p to the given tensor A and scale the result by reciprocal ...
static void FreePoolDropoutWorkspace(TWorkspace *&)
Definition: Cpu.h:171
static Tensor_t CreateTensor(size_t b, size_t t, size_t w)
Definition: Cpu.h:114
static void Softmax(Matrix_t &YHat, const Matrix_t &)
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void TanhDerivative(Tensor_t &B, const Tensor_t &A)
static void InitializeGRUWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition: Cpu.h:168
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
static void Tanh(Tensor_t &B)
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
Definition: Arithmetic.hxx:42
DummyDescriptor ActivationDescriptor_t
Definition: Cpu.h:75
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static void InitializeUniform(Matrix_t &A)
static void ActivationFunctionForward(Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const double coef=0.0, const Scalar_t alpha=1, const Scalar_t beta=0)
static void SoftSignDerivative(Tensor_t &B, const Tensor_t &A)
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition: Arithmetic.hxx:369
static void Copy(Matrix_t &B, const Matrix_t &A)
Definition: Arithmetic.hxx:269
static void ReleaseBNormDescriptors(TDescriptors *&)
Definition: Cpu.h:155
static void SetRandomSeed(size_t seed)
static void FreeConvWorkspace(TWorkspace *&)
Only used for certain cudnn on-device memory.
Definition: Cpu.h:170
static Matrix_t & LSTMLayerBackward(TCpuMatrix< Scalar_t > &state_gradients_backward, TCpuMatrix< Scalar_t > &cell_gradients_backward, TCpuMatrix< Scalar_t > &input_weight_gradients, TCpuMatrix< Scalar_t > &forget_weight_gradients, TCpuMatrix< Scalar_t > &candidate_weight_gradients, TCpuMatrix< Scalar_t > &output_weight_gradients, TCpuMatrix< Scalar_t > &input_state_weight_gradients, TCpuMatrix< Scalar_t > &forget_state_weight_gradients, TCpuMatrix< Scalar_t > &candidate_state_weight_gradients, TCpuMatrix< Scalar_t > &output_state_weight_gradients, TCpuMatrix< Scalar_t > &input_bias_gradients, TCpuMatrix< Scalar_t > &forget_bias_gradients, TCpuMatrix< Scalar_t > &candidate_bias_gradients, TCpuMatrix< Scalar_t > &output_bias_gradients, TCpuMatrix< Scalar_t > &di, TCpuMatrix< Scalar_t > &df, TCpuMatrix< Scalar_t > &dc, TCpuMatrix< Scalar_t > &dout, const TCpuMatrix< Scalar_t > &precStateActivations, const TCpuMatrix< Scalar_t > &precCellActivations, const TCpuMatrix< Scalar_t > &fInput, const TCpuMatrix< Scalar_t > &fForget, const TCpuMatrix< Scalar_t > &fCandidate, const TCpuMatrix< Scalar_t > &fOutput, const TCpuMatrix< Scalar_t > &weights_input, const TCpuMatrix< Scalar_t > &weights_forget, const TCpuMatrix< Scalar_t > &weights_candidate, const TCpuMatrix< Scalar_t > &weights_output, const TCpuMatrix< Scalar_t > &weights_input_state, const TCpuMatrix< Scalar_t > &weights_forget_state, const TCpuMatrix< Scalar_t > &weights_candidate_state, const TCpuMatrix< Scalar_t > &weights_output_state, const TCpuMatrix< Scalar_t > &input, TCpuMatrix< Scalar_t > &input_gradient, TCpuMatrix< Scalar_t > &cell_gradient, TCpuMatrix< Scalar_t > &cell_tanh)
Backward pass for LSTM Network.
static Scalar_t L2Regularization(const Matrix_t &W)
static void CreateWeightTensors(std::vector< Matrix_t > &newWeights, const std::vector< Matrix_t > &weights)
Definition: Cpu.h:124
static void AddL2RegularizationGradients(Matrix_t &A, const Matrix_t &W, Scalar_t weightDecay)
static void InitializeGauss(Matrix_t &A)
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
static void IdentityDerivative(Tensor_t &B, const Tensor_t &A)
static Matrix_t & RecurrentLayerBackward(Matrix_t &state_gradients_backward, Matrix_t &input_weight_gradients, Matrix_t &state_weight_gradients, Matrix_t &bias_gradients, Matrix_t &df, const Matrix_t &state, const Matrix_t &weights_input, const Matrix_t &weights_state, const Matrix_t &input, Matrix_t &input_gradient)
Backward pass for Recurrent Networks.
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill the B x T x D tensor out with the T x B x D tensor in.
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void InitializeGRUTensors(GenLayer_t *)
Definition: Cpu.h:178
static void InitializeRNNDescriptors(TDescriptors *&, GenLayer_t *)
Definition: Cpu.h:146
static Matrix_t & GRULayerBackward(TCpuMatrix< Scalar_t > &state_gradients_backward, TCpuMatrix< Scalar_t > &reset_weight_gradients, TCpuMatrix< Scalar_t > &update_weight_gradients, TCpuMatrix< Scalar_t > &candidate_weight_gradients, TCpuMatrix< Scalar_t > &reset_state_weight_gradients, TCpuMatrix< Scalar_t > &update_state_weight_gradients, TCpuMatrix< Scalar_t > &candidate_state_weight_gradients, TCpuMatrix< Scalar_t > &reset_bias_gradients, TCpuMatrix< Scalar_t > &update_bias_gradients, TCpuMatrix< Scalar_t > &candidate_bias_gradients, TCpuMatrix< Scalar_t > &dr, TCpuMatrix< Scalar_t > &du, TCpuMatrix< Scalar_t > &dc, const TCpuMatrix< Scalar_t > &precStateActivations, const TCpuMatrix< Scalar_t > &fReset, const TCpuMatrix< Scalar_t > &fUpdate, const TCpuMatrix< Scalar_t > &fCandidate, const TCpuMatrix< Scalar_t > &weights_reset, const TCpuMatrix< Scalar_t > &weights_update, const TCpuMatrix< Scalar_t > &weights_candidate, const TCpuMatrix< Scalar_t > &weights_reset_state, const TCpuMatrix< Scalar_t > &weights_update_state, const TCpuMatrix< Scalar_t > &weights_candidate_state, const TCpuMatrix< Scalar_t > &input, TCpuMatrix< Scalar_t > &input_gradient, bool resetGateAfter)
Backward pass for GRU Network.
static void RNNBackward(const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, const Tensor_t &, const Matrix_t &, const Matrix_t &, const Tensor_t &, Tensor_t &, Matrix_t &, Matrix_t &, Tensor_t &, const RNNDescriptors_t &, RNNWorkspace_t &)
Definition: Cpu.h:637
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void InitializeConvWorkspace(TWorkspace *&, TDescriptors *&, const DNN::CNN::TConvParams &, ConvLayer_t *)
Definition: Cpu.h:158
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void InitializePoolDescriptors(TDescriptors *&, PoolingLayer_t *)
Definition: Cpu.h:144
static void InitializeZero(Matrix_t &A)
static Tensor_t BatchNormLayerReshapeTensor(int axis, const Tensor_t &x)
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
Definition: Cpu.h:536
static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
static void InitializeGlorotUniform(Matrix_t &A)
Sample from a uniform distribution in range [ -lim,+lim] where lim = sqrt(6/(N_in+N_out)).
static void Relu(Tensor_t &B)
static void ActivationFunctionBackward(Tensor_t &dX, const Tensor_t &Y, const Tensor_t &dY, const Tensor_t &X, EActivationFunction activFunct, const ActivationDescriptor_t activationDescr, const Scalar_t alpha=1, const Scalar_t beta=0)
Computes the gradient of the activation function.
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:326
static void Im2colIndices(std::vector< int > &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B such that each matrix is stretched into one row, resulting in a matrix A.
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
static void InitializeRNNTensors(GenLayer_t *)
Definition: Cpu.h:176
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void ReleaseConvDescriptors(TDescriptors *&)
Release CNN data/operator descriptors.
Definition: Cpu.h:153
static void InitializeRNNWorkspace(TWorkspace *&, TDescriptors *&, GenLayer_t *)
Definition: Cpu.h:166
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Definition: Cpu.h:108
static void InitializeGlorotNormal(Matrix_t &A)
Truncated normal initialization (Glorot, also called Xavier normal). The values are sampled with a norm...
static void InitializeLSTMDescriptors(TDescriptors *&, GenLayer_t *)
Definition: Cpu.h:147
static void FreeRNNWorkspace(TWorkspace *&)
Definition: Cpu.h:172
static void GaussDerivative(Tensor_t &B, const Tensor_t &A)
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
Definition: Arithmetic.hxx:357
static void DropoutForward(Matrix_t &A, Scalar_t p)
Definition: Cpu.h:453
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void InitializeConvDescriptors(TDescriptors *&, ConvLayer_t *)
Definition: Cpu.h:142
static bool IsCudnn()
Definition: Cpu.h:131
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.hxx:310
static void SigmoidDerivative(Tensor_t &B, const Tensor_t &A)
static void CopyDiffArch(Matrix_t &B, const AMatrix_t &A)
Definition: Cpu.h:820
static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
static void InitializeZero(Tensor_t &A)
static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A)
static void InitializeActivationDescriptor(ActivationDescriptor_t &, EActivationFunction, double=0.0)
Definition: Cpu.h:150
TCpuMatrix< AReal > Matrix_t
Definition: Cpu.h:71
static void ReleaseDescriptor(ActivationDescriptor_t &)
Definition: Cpu.h:174
static void ReleaseRNNDescriptors(TDescriptors *&)
Definition: Cpu.h:156
static TMVA::Experimental::MemoryLayout GetTensorLayout()
Definition: Cpu.h:106
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
static void ReluDerivative(Tensor_t &B, const Tensor_t &A)
static void ReciprocalElementWise(Matrix_t &A)
Reciprocal each element of the matrix A and write the result into A.
Definition: Arithmetic.hxx:318
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
Definition: Arithmetic.hxx:343
Generic General Layer class.
Definition: GeneralLayer.h:51
TMatrixT.
Definition: TMatrixT.h:39
This is the base class for the ROOT Random number generators.
Definition: TRandom.h:27
double beta(double x, double y)
Calculates the beta function.
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
static double B[]
static double A[]
static double C[]
double gamma(double x)
void Copy(void *source, void *dest)
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
MemoryLayout
Memory layout type (copy from RTensor.hxx)
Definition: CudaTensor.h:47
create variable transformations
auto * l
Definition: textangle.C:4
REAL epsilon
Definition: triangle.c:618
static void output(int code)
Definition: gifencode.c:226