Logo ROOT   6.18/05
Reference Guide
Reference.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 20/06/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12///////////////////////////////////////////////////////////////////////
13// Declaration of the TReference architecture, which provides a //
14// reference implementation of the low-level interface for the DNN //
15// implementation based on ROOT's TMatrixT matrix type. //
16///////////////////////////////////////////////////////////////////////
17
18#ifndef TMVA_DNN_ARCHITECTURES_REFERENCE
19#define TMVA_DNN_ARCHITECTURES_REFERENCE
20
21#include "TMatrix.h"
22#include "TMVA/DNN/Functions.h"
26#include <vector>
27
28class TRandom;
29
30namespace TMVA
31{
32namespace DNN
33{
34
35/*! The reference architecture class.
36*
37* Class template that contains the reference implementation of the low-level
38* interface for the DNN implementation. The reference implementation uses the
39* TMatrixT class template to represent matrices.
40*
41* \tparam AReal The floating point type used to represent scalars.
42*/
43template<typename AReal>
45{
46private:
48public:
49
50 using Scalar_t = AReal;
52
53 //____________________________________________________________________________
54 //
55 // Propagation
56 //____________________________________________________________________________
57
58 /** @name Forward Propagation
59 * Low-level functions required for the forward propagation of activations
60 * through the network.
61 */
62 ///@{
63 /** Matrix-multiply \p input with the transpose of \p weights and
64 * write the results into \p output. */
66 const TMatrixT<Scalar_t> &input,
67 const TMatrixT<Scalar_t> &weights);
68 /** Add the vectors biases row-wise to the matrix output */
70 const TMatrixT<Scalar_t> &biases);
71 ///@}
72
73 /** @name Backward Propagation
74 * Low-level functions required for the backward propagation of gradients
75 * through the network.
76 */
77 ///@{
78 /** Perform the complete backward propagation step. If the provided
79 * \p activationGradientsBackward matrix is not empty, compute the
80 * gradients of the objective function with respect to the activations
81 * of the previous layer (backward direction).
82 * Also compute the weight and the bias gradients. Modifies the values
83 * in \p df and thus produces only a valid result, if it is applied the
84 * first time after the corresponding forward propagation has been per-
85 * formed. */
86 static void Backward(TMatrixT<Scalar_t> & activationGradientsBackward,
87 TMatrixT<Scalar_t> & weightGradients,
88 TMatrixT<Scalar_t> & biasGradients,
90 const TMatrixT<Scalar_t> & activationGradients,
91 const TMatrixT<Scalar_t> & weights,
92 const TMatrixT<Scalar_t> & activationBackward);
93 /** Backpropagation step for a Recurrent Neural Network */
94 static Matrix_t & RecurrentLayerBackward(TMatrixT<Scalar_t> & state_gradients_backward, // BxH
95 TMatrixT<Scalar_t> & input_weight_gradients,
96 TMatrixT<Scalar_t> & state_weight_gradients,
97 TMatrixT<Scalar_t> & bias_gradients,
98 TMatrixT<Scalar_t> & df, //DxH
99 const TMatrixT<Scalar_t> & state, // BxH
100 const TMatrixT<Scalar_t> & weights_input, // HxD
101 const TMatrixT<Scalar_t> & weights_state, // HxH
102 const TMatrixT<Scalar_t> & input, // BxD
103 TMatrixT<Scalar_t> & input_gradient);
104 /** Adds the elements in matrix \p B scaled by \p beta to the elements in
105 * the matrix A. This is required for the weight update in the gradient
106 * descent step.*/
107 static void ScaleAdd(TMatrixT<Scalar_t> & A,
108 const TMatrixT<Scalar_t> & B,
109 Scalar_t beta = 1.0);
110
111 static void Copy(TMatrixT<Scalar_t> & A,
112 const TMatrixT<Scalar_t> & B);
113
114 // copy from another type of matrix
115 template<typename AMatrix_t>
116 static void CopyDiffArch(TMatrixT<Scalar_t> & A, const AMatrix_t & B);
117
118
119 /** Above functions extended to vectors */
120 static void ScaleAdd(std::vector<TMatrixT<Scalar_t>> & A,
121 const std::vector<TMatrixT<Scalar_t>> & B,
122 Scalar_t beta = 1.0);
123
124 static void Copy(std::vector<TMatrixT<Scalar_t>> & A, const std::vector<TMatrixT<Scalar_t>> & B);
125
126 // copy from another architecture
127 template<typename AMatrix_t>
128 static void CopyDiffArch(std::vector<TMatrixT<Scalar_t> > & A, const std::vector<AMatrix_t> & B);
129
130
131 ///@}
132
133 //____________________________________________________________________________
134 //
135 // Activation Functions
136 //____________________________________________________________________________
137
138 /** @name Activation Functions
139 * For each activation function, the low-level interface contains two routines.
140 * One that applies the activation function to a matrix and one that evaluates
141 * the derivatives of the activation function at the elements of a given matrix
142 * and writes the results into the result matrix.
143 */
144 ///@{
145 static void Identity(TMatrixT<AReal> & B);
147 const TMatrixT<AReal> & A);
148
149 static void Relu(TMatrixT<AReal> & B);
150 static void ReluDerivative(TMatrixT<AReal> & B,
151 const TMatrixT<AReal> & A);
152
153 static void Sigmoid(TMatrixT<AReal> & B);
154 static void SigmoidDerivative(TMatrixT<AReal> & B,
155 const TMatrixT<AReal> & A);
156
157 static void Tanh(TMatrixT<AReal> & B);
158 static void TanhDerivative(TMatrixT<AReal> & B,
159 const TMatrixT<AReal> & A);
160
161 static void SymmetricRelu(TMatrixT<AReal> & B);
163 const TMatrixT<AReal> & A);
164
165 static void SoftSign(TMatrixT<AReal> & B);
167 const TMatrixT<AReal> & A);
168
169 static void Gauss(TMatrixT<AReal> & B);
170 static void GaussDerivative(TMatrixT<AReal> & B,
171 const TMatrixT<AReal> & A);
172
173 ///@}
174
175 //____________________________________________________________________________
176 //
177 // Loss Functions
178 //____________________________________________________________________________
179
180 /** @name Loss Functions
181 * Loss functions compute a scalar value given the \p output of the network
182 * for a given training input and the expected network prediction \p Y that
183 * quantifies the quality of the prediction. For each function, a routine
184 * that computes the gradients (suffixed by Gradients) must also be provided to
185 * start the backpropagation algorithm.
186 */
187 ///@{
188
189 static AReal MeanSquaredError(const TMatrixT<AReal> &Y, const TMatrixT<AReal> &output,
190 const TMatrixT<AReal> &weights);
192 const TMatrixT<AReal> &weights);
193
194 /** Sigmoid transformation is implicitly applied, thus \p output should
195 * hold the linear activations of the last layer in the net. */
196 static AReal CrossEntropy(const TMatrixT<AReal> &Y, const TMatrixT<AReal> &output, const TMatrixT<AReal> &weights);
197
199 const TMatrixT<AReal> &weights);
200
201 /** Softmax transformation is implicitly applied, thus \p output should
202 * hold the linear activations of the last layer in the net. */
203 static AReal SoftmaxCrossEntropy(const TMatrixT<AReal> &Y, const TMatrixT<AReal> &output,
204 const TMatrixT<AReal> &weights);
206 const TMatrixT<AReal> &output, const TMatrixT<AReal> &weights);
207 ///@}
208
209 //____________________________________________________________________________
210 //
211 // Output Functions
212 //____________________________________________________________________________
213
214 /** @name Output Functions
215 * Output functions transform the activations \p output of the
216 * output layer in the network to a valid prediction \p YHat for
217 * the desired usage of the network, e.g. the identity function
218 * for regression or the sigmoid transformation for two-class
219 * classification.
220 */
221 ///@{
222 static void Sigmoid(TMatrixT<AReal> &YHat,
223 const TMatrixT<AReal> & );
224 static void Softmax(TMatrixT<AReal> &YHat,
225 const TMatrixT<AReal> & );
226 ///@}
227
228 //____________________________________________________________________________
229 //
230 // Regularization
231 //____________________________________________________________________________
232
233 /** @name Regularization
234 * For each regularization type two functions are required, one named
235 * <tt><Type>Regularization</tt> that evaluates the corresponding
236 * regularization functional for a given weight matrix and the
237 * <tt>Add<Type>RegularizationGradients</tt>, that adds the regularization
238 * component in the gradients to the provided matrix.
239 */
240 ///@{
241
242 static AReal L1Regularization(const TMatrixT<AReal> & W);
244 const TMatrixT<AReal> & W,
245 AReal weightDecay);
246
247 static AReal L2Regularization(const TMatrixT<AReal> & W);
249 const TMatrixT<AReal> & W,
250 AReal weightDecay);
251 ///@}
252
253 //____________________________________________________________________________
254 //
255 // Initialization
256 //____________________________________________________________________________
257
258 /** @name Initialization
259 * For each initialization method, one function in the low-level interface
260 * is provided. The naming scheme is <tt>Initialize<Type></tt> for a given
261 * initialization method Type.
262 */
263 ///@{
264
265 static void InitializeGauss(TMatrixT<AReal> & A);
266
267 static void InitializeUniform(TMatrixT<AReal> & A);
268
269 static void InitializeIdentity(TMatrixT<AReal> & A);
270
271 static void InitializeZero(TMatrixT<AReal> & A);
272
274
276
277 // return static instance of random generator used for initialization
278 // if generator does not exist it is created the first time with a random seed (e.g. seed = 0)
279 static TRandom & GetRandomGenerator();
280 // set random seed for the static generator
281 // if the static generator does not exist it is created
282 static void SetRandomSeed(size_t seed);
283
284
285 ///@}
286
287 //____________________________________________________________________________
288 //
289 // Dropout
290 //____________________________________________________________________________
291
292 /** @name Dropout
293 */
294 ///@{
295
296 /** Apply dropout with activation probability \p dropoutProbability to the given
297 * matrix \p A and scale the result by the reciprocal of \p dropoutProbability. */
298 static void Dropout(TMatrixT<AReal> & A, AReal dropoutProbability);
299
300 ///@}
301
302
303 //____________________________________________________________________________
304 //
305 // Convolutional Layer Propagation
306 //____________________________________________________________________________
307
308 /** @name Forward Propagation in Convolutional Layer
309 */
310 ///@{
311
312 /** Transform the matrix \p B in local view format, suitable for
313 * convolution, and store it in matrix \p A. */
314 static void Im2col(TMatrixT<AReal> &A,
315 const TMatrixT<AReal> &B,
316 size_t imgHeight,
317 size_t imgWidth,
318 size_t fltHeight,
319 size_t fltWidth,
320 size_t strideRows,
321 size_t strideCols,
322 size_t zeroPaddingHeight,
323 size_t zeroPaddingWidth);
324
325 static void Im2colIndices(std::vector<int> &, const TMatrixT<AReal> &, size_t, size_t, size_t, size_t ,
326 size_t , size_t , size_t , size_t ,size_t ) {
327 Fatal("Im2ColIndices","This function is not implemented for ref architectures");
328 }
329 static void Im2colFast(TMatrixT<AReal> &, const TMatrixT<AReal> &, const std::vector<int> & ) {
330 Fatal("Im2ColFast","This function is not implemented for ref architectures");
331 }
332
333 /** Rotates the matrix \p B, which represents the weights,
334 * and stores the result in the matrix \p A. */
335 static void RotateWeights(TMatrixT<AReal> &A, const TMatrixT<AReal> &B, size_t filterDepth, size_t filterHeight,
336 size_t filterWidth, size_t numFilters);
337
338 /** Add the biases in the Convolutional Layer. */
339 static void AddConvBiases(TMatrixT<AReal> &output, const TMatrixT<AReal> &biases);
340 ///@}
341
342 /** Dummy placeholder - preparation is currently only required for the CUDA architecture.
 * In the reference architecture the argument is ignored and the body is empty. */
343 static void PrepareInternals(std::vector<TMatrixT<AReal>> &) {}
344
345 /** Forward propagation in the Convolutional layer.
 *
 * Not implemented for the reference architecture: every argument is ignored
 * (the parameter names are kept only as comments) and the call merely reports
 * the problem through Fatal(). */
346 static void ConvLayerForward(std::vector<TMatrixT<AReal>> & /*output*/,
347 std::vector<TMatrixT<AReal>> & /*derivatives*/,
348 const std::vector<TMatrixT<AReal>> & /*input*/,
349 const TMatrixT<AReal> & /*weights*/, const TMatrixT<AReal> & /*biases*/,
350 const DNN::CNN::TConvParams & /*params*/, EActivationFunction /*activFunc*/,
351 std::vector<TMatrixT<AReal>> & /*inputPrime*/) {
352 Fatal("ConvLayerForward","This function is not implemented for ref architectures");
353 }
354
355
356 /** @name Backward Propagation in Convolutional Layer
357 */
358 ///@{
359
360 /** Perform the complete backward propagation step in a Convolutional Layer.
361 * If the provided \p activationGradientsBackward matrix is not empty, compute the
362 * gradients of the objective function with respect to the activations
363 * of the previous layer (backward direction).
364 * Also compute the weight and the bias gradients. Modifies the values
365 * in \p df and thus produces only a valid result, if it is applied the
366 * first time after the corresponding forward propagation has been per-
367 * formed. */
368 static void ConvLayerBackward(std::vector<TMatrixT<AReal>> &,
370 std::vector<TMatrixT<AReal>> &,
371 const std::vector<TMatrixT<AReal>> &,
372 const TMatrixT<AReal> &, const std::vector<TMatrixT<AReal>> &,
373 size_t , size_t , size_t , size_t , size_t,
374 size_t , size_t , size_t , size_t , size_t) {
375 Fatal("ConvLayerBackward","This function is not implemented for ref architectures");
376
377 }
378
379#ifdef HAVE_CNN_REFERENCE
380 /** Utility function for calculating the activation gradients of the layer
381 * before the convolutional layer. */
382 static void CalculateConvActivationGradients(std::vector<TMatrixT<AReal>> &activationGradientsBackward,
383 const std::vector<TMatrixT<AReal>> &df, const TMatrixT<AReal> &weights,
384 size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
385 size_t height, size_t width, size_t filterDepth, size_t filterHeight,
386 size_t filterWidth);
387
388 /** Utility function for calculating the weight gradients of the convolutional
389 * layer. */
390 static void CalculateConvWeightGradients(TMatrixT<AReal> &weightGradients, const std::vector<TMatrixT<AReal>> &df,
391 const std::vector<TMatrixT<AReal>> &activationBackward, size_t batchSize,
392 size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
393 size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth,
394 size_t nLocalViews);
395
396 /** Utility function for calculating the bias gradients of the convolutional
397 * layer. */
398 static void CalculateConvBiasGradients(TMatrixT<AReal> &biasGradients, const std::vector<TMatrixT<AReal>> &df,
399 size_t batchSize, size_t depth, size_t nLocalViews);
400 ///@}
401
402#endif
403
404 //____________________________________________________________________________
405 //
406 // Max Pooling Layer Propagation
407 //____________________________________________________________________________
408 /** @name Forward Propagation in Max Pooling Layer
409 */
410 ///@{
411
412 /** Downsample the matrix \p C to the matrix \p A, using max
413 * operation, such that the winning indices are stored in matrix
414 * \p B. */
415 static void Downsample(TMatrixT<AReal> &A, TMatrixT<AReal> &B, const TMatrixT<AReal> &C, size_t imgHeight,
416 size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols);
417
418 ///@}
419
420 /** @name Backward Propagation in Max Pooling Layer
421 */
422 ///@{
423
424 /** Perform the complete backward propagation step in a Max Pooling Layer. Based on the
425 * winning indices stored in the index matrix, it just forwards the activation
426 * gradients to the previous layer. */
427 static void MaxPoolLayerBackward(TMatrixT<AReal> &activationGradientsBackward,
428 const TMatrixT<AReal> &activationGradients,
429 const TMatrixT<AReal> &indexMatrix,
430 size_t imgHeight,
431 size_t imgWidth,
432 size_t fltHeight,
433 size_t fltWidth,
434 size_t strideRows,
435 size_t strideCol,
436 size_t nLocalViews);
437 ///@}
438 //____________________________________________________________________________
439 //
440 // Reshape Layer Propagation
441 //____________________________________________________________________________
442 /** @name Forward and Backward Propagation in Reshape Layer
443 */
444 ///@{
445
446 /** Transform the matrix \p B to a matrix with different dimensions \p A */
447 static void Reshape(TMatrixT<AReal> &A, const TMatrixT<AReal> &B);
448
449 /** Flattens the tensor \p B such that each matrix is stretched into one row, resulting in the matrix \p A. */
450 static void Flatten(TMatrixT<AReal> &A, const std::vector<TMatrixT<AReal>> &B, size_t size, size_t nRows,
451 size_t nCols);
452
453 /** Transforms each row of \p B to a matrix and stores it in the tensor \p A. */
454 static void Deflatten(std::vector<TMatrixT<AReal>> &A, const TMatrixT<Scalar_t> &B, size_t index, size_t nRows,
455 size_t nCols);
456 /** Rearrange data according to time: fill the B x T x D tensor \p out from the T x B x D tensor \p in. */
457 static void Rearrange(std::vector<TMatrixT<AReal>> &out, const std::vector<TMatrixT<AReal>> &in);
458
459 ///@}
460
461 //____________________________________________________________________________
462 //
463 // Additional Arithmetic Functions
464 //____________________________________________________________________________
465
466 /** Sum columns of the (m x n) matrix \p A and write the results into the first
467 * m elements in \p B.
468 */
469 static void SumColumns(TMatrixT<AReal> &B, const TMatrixT<AReal> &A);
470
471 /** In-place Hadamard (element-wise) product of matrices \p A and \p B
472 * with the result being written into \p A.
473 */
474 static void Hadamard(TMatrixT<AReal> &A, const TMatrixT<AReal> &B);
475
476 /** Add the constant \p beta to all the elements of matrix \p A and write the
477 * result into \p A.
478 */
479 static void ConstAdd(TMatrixT<AReal> &A, AReal beta);
480
481 /** Multiply all the elements of matrix \p A by the constant \p beta and write the
482 * result into \p A.
483 */
484 static void ConstMult(TMatrixT<AReal> &A, AReal beta);
485
486 /** Take the reciprocal of each element of the matrix \p A and write the result into
487 * \p A
488 */
490
491 /** Square each element of the matrix \p A and write the result into
492 * \p A
493 */
494 static void SquareElementWise(TMatrixT<AReal> &A);
495
496 /** Take the square root of each element of the matrix \p A and write the result into
497 * \p A
498 */
499 static void SqrtElementWise(TMatrixT<AReal> &A);
500
501 // optimizer update functions
502
503 /// Update functions for ADAM optimizer
504 static void AdamUpdate(TMatrixT<AReal> & A, const TMatrixT<AReal> & M, const TMatrixT<AReal> & V, AReal alpha, AReal eps);
505 static void AdamUpdateFirstMom(TMatrixT<AReal> & A, const TMatrixT<AReal> & B, AReal beta);
506 static void AdamUpdateSecondMom(TMatrixT<AReal> & A, const TMatrixT<AReal> & B, AReal beta);
507
508
509
510 //____________________________________________________________________________
511 //
512 // AutoEncoder Propagation
513 //____________________________________________________________________________
514
515 // Add Biases to the output
516 static void AddBiases(TMatrixT<AReal> &A,
517 const TMatrixT<AReal> &biases);
518
519 // Updating parameters after every backward pass. Weights and biases are
520 // updated.
521 static void
523 TMatrixT<AReal> &z, TMatrixT<AReal> &fVBiases,
524 TMatrixT<AReal> &fHBiases, TMatrixT<AReal> &fWeights,
525 TMatrixT<AReal> &VBiasError, TMatrixT<AReal> &HBiasError,
526 AReal learningRate, size_t fBatchSize);
527
528 // Softmax functions redefined
529 static void SoftmaxAE(TMatrixT<AReal> & A);
530
531
532 // Corrupt the input values randomly according to the corruption level.
533 // Currently the inputs are simply masked.
534 static void CorruptInput(TMatrixT<AReal> & input,
535 TMatrixT<AReal> & corruptedInput,
536 AReal corruptionLevel);
537
538 //Encodes the input Values in the compressed form.
539 static void EncodeInput(TMatrixT<AReal> &input,
540 TMatrixT<AReal> &compressedInput,
541 TMatrixT<AReal> &Weights);
542
543 // reconstructs the input. The reconstructed Input has same dimensions as that
544 // of the input.
545 static void ReconstructInput(TMatrixT<AReal> & compressedInput,
546 TMatrixT<AReal> & reconstructedInput,
547 TMatrixT<AReal> &fWeights);
548
549
550 static void ForwardLogReg(TMatrixT<AReal> &input,
552 TMatrixT<AReal> &fWeights);
553
554 static void UpdateParamsLogReg(TMatrixT<AReal> &input,
556 TMatrixT<AReal> &difference,
558 TMatrixT<AReal> &fWeights,
559 TMatrixT<AReal> &fBiases,
560 AReal learningRate,
561 size_t fBatchSize);
562
563};
564
565
566// implement the templated member functions
567template <typename AReal>
568template <typename AMatrix_t>
570{
571 TMatrixT<AReal> tmp = B;
572 A = tmp;
573}
574
575template <typename AReal>
576template <typename AMatrix_t>
577void TReference<AReal>::CopyDiffArch(std::vector<TMatrixT<AReal>> &A, const std::vector<AMatrix_t> &B)
578{
579 for (size_t i = 0; i < A.size(); ++i) {
580 CopyDiffArch(A[i], B[i]);
581 }
582}
583
584
585
586} // namespace DNN
587} // namespace TMVA
588
589#endif
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
void Fatal(const char *location, const char *msgfmt,...)
The reference architecture class.
Definition: Reference.h:45
static void AdamUpdate(TMatrixT< AReal > &A, const TMatrixT< AReal > &M, const TMatrixT< AReal > &V, AReal alpha, AReal eps)
Update functions for ADAM optimizer.
Definition: Arithmetic.cxx:103
static void AdamUpdateSecondMom(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, AReal beta)
Definition: Arithmetic.cxx:129
static void SymmetricRelu(TMatrixT< AReal > &B)
static void InitializeIdentity(TMatrixT< AReal > &A)
static void MultiplyTranspose(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &input, const TMatrixT< Scalar_t > &weights)
Matrix-multiply input with the transpose of \pweights and write the results into output.
Definition: Propagation.cxx:23
static void InitializeGlorotNormal(TMatrixT< AReal > &A)
Truncated normal initialization (Glorot, called also Xavier normal) The values are sample with a norm...
static void AdamUpdateFirstMom(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, AReal beta)
Definition: Arithmetic.cxx:117
static void Flatten(TMatrixT< AReal > &A, const std::vector< TMatrixT< AReal > > &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B, such that each matrix, is stretched in one row, resulting with a matrix A.
static void Relu(TMatrixT< AReal > &B)
static void MaxPoolLayerBackward(TMatrixT< AReal > &activationGradientsBackward, const TMatrixT< AReal > &activationGradients, const TMatrixT< AReal > &indexMatrix, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCol, size_t nLocalViews)
Perform the complete backward propagation step in a Max Pooling Layer.
static void GaussDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void SoftmaxAE(TMatrixT< AReal > &A)
static void AddL1RegularizationGradients(TMatrixT< AReal > &A, const TMatrixT< AReal > &W, AReal weightDecay)
static void AddRowWise(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
Definition: Propagation.cxx:30
static void CrossEntropyGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static void Im2colFast(TMatrixT< AReal > &, const TMatrixT< AReal > &, const std::vector< int > &)
Definition: Reference.h:329
static void Downsample(TMatrixT< AReal > &A, TMatrixT< AReal > &B, const TMatrixT< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void EncodeInput(TMatrixT< AReal > &input, TMatrixT< AReal > &compressedInput, TMatrixT< AReal > &Weights)
static void TanhDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void ReconstructInput(TMatrixT< AReal > &compressedInput, TMatrixT< AReal > &reconstructedInput, TMatrixT< AReal > &fWeights)
static void Dropout(TMatrixT< AReal > &A, AReal dropoutProbability)
Apply dropout with activation probability p to the given matrix A and scale the result by reciprocal ...
Definition: Dropout.cxx:29
static AReal L2Regularization(const TMatrixT< AReal > &W)
static void Im2colIndices(std::vector< int > &, const TMatrixT< AReal > &, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t)
Definition: Reference.h:325
static void IdentityDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static AReal SoftmaxCrossEntropy(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
static void ConstAdd(TMatrixT< AReal > &A, AReal beta)
Add the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.cxx:48
static void Gauss(TMatrixT< AReal > &B)
static void SetRandomSeed(size_t seed)
static void SigmoidDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void SoftSignDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static AReal CrossEntropy(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
static void InitializeZero(TMatrixT< AReal > &A)
static void Tanh(TMatrixT< AReal > &B)
static void SoftSign(TMatrixT< AReal > &B)
static void Softmax(TMatrixT< AReal > &YHat, const TMatrixT< AReal > &)
static void ReciprocalElementWise(TMatrixT< AReal > &A)
Reciprocal each element of the matrix A and write the result into A.
Definition: Arithmetic.cxx:70
static void Backward(TMatrixT< Scalar_t > &activationGradientsBackward, TMatrixT< Scalar_t > &weightGradients, TMatrixT< Scalar_t > &biasGradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &activationGradients, const TMatrixT< Scalar_t > &weights, const TMatrixT< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
Definition: Propagation.cxx:40
static void SquareElementWise(TMatrixT< AReal > &A)
Square each element of the matrix A and write the result into A.
Definition: Arithmetic.cxx:81
static void MeanSquaredErrorGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static TRandom * fgRandomGen
Definition: Reference.h:47
static void RotateWeights(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A.
static void Rearrange(std::vector< TMatrixT< AReal > > &out, const std::vector< TMatrixT< AReal > > &in)
Rearrage data accoring to time fill B x T x D out with T x B x D matrix in.
static void Deflatten(std::vector< TMatrixT< AReal > > &A, const TMatrixT< Scalar_t > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B to a matrix and stores it in the tensor B.
static void Im2col(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void Hadamard(TMatrixT< AReal > &A, const TMatrixT< AReal > &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
Definition: Arithmetic.cxx:37
static void UpdateParams(TMatrixT< AReal > &x, TMatrixT< AReal > &tildeX, TMatrixT< AReal > &y, TMatrixT< AReal > &z, TMatrixT< AReal > &fVBiases, TMatrixT< AReal > &fHBiases, TMatrixT< AReal > &fWeights, TMatrixT< AReal > &VBiasError, TMatrixT< AReal > &HBiasError, AReal learningRate, size_t fBatchSize)
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds a the elements in matrix B scaled by c to the elements in the matrix A.
Definition: Propagation.cxx:76
static void Sigmoid(TMatrixT< AReal > &B)
static void CopyDiffArch(TMatrixT< Scalar_t > &A, const AMatrix_t &B)
Definition: Reference.h:569
static void SymmetricReluDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void Identity(TMatrixT< AReal > &B)
static void UpdateParamsLogReg(TMatrixT< AReal > &input, TMatrixT< AReal > &output, TMatrixT< AReal > &difference, TMatrixT< AReal > &p, TMatrixT< AReal > &fWeights, TMatrixT< AReal > &fBiases, AReal learningRate, size_t fBatchSize)
static void ConvLayerBackward(std::vector< TMatrixT< AReal > > &, TMatrixT< AReal > &, TMatrixT< AReal > &, std::vector< TMatrixT< AReal > > &, const std::vector< TMatrixT< AReal > > &, const TMatrixT< AReal > &, const std::vector< TMatrixT< AReal > > &, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t)
Perform the complete backward propagation step in a Convolutional Layer.
Definition: Reference.h:368
static void SoftmaxCrossEntropyGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static AReal L1Regularization(const TMatrixT< AReal > &W)
static void AddConvBiases(TMatrixT< AReal > &output, const TMatrixT< AReal > &biases)
Add the biases in the Convolutional Layer.
static void ConvLayerForward(std::vector< TMatrixT< AReal > > &, std::vector< TMatrixT< AReal > > &, const std::vector< TMatrixT< AReal > > &, const TMatrixT< AReal > &, const TMatrixT< AReal > &, const DNN::CNN::TConvParams &, EActivationFunction, std::vector< TMatrixT< AReal > > &)
Forward propagation in the Convolutional layer.
Definition: Reference.h:346
static void AddL2RegularizationGradients(TMatrixT< AReal > &A, const TMatrixT< AReal > &W, AReal weightDecay)
static void Copy(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B)
Definition: Propagation.cxx:86
static void CorruptInput(TMatrixT< AReal > &input, TMatrixT< AReal > &corruptedInput, AReal corruptionLevel)
static void InitializeGauss(TMatrixT< AReal > &A)
static AReal MeanSquaredError(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static void SqrtElementWise(TMatrixT< AReal > &A)
Square root each element of the matrix A and write the result into A.
Definition: Arithmetic.cxx:92
static void SumColumns(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
Sum columns of (m x n) matrixx A and write the results into the first m elements in A.
Definition: Arithmetic.cxx:25
static void ReluDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void ForwardLogReg(TMatrixT< AReal > &input, TMatrixT< AReal > &p, TMatrixT< AReal > &fWeights)
static void ConstMult(TMatrixT< AReal > &A, AReal beta)
Multiply the constant beta to all the elements of matrix A and write the result into A.
Definition: Arithmetic.cxx:59
static void InitializeGlorotUniform(TMatrixT< AReal > &A)
Sample from a uniform distribution in range [ -lim,+lim] where lim = sqrt(6/N_in+N_out).
static void Reshape(TMatrixT< AReal > &A, const TMatrixT< AReal > &B)
Transform the matrix B to a matrix with different dimensions A.
static void AddBiases(TMatrixT< AReal > &A, const TMatrixT< AReal > &biases)
static TRandom & GetRandomGenerator()
static void PrepareInternals(std::vector< TMatrixT< AReal > > &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
Definition: Reference.h:343
static Matrix_t & RecurrentLayerBackward(TMatrixT< Scalar_t > &state_gradients_backward, TMatrixT< Scalar_t > &input_weight_gradients, TMatrixT< Scalar_t > &state_weight_gradients, TMatrixT< Scalar_t > &bias_gradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &state, const TMatrixT< Scalar_t > &weights_input, const TMatrixT< Scalar_t > &weights_state, const TMatrixT< Scalar_t > &input, TMatrixT< Scalar_t > &input_gradient)
Backpropagation step for a Recurrent Neural Network.
static void InitializeUniform(TMatrixT< AReal > &A)
This is the base class for the ROOT Random number generators.
Definition: TRandom.h:27
double beta(double x, double y)
Calculates the beta function.
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
static double B[]
static double A[]
static double C[]
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:496
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
create variable transformations
static void output(int code)
Definition: gifencode.c:226