Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
Reference.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 20/06/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12///////////////////////////////////////////////////////////////////////
13// Declaration of the TReference architecture, which provides a //
14// reference implementation of the low-level interface for the DNN //
15// implementation based on ROOT's TMatrixT matrix type. //
16///////////////////////////////////////////////////////////////////////
17
18#ifndef TMVA_DNN_ARCHITECTURES_REFERENCE
19#define TMVA_DNN_ARCHITECTURES_REFERENCE
20
21#include "TMatrix.h"
22//#include "TMVA/RTensor.hxx"
23#include "TMVA/DNN/Functions.h"
27#include <vector>
28
29
30class TRandom;
31
32namespace TMVA
33{
34namespace DNN
35{
36// struct TDescriptors {
37// };
38// struct TWorkspace {
39// };
40
41/*! The reference architecture class.
42*
43* Class template that contains the reference implementation of the low-level
44* interface for the DNN implementation. The reference implementation uses the
45* TMatrixT class template to represent matrices.
46*
47* \tparam AReal The floating point type used to represent scalars.
48*/
49
50
51template<typename AReal>
53{
54private:
56public:
57 using Scalar_t = AReal;
60 //using Tensor_t = TMVA::Experimental::RTensor<AReal>;
61
62 //____________________________________________________________________________
63 //
64 // Propagation
65 //____________________________________________________________________________
66
67 /** @name Forward Propagation
68 * Low-level functions required for the forward propagation of activations
69 * through the network.
70 */
71 ///@{
72 /** Matrix-multiply \p input with the transpose of \p weights and
73 * write the results into \p output. */
74
77 const TMatrixT<Scalar_t> &weights);
78
79
80 /** Add the vectors biases row-wise to the matrix output */
82 const TMatrixT<Scalar_t> &biases);
83 ///@}
84
85 /** @name Backward Propagation
86 * Low-level functions required for the backward propagation of gradients
87 * through the network.
88 */
89 ///@{
90 /** Perform the complete backward propagation step. If the provided
91 * \p activationGradientsBackward matrix is not empty, compute the
92 * gradients of the objective function with respect to the activations
93 * of the previous layer (backward direction).
94 * Also compute the weight and the bias gradients. Modifies the values
95 * in \p df and thus produces only a valid result, if it is applied the
96 * first time after the corresponding forward propagation has been per-
97 * formed. */
98 static void Backward(TMatrixT<Scalar_t> & activationGradientsBackward,
99 TMatrixT<Scalar_t> & weightGradients,
100 TMatrixT<Scalar_t> & biasGradients,
102 const TMatrixT<Scalar_t> & activationGradients,
103 const TMatrixT<Scalar_t> & weights,
104 const TMatrixT<Scalar_t> & activationBackward);
105 /** Backpropagation step for a Recurrent Neural Network */
106 static Matrix_t & RecurrentLayerBackward(TMatrixT<Scalar_t> & state_gradients_backward, // BxH
107 TMatrixT<Scalar_t> & input_weight_gradients,
108 TMatrixT<Scalar_t> & state_weight_gradients,
109 TMatrixT<Scalar_t> & bias_gradients,
110 TMatrixT<Scalar_t> & df, //DxH
111 const TMatrixT<Scalar_t> & state, // BxH
112 const TMatrixT<Scalar_t> & weights_input, // HxD
113 const TMatrixT<Scalar_t> & weights_state, // HxH
114 const TMatrixT<Scalar_t> & input, // BxD
115 TMatrixT<Scalar_t> & input_gradient);
116
117
118
119 /** Backward pass for LSTM Network */
120 static Matrix_t & LSTMLayerBackward(TMatrixT<Scalar_t> & state_gradients_backward,
121 TMatrixT<Scalar_t> & cell_gradients_backward,
122 TMatrixT<Scalar_t> & input_weight_gradients,
123 TMatrixT<Scalar_t> & forget_weight_gradients,
124 TMatrixT<Scalar_t> & candidate_weight_gradients,
125 TMatrixT<Scalar_t> & output_weight_gradients,
126 TMatrixT<Scalar_t> & input_state_weight_gradients,
127 TMatrixT<Scalar_t> & forget_state_weight_gradients,
128 TMatrixT<Scalar_t> & candidate_state_weight_gradients,
129 TMatrixT<Scalar_t> & output_state_weight_gradients,
130 TMatrixT<Scalar_t> & input_bias_gradients,
131 TMatrixT<Scalar_t> & forget_bias_gradients,
132 TMatrixT<Scalar_t> & candidate_bias_gradients,
133 TMatrixT<Scalar_t> & output_bias_gradients,
137 TMatrixT<Scalar_t> & dout,
138 const TMatrixT<Scalar_t> & precStateActivations,
139 const TMatrixT<Scalar_t> & precCellActivations,
140 const TMatrixT<Scalar_t> & fInput,
141 const TMatrixT<Scalar_t> & fForget,
142 const TMatrixT<Scalar_t> & fCandidate,
143 const TMatrixT<Scalar_t> & fOutput,
144 const TMatrixT<Scalar_t> & weights_input,
145 const TMatrixT<Scalar_t> & weights_forget,
146 const TMatrixT<Scalar_t> & weights_candidate,
147 const TMatrixT<Scalar_t> & weights_output,
148 const TMatrixT<Scalar_t> & weights_input_state,
149 const TMatrixT<Scalar_t> & weights_forget_state,
150 const TMatrixT<Scalar_t> & weights_candidate_state,
151 const TMatrixT<Scalar_t> & weights_output_state,
153 TMatrixT<Scalar_t> & input_gradient,
154 TMatrixT<Scalar_t> & cell_gradient,
155 TMatrixT<Scalar_t> & cell_tanh);
156
157
158 /** Backward pass for GRU Network */
159 static Matrix_t & GRULayerBackward(TMatrixT<Scalar_t> & state_gradients_backward,
160 TMatrixT<Scalar_t> & reset_weight_gradients,
161 TMatrixT<Scalar_t> & update_weight_gradients,
162 TMatrixT<Scalar_t> & candidate_weight_gradients,
163 TMatrixT<Scalar_t> & reset_state_weight_gradients,
164 TMatrixT<Scalar_t> & update_state_weight_gradients,
165 TMatrixT<Scalar_t> & candidate_state_weight_gradients,
166 TMatrixT<Scalar_t> & reset_bias_gradients,
167 TMatrixT<Scalar_t> & update_bias_gradients,
168 TMatrixT<Scalar_t> & candidate_bias_gradients,
172 const TMatrixT<Scalar_t> & precStateActivations,
173 const TMatrixT<Scalar_t> & fReset,
174 const TMatrixT<Scalar_t> & fUpdate,
175 const TMatrixT<Scalar_t> & fCandidate,
176 const TMatrixT<Scalar_t> & weights_reset,
177 const TMatrixT<Scalar_t> & weights_update,
178 const TMatrixT<Scalar_t> & weights_candidate,
179 const TMatrixT<Scalar_t> & weights_reset_state,
180 const TMatrixT<Scalar_t> & weights_update_state,
181 const TMatrixT<Scalar_t> & weights_candidate_state,
183 TMatrixT<Scalar_t> & input_gradient);
184
185 /** Adds the elements in matrix B scaled by \p beta to the elements in
186 * the matrix A. This is required for the weight update in the gradient
187 * descent step.*/
188 static void ScaleAdd(TMatrixT<Scalar_t> & A,
189 const TMatrixT<Scalar_t> & B,
190 Scalar_t beta = 1.0);
191
192 static void Copy(TMatrixT<Scalar_t> & A,
193 const TMatrixT<Scalar_t> & B);
194
195 // copy from another type of matrix
196 template<typename AMatrix_t>
197 static void CopyDiffArch(TMatrixT<Scalar_t> & A, const AMatrix_t & B);
198
199
200 /** Above functions extended to vectors */
201 static void ScaleAdd(std::vector<TMatrixT<Scalar_t>> & A,
202 const std::vector<TMatrixT<Scalar_t>> & B,
203 Scalar_t beta = 1.0);
204
205 static void Copy(std::vector<TMatrixT<Scalar_t>> & A, const std::vector<TMatrixT<Scalar_t>> & B);
206
207 // copy from another architecture
208 template<typename AMatrix_t>
209 static void CopyDiffArch(std::vector<TMatrixT<Scalar_t> > & A, const std::vector<AMatrix_t> & B);
210
211
212 ///@}
213
214 //____________________________________________________________________________
215 //
216 // Activation Functions
217 //____________________________________________________________________________
218
219 /** @name Activation Functions
220 * For each activation function, the low-level interface contains two routines.
221 * One that applies the activation function to a matrix and one that evaluates
222 * the derivatives of the activation function at the elements of a given matrix
223 * and writes the results into the result matrix.
224 */
225 ///@{
226 static void Identity(TMatrixT<AReal> & B);
227 static void IdentityDerivative(TMatrixT<AReal> & B,
228 const TMatrixT<AReal> & A);
229
230 static void Relu(TMatrixT<AReal> & B);
231 static void ReluDerivative(TMatrixT<AReal> & B,
232 const TMatrixT<AReal> & A);
233
234 static void Sigmoid(TMatrixT<AReal> & B);
235 static void SigmoidDerivative(TMatrixT<AReal> & B,
236 const TMatrixT<AReal> & A);
237
238 static void Tanh(TMatrixT<AReal> & B);
239 static void TanhDerivative(TMatrixT<AReal> & B,
240 const TMatrixT<AReal> & A);
241
242 static void FastTanh(Tensor_t &B) { return Tanh(B); }
243 static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A) { return TanhDerivative(B, A); }
244
245 static void SymmetricRelu(TMatrixT<AReal> & B);
247 const TMatrixT<AReal> & A);
248
249 static void SoftSign(TMatrixT<AReal> & B);
250 static void SoftSignDerivative(TMatrixT<AReal> & B,
251 const TMatrixT<AReal> & A);
252
253 static void Gauss(TMatrixT<AReal> & B);
254 static void GaussDerivative(TMatrixT<AReal> & B,
255 const TMatrixT<AReal> & A);
256
257
258 ///@}
259
260 //____________________________________________________________________________
261 //
262 // Loss Functions
263 //____________________________________________________________________________
264
265 /** @name Loss Functions
266 * Loss functions compute a scalar value given the \p output of the network
267 * for a given training input and the expected network prediction \p Y that
268 * quantifies the quality of the prediction. For each function also a routine
269 * that computes the gradients (suffixed by Gradients) must be provided for
270 * the starting of the backpropagation algorithm.
271 */
272 ///@{
273
275 const TMatrixT<AReal> &weights);
277 const TMatrixT<AReal> &weights);
278
279 /** Sigmoid transformation is implicitly applied, thus \p output should
280 * hold the linear activations of the last layer in the net. */
281 static AReal CrossEntropy(const TMatrixT<AReal> &Y, const TMatrixT<AReal> &output, const TMatrixT<AReal> &weights);
282
284 const TMatrixT<AReal> &weights);
285
286 /** Softmax transformation is implicitly applied, thus \p output should
287 * hold the linear activations of the last layer in the net. */
289 const TMatrixT<AReal> &weights);
291 const TMatrixT<AReal> &output, const TMatrixT<AReal> &weights);
292 ///@}
293
294 //____________________________________________________________________________
295 //
296 // Output Functions
297 //____________________________________________________________________________
298
299 /** @name Output Functions
300 * Output functions transform the activations \p output of the
301 * output layer in the network to a valid prediction \p YHat for
302 * the desired usage of the network, e.g. the identity function
303 * for regression or the sigmoid transformation for two-class
304 * classification.
305 */
306 ///@{
307 static void Sigmoid(TMatrixT<AReal> &YHat,
308 const TMatrixT<AReal> & );
309 static void Softmax(TMatrixT<AReal> &YHat,
310 const TMatrixT<AReal> & );
311 ///@}
312
313 //____________________________________________________________________________
314 //
315 // Regularization
316 //____________________________________________________________________________
317
318 /** @name Regularization
319 * For each regularization type, two functions are required, one named
320 * `<Type>Regularization` that evaluates the corresponding
321 * regularization functional for a given weight matrix and the
322 * `Add<Type>RegularizationGradients`, that adds the regularization
323 * component in the gradients to the provided matrix.
324 */
325 ///@{
326
327 static AReal L1Regularization(const TMatrixT<AReal> & W);
329 const TMatrixT<AReal> & W,
331
332 static AReal L2Regularization(const TMatrixT<AReal> & W);
334 const TMatrixT<AReal> & W,
336 ///@}
337
338 //____________________________________________________________________________
339 //
340 // Initialization
341 //____________________________________________________________________________
342
343 /** @name Initialization
344 * For each initialization method, one function in the low-level interface
345 * is provided. The naming scheme is `Initialize<Type>` for a given
346 * initialization method Type.
347 */
348 ///@{
349
350 static void InitializeGauss(TMatrixT<AReal> & A);
351
352 static void InitializeUniform(TMatrixT<AReal> & A);
353
354 static void InitializeIdentity(TMatrixT<AReal> & A);
355
356 static void InitializeZero(TMatrixT<AReal> & A);
357
359
361
362 // return static instance of random generator used for initialization
363 // if generator does not exist it is created the first time with a random seed (e.g. seed = 0)
364 static TRandom & GetRandomGenerator();
365 // set random seed for the static generator
366 // if the static generator does not exist it is created
367 static void SetRandomSeed(size_t seed);
368
369
370 ///@}
371
372 //____________________________________________________________________________
373 //
374 // Dropout
375 //____________________________________________________________________________
376
377 /** @name Dropout
378 */
379 ///@{
380
381 /** Apply dropout with activation probability \p p to the given
382 * matrix \p A and scale the result by reciprocal of \p p. */
383 //static void Dropout(TMatrixT<AReal> & A, AReal dropoutProbability);
384 static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p);
386 {
387 Tensor_t & tA = A; // Tensor and matrix are same types
388 DropoutForward(tA, static_cast<TDescriptors *>(nullptr), static_cast<TWorkspace *>(nullptr), p);
389 }
390
391 ///@}
392
393
394 //____________________________________________________________________________
395 //
396 // Convolutional Layer Propagation
397 //____________________________________________________________________________
398
399 /** @name Forward Propagation in Convolutional Layer
400 */
401 ///@{
402
403 /** Transform the matrix \p B in local view format, suitable for
404 * convolution, and store it in matrix \p A. */
405 static void Im2col(TMatrixT<AReal> &A,
406 const TMatrixT<AReal> &B,
407 size_t imgHeight,
408 size_t imgWidth,
409 size_t fltHeight,
410 size_t fltWidth,
411 size_t strideRows,
412 size_t strideCols,
413 size_t zeroPaddingHeight,
414 size_t zeroPaddingWidth);
415
416 static void Im2colIndices(std::vector<int> &, const TMatrixT<AReal> &, size_t, size_t, size_t, size_t ,
417 size_t , size_t , size_t , size_t ,size_t ) {
418 Fatal("Im2ColIndices","This function is not implemented for ref architectures");
419 }
420 static void Im2colFast(TMatrixT<AReal> &, const TMatrixT<AReal> &, const std::vector<int> & ) {
421 Fatal("Im2ColFast","This function is not implemented for ref architectures");
422 }
423
424 /** Rotates the matrix \p B, which represents the weights,
425 * and stores the result in the matrix \p A. */
426 static void RotateWeights(TMatrixT<AReal> &A, const TMatrixT<AReal> &B, size_t filterDepth, size_t filterHeight,
427 size_t filterWidth, size_t numFilters);
428
429 /** Add the biases in the Convolutional Layer. */
430 static void AddConvBiases(TMatrixT<AReal> &output, const TMatrixT<AReal> &biases);
431 ///@}
432
433 /** Dummy placeholder - preparation is currently only required for the CUDA architecture. */
434 static void PrepareInternals(std::vector<TMatrixT<AReal>> &) {}
435
436 /** Forward propagation in the Convolutional layer */
437 static void ConvLayerForward(std::vector<TMatrixT<AReal>> & /*output*/,
438 std::vector<TMatrixT<AReal>> & /*derivatives*/,
439 const std::vector<TMatrixT<AReal>> & /*input*/,
440 const TMatrixT<AReal> & /*weights*/, const TMatrixT<AReal> & /*biases*/,
441 const DNN::CNN::TConvParams & /*params*/, EActivationFunction /*activFunc*/,
442 std::vector<TMatrixT<AReal>> & /*inputPrime*/) {
443 Fatal("ConvLayerForward","This function is not implemented for ref architectures");
444 }
445
446
447 /** @name Backward Propagation in Convolutional Layer
448 */
449 ///@{
450
451 /** Perform the complete backward propagation step in a Convolutional Layer.
452 * If the provided \p activationGradientsBackward matrix is not empty, compute the
453 * gradients of the objective function with respect to the activations
454 * of the previous layer (backward direction).
455 * Also compute the weight and the bias gradients. Modifies the values
456 * in \p df and thus produces only a valid result, if it is applied the
457 * first time after the corresponding forward propagation has been per-
458 * formed. */
459 static void ConvLayerBackward(std::vector<TMatrixT<AReal>> &,
461 std::vector<TMatrixT<AReal>> &,
462 const std::vector<TMatrixT<AReal>> &,
463 const TMatrixT<AReal> &, const std::vector<TMatrixT<AReal>> &,
464 size_t , size_t , size_t , size_t , size_t,
465 size_t , size_t , size_t , size_t , size_t) {
466 Fatal("ConvLayerBackward","This function is not implemented for ref architectures");
467
468 }
469
470#ifdef HAVE_CNN_REFERENCE
471 /** Utility function for calculating the activation gradients of the layer
472 * before the convolutional layer. */
473 static void CalculateConvActivationGradients(std::vector<TMatrixT<AReal>> &activationGradientsBackward,
474 const std::vector<TMatrixT<AReal>> &df, const TMatrixT<AReal> &weights,
475 size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
476 size_t height, size_t width, size_t filterDepth, size_t filterHeight,
477 size_t filterWidth);
478
479 /** Utility function for calculating the weight gradients of the convolutional
480 * layer. */
481 static void CalculateConvWeightGradients(TMatrixT<AReal> &weightGradients, const std::vector<TMatrixT<AReal>> &df,
482 const std::vector<TMatrixT<AReal>> &activationBackward, size_t batchSize,
483 size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
484 size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth,
485 size_t nLocalViews);
486
487 /** Utility function for calculating the bias gradients of the convolutional
488 * layer. */
489 static void CalculateConvBiasGradients(TMatrixT<AReal> &biasGradients, const std::vector<TMatrixT<AReal>> &df,
490 size_t batchSize, size_t depth, size_t nLocalViews);
491 ///@}
492
493#endif
494
495 //____________________________________________________________________________
496 //
497 // Max Pooling Layer Propagation
498 //____________________________________________________________________________
499 /** @name Forward Propagation in Max Pooling Layer
500 */
501 ///@{
502
503 /** Downsample the matrix \p C to the matrix \p A, using max
504 * operation, such that the winning indices are stored in matrix
505 * \p B. */
506 static void Downsample(TMatrixT<AReal> &A, TMatrixT<AReal> &B, const TMatrixT<AReal> &C, size_t imgHeight,
507 size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols);
508
509 ///@}
510
511 /** @name Backward Propagation in Max Pooling Layer
512 */
513 ///@{
514
515 /** Perform the complete backward propagation step in a Max Pooling Layer. Based on the
516 * winning indices stored in the index matrix, it just forwards the activation
517 * gradients to the previous layer. */
518 static void MaxPoolLayerBackward(TMatrixT<AReal> &activationGradientsBackward,
519 const TMatrixT<AReal> &activationGradients,
520 const TMatrixT<AReal> &indexMatrix,
521 size_t imgHeight,
522 size_t imgWidth,
523 size_t fltHeight,
524 size_t fltWidth,
525 size_t strideRows,
526 size_t strideCol,
527 size_t nLocalViews);
528 ///@}
529 //____________________________________________________________________________
530 //
531 // Reshape Layer Propagation
532 //____________________________________________________________________________
533 /** @name Forward and Backward Propagation in Reshape Layer
534 */
535 ///@{
536
537 /** Transform the matrix \p B to a matrix with different dimensions \p A */
538 static void Reshape(TMatrixT<AReal> &A, const TMatrixT<AReal> &B);
539
540 /** Flattens the tensor \p B, such that each matrix is stretched into one row, resulting in a matrix \p A. */
541 static void Flatten(TMatrixT<AReal> &A, const std::vector<TMatrixT<AReal>> &B, size_t size, size_t nRows,
542 size_t nCols);
543
544 /** Transforms each row of \p B to a matrix and stores it in the tensor \p A. */
545 static void Deflatten(std::vector<TMatrixT<AReal>> &A, const TMatrixT<Scalar_t> &B, size_t index, size_t nRows,
546 size_t nCols);
547 /** Rearrange data according to time: fill the B x T x D tensor \p out from the T x B x D tensor \p in. */
548 static void Rearrange(std::vector<TMatrixT<AReal>> &out, const std::vector<TMatrixT<AReal>> &in);
549
550 ///@}
551
552 //____________________________________________________________________________
553 //
554 // Additional Arithmetic Functions
555 //____________________________________________________________________________
556
557 /** Sum columns of (m x n) matrix \p A and write the results into the first
558 * m elements in \p B.
559 */
560 static void SumColumns(TMatrixT<AReal> &B, const TMatrixT<AReal> &A);
561
562 /** In-place Hadamard (element-wise) product of matrices \p A and \p B
563 * with the result being written into \p A.
564 */
565 static void Hadamard(TMatrixT<AReal> &A, const TMatrixT<AReal> &B);
566
567 /** Add the constant \p beta to all the elements of matrix \p A and write the
568 * result into \p A.
569 */
570 static void ConstAdd(TMatrixT<AReal> &A, AReal beta);
571
572 /** Multiply the constant \p beta to all the elements of matrix \p A and write the
573 * result into \p A.
574 */
575 static void ConstMult(TMatrixT<AReal> &A, AReal beta);
576
577 /** Reciprocal each element of the matrix \p A and write the result into
578 * \p A
579 */
581
582 /** Square each element of the matrix \p A and write the result into
583 * \p A
584 */
585 static void SquareElementWise(TMatrixT<AReal> &A);
586
587 /** Square root each element of the matrix \p A and write the result into
588 * \p A
589 */
590 static void SqrtElementWise(TMatrixT<AReal> &A);
591
592 // optimizer update functions
593
594 /// Update functions for ADAM optimizer
595 static void AdamUpdate(TMatrixT<AReal> & A, const TMatrixT<AReal> & M, const TMatrixT<AReal> & V, AReal alpha, AReal eps);
596 static void AdamUpdateFirstMom(TMatrixT<AReal> & A, const TMatrixT<AReal> & B, AReal beta);
597 static void AdamUpdateSecondMom(TMatrixT<AReal> & A, const TMatrixT<AReal> & B, AReal beta);
598
599
600
601 //____________________________________________________________________________
602 //
603 // AutoEncoder Propagation
604 //____________________________________________________________________________
605
606 // Add Biases to the output
607 static void AddBiases(TMatrixT<AReal> &A,
608 const TMatrixT<AReal> &biases);
609
610 // Updating parameters after every backward pass. Weights and biases are
611 // updated.
612 static void
614 TMatrixT<AReal> &z, TMatrixT<AReal> &fVBiases,
615 TMatrixT<AReal> &fHBiases, TMatrixT<AReal> &fWeights,
616 TMatrixT<AReal> &VBiasError, TMatrixT<AReal> &HBiasError,
617 AReal learningRate, size_t fBatchSize);
618
619 // Softmax functions redefined
620 static void SoftmaxAE(TMatrixT<AReal> & A);
621
622
623 // Corrupt the input values randomly on corruption Level.
624 //Basically inputs are masked currently.
625 static void CorruptInput(TMatrixT<AReal> & input,
626 TMatrixT<AReal> & corruptedInput,
627 AReal corruptionLevel);
628
629 //Encodes the input Values in the compressed form.
630 static void EncodeInput(TMatrixT<AReal> &input,
631 TMatrixT<AReal> &compressedInput,
632 TMatrixT<AReal> &Weights);
633
634 // reconstructs the input. The reconstructed Input has same dimensions as that
635 // of the input.
636 static void ReconstructInput(TMatrixT<AReal> & compressedInput,
637 TMatrixT<AReal> & reconstructedInput,
638 TMatrixT<AReal> &fWeights);
639
640
643 TMatrixT<AReal> &fWeights);
644
647 TMatrixT<AReal> &difference,
649 TMatrixT<AReal> &fWeights,
650 TMatrixT<AReal> &fBiases,
651 AReal learningRate,
652 size_t fBatchSize);
653
654};
655
656
657// implement the templated member functions
658template <typename AReal>
659template <typename AMatrix_t>
661{
662 TMatrixT<AReal> tmp = B;
663 A = tmp;
664}
665
666template <typename AReal>
667template <typename AMatrix_t>
668void TReference<AReal>::CopyDiffArch(std::vector<TMatrixT<AReal>> &A, const std::vector<AMatrix_t> &B)
669{
670 for (size_t i = 0; i < A.size(); ++i) {
671 CopyDiffArch(A[i], B[i]);
672 }
673}
674
675
676
677} // namespace DNN
678} // namespace TMVA
679
680#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Definition TError.cxx:244
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t width
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
Implementation of the CrossEntropy as separation criterion.
The reference architecture class.
Definition Reference.h:53
static void AdamUpdate(TMatrixT< AReal > &A, const TMatrixT< AReal > &M, const TMatrixT< AReal > &V, AReal alpha, AReal eps)
Update functions for ADAM optimizer.
static void AdamUpdateSecondMom(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, AReal beta)
static void DropoutForward(Matrix_t &A, Scalar_t p)
Definition Reference.h:385
static void SymmetricRelu(TMatrixT< AReal > &B)
static void InitializeIdentity(TMatrixT< AReal > &A)
static void MultiplyTranspose(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &input, const TMatrixT< Scalar_t > &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static void InitializeGlorotNormal(TMatrixT< AReal > &A)
Truncated normal initialization (Glorot, called also Xavier normal) The values are sample with a norm...
static void AdamUpdateFirstMom(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, AReal beta)
static void Flatten(TMatrixT< AReal > &A, const std::vector< TMatrixT< AReal > > &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B, such that each matrix, is stretched in one row, resulting with a matrix A.
static void Relu(TMatrixT< AReal > &B)
static void MaxPoolLayerBackward(TMatrixT< AReal > &activationGradientsBackward, const TMatrixT< AReal > &activationGradients, const TMatrixT< AReal > &indexMatrix, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCol, size_t nLocalViews)
Perform the complete backward propagation step in a Max Pooling Layer.
static void GaussDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void SoftmaxAE(TMatrixT< AReal > &A)
static void AddL1RegularizationGradients(TMatrixT< AReal > &A, const TMatrixT< AReal > &W, AReal weightDecay)
static void AddRowWise(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
static void CrossEntropyGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static void Im2colFast(TMatrixT< AReal > &, const TMatrixT< AReal > &, const std::vector< int > &)
Definition Reference.h:420
static void Downsample(TMatrixT< AReal > &A, TMatrixT< AReal > &B, const TMatrixT< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void EncodeInput(TMatrixT< AReal > &input, TMatrixT< AReal > &compressedInput, TMatrixT< AReal > &Weights)
static void TanhDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void ReconstructInput(TMatrixT< AReal > &compressedInput, TMatrixT< AReal > &reconstructedInput, TMatrixT< AReal > &fWeights)
static AReal L2Regularization(const TMatrixT< AReal > &W)
static void Im2colIndices(std::vector< int > &, const TMatrixT< AReal > &, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t)
Definition Reference.h:416
static void IdentityDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static AReal SoftmaxCrossEntropy(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
static Matrix_t & GRULayerBackward(TMatrixT< Scalar_t > &state_gradients_backward, TMatrixT< Scalar_t > &reset_weight_gradients, TMatrixT< Scalar_t > &update_weight_gradients, TMatrixT< Scalar_t > &candidate_weight_gradients, TMatrixT< Scalar_t > &reset_state_weight_gradients, TMatrixT< Scalar_t > &update_state_weight_gradients, TMatrixT< Scalar_t > &candidate_state_weight_gradients, TMatrixT< Scalar_t > &reset_bias_gradients, TMatrixT< Scalar_t > &update_bias_gradients, TMatrixT< Scalar_t > &candidate_bias_gradients, TMatrixT< Scalar_t > &dr, TMatrixT< Scalar_t > &du, TMatrixT< Scalar_t > &dc, const TMatrixT< Scalar_t > &precStateActivations, const TMatrixT< Scalar_t > &fReset, const TMatrixT< Scalar_t > &fUpdate, const TMatrixT< Scalar_t > &fCandidate, const TMatrixT< Scalar_t > &weights_reset, const TMatrixT< Scalar_t > &weights_update, const TMatrixT< Scalar_t > &weights_candidate, const TMatrixT< Scalar_t > &weights_reset_state, const TMatrixT< Scalar_t > &weights_update_state, const TMatrixT< Scalar_t > &weights_candidate_state, const TMatrixT< Scalar_t > &input, TMatrixT< Scalar_t > &input_gradient)
Backward pass for GRU Network.
static void ConstAdd(TMatrixT< AReal > &A, AReal beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static void FastTanhDerivative(Tensor_t &B, const Tensor_t &A)
Definition Reference.h:243
static void SetRandomSeed(size_t seed)
static void SigmoidDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void SoftSignDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void InitializeZero(TMatrixT< AReal > &A)
static void Softmax(TMatrixT< AReal > &YHat, const TMatrixT< AReal > &)
static void ReciprocalElementWise(TMatrixT< AReal > &A)
Reciprocal each element of the matrix A and write the result into A.
static void Backward(TMatrixT< Scalar_t > &activationGradientsBackward, TMatrixT< Scalar_t > &weightGradients, TMatrixT< Scalar_t > &biasGradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &activationGradients, const TMatrixT< Scalar_t > &weights, const TMatrixT< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
static void SquareElementWise(TMatrixT< AReal > &A)
Square each element of the matrix A and write the result into A.
static void MeanSquaredErrorGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static TRandom * fgRandomGen
Definition Reference.h:55
static void RotateWeights(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A.
static void Rearrange(std::vector< TMatrixT< AReal > > &out, const std::vector< TMatrixT< AReal > > &in)
Rearrage data according to time fill B x T x D out with T x B x D matrix in.
static Matrix_t & LSTMLayerBackward(TMatrixT< Scalar_t > &state_gradients_backward, TMatrixT< Scalar_t > &cell_gradients_backward, TMatrixT< Scalar_t > &input_weight_gradients, TMatrixT< Scalar_t > &forget_weight_gradients, TMatrixT< Scalar_t > &candidate_weight_gradients, TMatrixT< Scalar_t > &output_weight_gradients, TMatrixT< Scalar_t > &input_state_weight_gradients, TMatrixT< Scalar_t > &forget_state_weight_gradients, TMatrixT< Scalar_t > &candidate_state_weight_gradients, TMatrixT< Scalar_t > &output_state_weight_gradients, TMatrixT< Scalar_t > &input_bias_gradients, TMatrixT< Scalar_t > &forget_bias_gradients, TMatrixT< Scalar_t > &candidate_bias_gradients, TMatrixT< Scalar_t > &output_bias_gradients, TMatrixT< Scalar_t > &di, TMatrixT< Scalar_t > &df, TMatrixT< Scalar_t > &dc, TMatrixT< Scalar_t > &dout, const TMatrixT< Scalar_t > &precStateActivations, const TMatrixT< Scalar_t > &precCellActivations, const TMatrixT< Scalar_t > &fInput, const TMatrixT< Scalar_t > &fForget, const TMatrixT< Scalar_t > &fCandidate, const TMatrixT< Scalar_t > &fOutput, const TMatrixT< Scalar_t > &weights_input, const TMatrixT< Scalar_t > &weights_forget, const TMatrixT< Scalar_t > &weights_candidate, const TMatrixT< Scalar_t > &weights_output, const TMatrixT< Scalar_t > &weights_input_state, const TMatrixT< Scalar_t > &weights_forget_state, const TMatrixT< Scalar_t > &weights_candidate_state, const TMatrixT< Scalar_t > &weights_output_state, const TMatrixT< Scalar_t > &input, TMatrixT< Scalar_t > &input_gradient, TMatrixT< Scalar_t > &cell_gradient, TMatrixT< Scalar_t > &cell_tanh)
Backward pass for LSTM Network.
static void Deflatten(std::vector< TMatrixT< AReal > > &A, const TMatrixT< Scalar_t > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B to a matrix and stores it in the tensor A.
static void Im2col(TMatrixT< AReal > &A, const TMatrixT< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void Hadamard(TMatrixT< AReal > &A, const TMatrixT< AReal > &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
static void UpdateParams(TMatrixT< AReal > &x, TMatrixT< AReal > &tildeX, TMatrixT< AReal > &y, TMatrixT< AReal > &z, TMatrixT< AReal > &fVBiases, TMatrixT< AReal > &fHBiases, TMatrixT< AReal > &fWeights, TMatrixT< AReal > &VBiasError, TMatrixT< AReal > &HBiasError, AReal learningRate, size_t fBatchSize)
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
static void Sigmoid(TMatrixT< AReal > &B)
static void CopyDiffArch(TMatrixT< Scalar_t > &A, const AMatrix_t &B)
Definition Reference.h:660
static void SymmetricReluDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void Identity(TMatrixT< AReal > &B)
static void UpdateParamsLogReg(TMatrixT< AReal > &input, TMatrixT< AReal > &output, TMatrixT< AReal > &difference, TMatrixT< AReal > &p, TMatrixT< AReal > &fWeights, TMatrixT< AReal > &fBiases, AReal learningRate, size_t fBatchSize)
static void ConvLayerBackward(std::vector< TMatrixT< AReal > > &, TMatrixT< AReal > &, TMatrixT< AReal > &, std::vector< TMatrixT< AReal > > &, const std::vector< TMatrixT< AReal > > &, const TMatrixT< AReal > &, const std::vector< TMatrixT< AReal > > &, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t)
Perform the complete backward propagation step in a Convolutional Layer.
Definition Reference.h:459
static void SoftmaxCrossEntropyGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static AReal L1Regularization(const TMatrixT< AReal > &W)
static void AddConvBiases(TMatrixT< AReal > &output, const TMatrixT< AReal > &biases)
Add the biases in the Convolutional Layer.
static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p)
Apply dropout with activation probability p to the given matrix A and scale the result by reciprocal ...
static void ConvLayerForward(std::vector< TMatrixT< AReal > > &, std::vector< TMatrixT< AReal > > &, const std::vector< TMatrixT< AReal > > &, const TMatrixT< AReal > &, const TMatrixT< AReal > &, const DNN::CNN::TConvParams &, EActivationFunction, std::vector< TMatrixT< AReal > > &)
Forward propagation in the Convolutional layer.
Definition Reference.h:437
static void AddL2RegularizationGradients(TMatrixT< AReal > &A, const TMatrixT< AReal > &W, AReal weightDecay)
static void Copy(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B)
static void CorruptInput(TMatrixT< AReal > &input, TMatrixT< AReal > &corruptedInput, AReal corruptionLevel)
static void InitializeGauss(TMatrixT< AReal > &A)
static AReal MeanSquaredError(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output, const TMatrixT< AReal > &weights)
static void SqrtElementWise(TMatrixT< AReal > &A)
Take the square root of each element of the matrix A and write the result into A.
static void SumColumns(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
Sum columns of (m x n) matrix A and write the results into the first m elements in B.
static void ReluDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void ForwardLogReg(TMatrixT< AReal > &input, TMatrixT< AReal > &p, TMatrixT< AReal > &fWeights)
static void ConstMult(TMatrixT< AReal > &A, AReal beta)
Multiply all the elements of matrix A by the constant beta and write the result into A.
static void InitializeGlorotUniform(TMatrixT< AReal > &A)
Sample from a uniform distribution in range [-lim, +lim] where lim = sqrt(6/(N_in+N_out)).
static void FastTanh(Tensor_t &B)
Definition Reference.h:242
static void Reshape(TMatrixT< AReal > &A, const TMatrixT< AReal > &B)
Transform the matrix B to a matrix with different dimensions A.
static void AddBiases(TMatrixT< AReal > &A, const TMatrixT< AReal > &biases)
static TRandom & GetRandomGenerator()
static void PrepareInternals(std::vector< TMatrixT< AReal > > &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
Definition Reference.h:434
static Matrix_t & RecurrentLayerBackward(TMatrixT< Scalar_t > &state_gradients_backward, TMatrixT< Scalar_t > &input_weight_gradients, TMatrixT< Scalar_t > &state_weight_gradients, TMatrixT< Scalar_t > &bias_gradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &state, const TMatrixT< Scalar_t > &weights_input, const TMatrixT< Scalar_t > &weights_state, const TMatrixT< Scalar_t > &input, TMatrixT< Scalar_t > &input_gradient)
Backpropagation step for a Recurrent Neural Network.
static void InitializeUniform(TMatrixT< AReal > &A)
This is the base class for the ROOT Random number generators.
Definition TRandom.h:27
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
std::shared_ptr< std::function< double(double)> > Tanh
Definition NeuralNet.cxx:29
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
EActivationFunction
Enum that represents layer activation functions.
Definition Functions.h:32
std::shared_ptr< std::function< double(double)> > Gauss
Definition NeuralNet.cxx:12
std::shared_ptr< std::function< double(double)> > Sigmoid
Definition NeuralNet.cxx:26
std::shared_ptr< std::function< double(double)> > SoftSign
Definition NeuralNet.cxx:32
create variable transformations
static void output()