Reference.h
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

///////////////////////////////////////////////////////////////////////
// Declaration of the TReference architecture, which provides a      //
// reference implementation of the low-level interface for the DNN   //
// implementation based on ROOT's TMatrixT matrix type.              //
///////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_ARCHITECTURES_REFERENCE
#define TMVA_DNN_ARCHITECTURES_REFERENCE

#include "TMatrix.h"

namespace TMVA
{
namespace DNN
{

/*! The reference architecture class.
 *
 * Class template that contains the reference implementation of the low-level
 * interface for the DNN implementation. The reference implementation uses the
 * TMatrixT class template to represent matrices.
 *
 * \tparam AReal The floating point type used to represent scalars.
 */
template<typename AReal>
class TReference
{
public:

   using Scalar_t = AReal;

   //____________________________________________________________________________
   //
   // Propagation
   //____________________________________________________________________________

   /** @name Forward Propagation
    * Low-level functions required for the forward propagation of activations
    * through the network.
    */
   ///@{
   /** Matrix-multiply \p input with the transpose of \p weights and
    * write the results into \p output. */
   static void MultiplyTranspose(TMatrixT<Scalar_t> &output,
                                 const TMatrixT<Scalar_t> &input,
                                 const TMatrixT<Scalar_t> &weights);
   /** Add the vectors \p biases row-wise to the matrix \p output. */
   static void AddRowWise(TMatrixT<Scalar_t> &output,
                          const TMatrixT<Scalar_t> &biases);
   ///@}
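
   // A minimal forward-pass sketch, not part of the original header: it shows
   // how the two calls above combine into one layer evaluation. The names
   // batchSize, nInputs and nOutputs are assumed placeholders, and the matrix
   // shapes are assumptions consistent with the declarations above.
   //
   //   TMatrixT<double> input(batchSize, nInputs);    // previous-layer activations
   //   TMatrixT<double> weights(nOutputs, nInputs);   // layer weight matrix
   //   TMatrixT<double> biases(nOutputs, 1);          // one bias per output unit
   //   TMatrixT<double> output(batchSize, nOutputs);  // receives input * weights^T
   //   TReference<double>::MultiplyTranspose(output, input, weights);
   //   TReference<double>::AddRowWise(output, biases);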

   /** @name Backward Propagation
    * Low-level functions required for the backward propagation of gradients
    * through the network.
    */
   ///@{
   /** Perform the complete backward propagation step. If the provided
    * \p activationGradientsBackward matrix is not empty, compute the
    * gradients of the objective function with respect to the activations
    * of the previous layer (backward direction).
    * Also compute the weight and the bias gradients. Modifies the values
    * in \p df and thus produces a valid result only the first time it is
    * applied after the corresponding forward propagation has been
    * performed. */
   static void Backward(TMatrixT<Scalar_t> & activationGradientsBackward,
                        TMatrixT<Scalar_t> & weightGradients,
                        TMatrixT<Scalar_t> & biasGradients,
                        TMatrixT<Scalar_t> & df,
                        const TMatrixT<Scalar_t> & activationGradients,
                        const TMatrixT<Scalar_t> & weights,
                        const TMatrixT<Scalar_t> & activationBackward);
   /** Add the elements in matrix \p B, scaled by \p beta, to the elements in
    * the matrix \p A. This is required for the weight update in the gradient
    * descent step. */
   static void ScaleAdd(TMatrixT<Scalar_t> & A,
                        const TMatrixT<Scalar_t> & B,
                        Scalar_t beta = 1.0);

   static void Copy(TMatrixT<Scalar_t> & A,
                    const TMatrixT<Scalar_t> & B);
   ///@}
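
   // A sketch of a plain gradient-descent update built from ScaleAdd, not part
   // of the original header; weights, biases and their gradients are the
   // matrices produced by Backward, and learningRate is an assumed placeholder.
   //
   //   // W <- W - learningRate * dW, likewise for the biases.
   //   TReference<double>::ScaleAdd(weights, weightGradients, -learningRate);
   //   TReference<double>::ScaleAdd(biases, biasGradients, -learningRate);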

   //____________________________________________________________________________
   //
   // Activation Functions
   //____________________________________________________________________________

   /** @name Activation Functions
    * For each activation function, the low-level interface contains two routines.
    * One applies the activation function to a matrix and one evaluates
    * the derivatives of the activation function at the elements of a given matrix
    * and writes the results into the result matrix.
    */
   ///@{
   static void Identity(TMatrixT<AReal> & B);
   static void IdentityDerivative(TMatrixT<AReal> & B,
                                  const TMatrixT<AReal> & A);

   static void Relu(TMatrixT<AReal> & B);
   static void ReluDerivative(TMatrixT<AReal> & B,
                              const TMatrixT<AReal> & A);

   static void Sigmoid(TMatrixT<AReal> & B);
   static void SigmoidDerivative(TMatrixT<AReal> & B,
                                 const TMatrixT<AReal> & A);

   static void Tanh(TMatrixT<AReal> & B);
   static void TanhDerivative(TMatrixT<AReal> & B,
                              const TMatrixT<AReal> & A);

   static void SymmetricRelu(TMatrixT<AReal> & B);
   static void SymmetricReluDerivative(TMatrixT<AReal> & B,
                                       const TMatrixT<AReal> & A);

   static void SoftSign(TMatrixT<AReal> & B);
   static void SoftSignDerivative(TMatrixT<AReal> & B,
                                  const TMatrixT<AReal> & A);

   static void Gauss(TMatrixT<AReal> & B);
   static void GaussDerivative(TMatrixT<AReal> & B,
                               const TMatrixT<AReal> & A);

   ///@}
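
   // A sketch of how the two routines per activation pair up, not part of the
   // original header. The derivative is evaluated first, since the non-const
   // signature suggests the activation is applied in place; A is an assumed
   // matrix of linear activations.
   //
   //   TMatrixT<double> df(A.GetNrows(), A.GetNcols());
   //   TReference<double>::ReluDerivative(df, A);  // df_ij = ReLU'(A_ij)
   //   TReference<double>::Relu(A);                // A_ij  = max(A_ij, 0)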

   //____________________________________________________________________________
   //
   // Loss Functions
   //____________________________________________________________________________

   /** @name Loss Functions
    * Loss functions compute a scalar value given the \p output of the network
    * for a given training input and the expected network prediction \p Y that
    * quantifies the quality of the prediction. For each function a routine
    * that computes the gradients (suffixed by Gradients) must also be provided
    * to start the backpropagation algorithm.
    */
   ///@{

   static AReal MeanSquaredError(const TMatrixT<AReal> &Y, const TMatrixT<AReal> &output,
                                 const TMatrixT<AReal> &weights);
   static void MeanSquaredErrorGradients(TMatrixT<AReal> &dY, const TMatrixT<AReal> &Y,
                                         const TMatrixT<AReal> &output,
                                         const TMatrixT<AReal> &weights);

   /** Sigmoid transformation is implicitly applied, thus \p output should
    * hold the linear activations of the last layer in the net. */
   static AReal CrossEntropy(const TMatrixT<AReal> &Y, const TMatrixT<AReal> &output, const TMatrixT<AReal> &weights);

   static void CrossEntropyGradients(TMatrixT<AReal> &dY, const TMatrixT<AReal> &Y,
                                     const TMatrixT<AReal> &output,
                                     const TMatrixT<AReal> &weights);

   /** Softmax transformation is implicitly applied, thus \p output should
    * hold the linear activations of the last layer in the net. */
   static AReal SoftmaxCrossEntropy(const TMatrixT<AReal> &Y, const TMatrixT<AReal> &output,
                                    const TMatrixT<AReal> &weights);
   static void SoftmaxCrossEntropyGradients(TMatrixT<AReal> &dY, const TMatrixT<AReal> &Y,
                                            const TMatrixT<AReal> &output, const TMatrixT<AReal> &weights);
   ///@}
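
   // A sketch pairing a loss with its gradient routine, not part of the
   // original header; Y, output and weights are assumed matrices of matching
   // shape (targets, network predictions, per-event weights).
   //
   //   double loss = TReference<double>::MeanSquaredError(Y, output, weights);
   //   TMatrixT<double> dY(Y.GetNrows(), Y.GetNcols());
   //   TReference<double>::MeanSquaredErrorGradients(dY, Y, output, weights);  // seeds backprop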

   //____________________________________________________________________________
   //
   // Output Functions
   //____________________________________________________________________________

   /** @name Output Functions
    * Output functions transform the activations \p output of the
    * output layer in the network to a valid prediction \p YHat for
    * the desired usage of the network, e.g. the identity function
    * for regression or the sigmoid transformation for two-class
    * classification.
    */
   ///@{
   static void Sigmoid(TMatrixT<AReal> &YHat,
                       const TMatrixT<AReal> & );
   static void Softmax(TMatrixT<AReal> &YHat,
                       const TMatrixT<AReal> & );
   ///@}
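
   // A sketch for two-class classification, not part of the original header:
   // map the linear activations of the last layer to probabilities; output is
   // an assumed matrix of last-layer activations.
   //
   //   TMatrixT<double> YHat(output.GetNrows(), output.GetNcols());
   //   TReference<double>::Sigmoid(YHat, output);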

   //____________________________________________________________________________
   //
   // Regularization
   //____________________________________________________________________________

   /** @name Regularization
    * For each regularization type two functions are required, one named
    * <tt><Type>Regularization</tt> that evaluates the corresponding
    * regularization functional for a given weight matrix and the
    * <tt>Add<Type>RegularizationGradients</tt>, that adds the regularization
    * component in the gradients to the provided matrix.
    */
   ///@{

   static AReal L1Regularization(const TMatrixT<AReal> & W);
   static void AddL1RegularizationGradients(TMatrixT<AReal> & A,
                                            const TMatrixT<AReal> & W,
                                            AReal weightDecay);

   static AReal L2Regularization(const TMatrixT<AReal> & W);
   static void AddL2RegularizationGradients(TMatrixT<AReal> & A,
                                            const TMatrixT<AReal> & W,
                                            AReal weightDecay);
   ///@}
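
   // A sketch of how the two regularization routines pair up, not part of the
   // original header; loss is an already computed loss value, W an assumed
   // weight matrix, and lambda an assumed weight-decay factor.
   //
   //   loss += lambda * TReference<double>::L2Regularization(W);
   //   TReference<double>::AddL2RegularizationGradients(weightGradients, W, lambda);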

   //____________________________________________________________________________
   //
   // Initialization
   //____________________________________________________________________________

   /** @name Initialization
    * For each initialization method, one function in the low-level interface
    * is provided. The naming scheme is <tt>Initialize<Type></tt> for a given
    * initialization method Type.
    */
   ///@{

   static void InitializeGauss(TMatrixT<AReal> & A);

   static void InitializeUniform(TMatrixT<AReal> & A);

   static void InitializeIdentity(TMatrixT<AReal> & A);

   static void InitializeZero(TMatrixT<AReal> & A);

   ///@}
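
   // A sketch of initializing a layer, not part of the original header:
   // Gaussian-distributed weights and zeroed biases, where weights and biases
   // are assumed preallocated matrices.
   //
   //   TReference<double>::InitializeGauss(weights);
   //   TReference<double>::InitializeZero(biases);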

   //____________________________________________________________________________
   //
   // Dropout
   //____________________________________________________________________________

   /** @name Dropout
    */
   ///@{

   /** Apply dropout with activation probability \p p to the given
    * matrix \p A and scale the result by the reciprocal of \p p. */
   static void Dropout(TMatrixT<AReal> & A, AReal dropoutProbability);

   ///@}
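
   // A sketch of inverted dropout as documented above, not part of the
   // original header: each element of the assumed activation matrix A survives
   // with probability p = 0.5 and survivors are scaled by 1/p, so the expected
   // activation is unchanged.
   //
   //   TReference<double>::Dropout(A, 0.5);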

   //____________________________________________________________________________
   //
   // Additional Arithmetic Functions
   //____________________________________________________________________________

   /** Sum columns of the (m x n) matrix \p A and write the results into the
    * first m elements of \p B.
    */
   static void SumColumns(TMatrixT<AReal> &B, const TMatrixT<AReal> &A);
};

} // namespace DNN
} // namespace TMVA

#endif