Logo ROOT   6.10/09
Reference Guide
Reference.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 20/06/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ///////////////////////////////////////////////////////////////////////
13 // Declaration of the TReference architecture, which provides a //
14 // reference implementation of the low-level interface for the DNN //
15 // implementation based on ROOT's TMatrixT matrix type. //
16 ///////////////////////////////////////////////////////////////////////
17 
18 #ifndef TMVA_DNN_ARCHITECTURES_REFERENCE
19 #define TMVA_DNN_ARCHITECTURES_REFERENCE
20 
21 #include "TMatrix.h"
22 
23 namespace TMVA
24 {
25 namespace DNN
26 {
27 
28 /*! The reference architecture class.
29 *
30 * Class template that contains the reference implementation of the low-level
31 * interface for the DNN implementation. The reference implementation uses the
32 * TMatrixT class template to represent matrices.
33 *
34 * \tparam AReal The floating point type used to represent scalars.
35 */
36 template<typename AReal>
38 {
39 public:
40 
41  using Scalar_t = AReal;
43 
44  //____________________________________________________________________________
45  //
46  // Propagation
47  //____________________________________________________________________________
48 
49  /** @name Forward Propagation
50  * Low-level functions required for the forward propagation of activations
51  * through the network.
52  */
53  ///@{
54  /** Matrix-multiply \p input with the transpose of \pweights and
55  * write the results into \p output. */
57  const TMatrixT<Scalar_t> &input,
58  const TMatrixT<Scalar_t> &weights);
59  /** Add the vectors biases row-wise to the matrix output */
61  const TMatrixT<Scalar_t> &biases);
62  ///@}
63 
64  /** @name Backward Propagation
65  * Low-level functions required for the forward propagation of activations
66  * through the network.
67  */
68  ///@{
69  /** Perform the complete backward propagation step. If the provided
70  * \p activationGradientsBackward matrix is not empty, compute the
71  * gradients of the objective function with respect to the activations
72  * of the previous layer (backward direction).
73  * Also compute the weight and the bias gradients. Modifies the values
74  * in \p df and thus produces only a valid result, if it is applied the
75  * first time after the corresponding forward propagation has been per-
76  * formed. */
77  static void Backward(TMatrixT<Scalar_t> & activationGradientsBackward,
78  TMatrixT<Scalar_t> & weightGradients,
79  TMatrixT<Scalar_t> & biasGradients,
80  TMatrixT<Scalar_t> & df,
81  const TMatrixT<Scalar_t> & activationGradients,
82  const TMatrixT<Scalar_t> & weights,
83  const TMatrixT<Scalar_t> & activationBackward);
84  /** Adds a the elements in matrix B scaled by c to the elements in
85  * the matrix A. This is required for the weight update in the gradient
86  * descent step.*/
87  static void ScaleAdd(TMatrixT<Scalar_t> & A,
88  const TMatrixT<Scalar_t> & B,
89  Scalar_t beta = 1.0);
90 
91  static void Copy(TMatrixT<Scalar_t> & A,
92  const TMatrixT<Scalar_t> & B);
93  ///@}
94 
95  //____________________________________________________________________________
96  //
97  // Activation Functions
98  //____________________________________________________________________________
99 
100  /** @name Activation Functions
101  * For each activation function, the low-level interface contains two routines.
102  * One that applies the acitvation function to a matrix and one that evaluate
103  * the derivatives of the activation function at the elements of a given matrix
104  * and writes the results into the result matrix.
105  */
106  ///@{
107  static void Identity(TMatrixT<AReal> & B);
108  static void IdentityDerivative(TMatrixT<AReal> & B,
109  const TMatrixT<AReal> & A);
110 
111  static void Relu(TMatrixT<AReal> & B);
112  static void ReluDerivative(TMatrixT<AReal> & B,
113  const TMatrixT<AReal> & A);
114 
115  static void Sigmoid(TMatrixT<AReal> & B);
116  static void SigmoidDerivative(TMatrixT<AReal> & B,
117  const TMatrixT<AReal> & A);
118 
119  static void Tanh(TMatrixT<AReal> & B);
120  static void TanhDerivative(TMatrixT<AReal> & B,
121  const TMatrixT<AReal> & A);
122 
123  static void SymmetricRelu(TMatrixT<AReal> & B);
125  const TMatrixT<AReal> & A);
126 
127  static void SoftSign(TMatrixT<AReal> & B);
128  static void SoftSignDerivative(TMatrixT<AReal> & B,
129  const TMatrixT<AReal> & A);
130 
131  static void Gauss(TMatrixT<AReal> & B);
132  static void GaussDerivative(TMatrixT<AReal> & B,
133  const TMatrixT<AReal> & A);
134 
135  ///@}
136 
137  //____________________________________________________________________________
138  //
139  // Loss Functions
140  //____________________________________________________________________________
141 
142  /** @name Loss Functions
143  * Loss functions compute a scalar value given the \p output of the network
144  * for a given training input and the expected network prediction \p Y that
145  * quantifies the quality of the prediction. For each function also a routing
146  * that computes the gradients (suffixed by Gradients) must be provided for
147  * the starting of the backpropagation algorithm.
148  */
149  ///@{
150 
151  static AReal MeanSquaredError(const TMatrixT<AReal> &Y,
152  const TMatrixT<AReal> &output);
154  const TMatrixT<AReal> &Y,
155  const TMatrixT<AReal> &output);
156 
157  /** Sigmoid transformation is implicitly applied, thus \p output should
158  * hold the linear activations of the last layer in the net. */
159  static AReal CrossEntropy(const TMatrixT<AReal> &Y,
160  const TMatrixT<AReal> &output);
161 
162  static void CrossEntropyGradients(TMatrixT<AReal> & dY,
163  const TMatrixT<AReal> & Y,
164  const TMatrixT<AReal> & output);
165 
166  /** Softmax transformation is implicitly applied, thus \p output should
167  * hold the linear activations of the last layer in the net. */
168  static AReal SoftmaxCrossEntropy(const TMatrixT<AReal> &Y,
169  const TMatrixT<AReal> &output);
171  const TMatrixT<AReal> & Y,
172  const TMatrixT<AReal> & output);
173  ///@}
174 
175  //____________________________________________________________________________
176  //
177  // Output Functions
178  //____________________________________________________________________________
179 
180  /** @name Output Functions
181  * Output functions transform the activations \p output of the
182  * output layer in the network to a valid prediction \p YHat for
183  * the desired usage of the network, e.g. the identity function
184  * for regression or the sigmoid transformation for two-class
185  * classification.
186  */
187  ///@{
188  static void Sigmoid(TMatrixT<AReal> &YHat,
189  const TMatrixT<AReal> & );
190  static void Softmax(TMatrixT<AReal> &YHat,
191  const TMatrixT<AReal> & );
192  ///@}
193 
194  //____________________________________________________________________________
195  //
196  // Regularization
197  //____________________________________________________________________________
198 
199  /** @name Regularization
200  * For each regularization type two functions are required, one named
201  * <tt><Type>Regularization</tt> that evaluates the corresponding
202  * regularization functional for a given weight matrix and the
203  * <tt>Add<Type>RegularizationGradients</tt>, that adds the regularization
204  * component in the gradients to the provided matrix.
205  */
206  ///@{
207 
208  static AReal L1Regularization(const TMatrixT<AReal> & W);
210  const TMatrixT<AReal> & W,
211  AReal weightDecay);
212 
213  static AReal L2Regularization(const TMatrixT<AReal> & W);
215  const TMatrixT<AReal> & W,
216  AReal weightDecay);
217  ///@}
218 
219  //____________________________________________________________________________
220  //
221  // Initialization
222  //____________________________________________________________________________
223 
224  /** @name Initialization
225  * For each initialization method, one function in the low-level interface
226  * is provided. The naming scheme is <p>Initialize<Type></p> for a given
227  * initialization method Type.
228  */
229  ///@{
230 
231  static void InitializeGauss(TMatrixT<AReal> & A);
232 
233  static void InitializeUniform(TMatrixT<AReal> & A);
234 
235  static void InitializeIdentity(TMatrixT<AReal> & A);
236 
237  static void InitializeZero(TMatrixT<AReal> & A);
238 
239  ///@}
240 
241  //____________________________________________________________________________
242  //
243  // Dropout
244  //____________________________________________________________________________
245 
246  /** @name Dropout
247  */
248  ///@{
249 
250  /** Apply dropout with activation probability \p p to the given
251  * matrix \p A and scale the result by reciprocal of \p p. */
252  static void Dropout(TMatrixT<AReal> & A, AReal dropoutProbability);
253 
254  ///@}
255 };
256 
257 } // namespace DNN
258 } // namespace TMVA
259 
260 #endif
static void MeanSquaredErrorGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output)
static double B[]
static void TanhDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void IdentityDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void SoftmaxCrossEntropyGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output)
static void MultiplyTranspose(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &input, const TMatrixT< Scalar_t > &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
Definition: Propagation.cxx:25
static void Tanh(TMatrixT< AReal > &B)
static void CrossEntropyGradients(TMatrixT< AReal > &dY, const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output)
static void Sigmoid(TMatrixT< AReal > &B)
static void SigmoidDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void SoftSign(TMatrixT< AReal > &B)
static void Identity(TMatrixT< AReal > &B)
static void Backward(TMatrixT< Scalar_t > &activationGradientsBackward, TMatrixT< Scalar_t > &weightGradients, TMatrixT< Scalar_t > &biasGradients, TMatrixT< Scalar_t > &df, const TMatrixT< Scalar_t > &activationGradients, const TMatrixT< Scalar_t > &weights, const TMatrixT< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
Definition: Propagation.cxx:44
static void AddRowWise(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
Definition: Propagation.cxx:33
static void SymmetricReluDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static double A[]
double beta(double x, double y)
Calculates the beta function.
TMatrixT.
Definition: TMatrixDfwd.h:22
static void AddL2RegularizationGradients(TMatrixT< AReal > &A, const TMatrixT< AReal > &W, AReal weightDecay)
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:496
The reference architecture class.
Definition: Reference.h:37
static AReal SoftmaxCrossEntropy(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
static AReal L1Regularization(const TMatrixT< AReal > &W)
static void InitializeUniform(TMatrixT< AReal > &A)
static void AddL1RegularizationGradients(TMatrixT< AReal > &A, const TMatrixT< AReal > &W, AReal weightDecay)
static void Relu(TMatrixT< AReal > &B)
static void SymmetricRelu(TMatrixT< AReal > &B)
static void InitializeZero(TMatrixT< AReal > &A)
static void ScaleAdd(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in matrix A.
Definition: Propagation.cxx:83
static AReal L2Regularization(const TMatrixT< AReal > &W)
static AReal CrossEntropy(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
static void ReluDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void GaussDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
static void Softmax(TMatrixT< AReal > &YHat, const TMatrixT< AReal > &)
static void Copy(TMatrixT< Scalar_t > &A, const TMatrixT< Scalar_t > &B)
Definition: Propagation.cxx:95
static void InitializeIdentity(TMatrixT< AReal > &A)
static void SoftSignDerivative(TMatrixT< AReal > &B, const TMatrixT< AReal > &A)
Abstract ClassifierFactory template that handles arbitrary types.
static void InitializeGauss(TMatrixT< AReal > &A)
static AReal MeanSquaredError(const TMatrixT< AReal > &Y, const TMatrixT< AReal > &output)
static void Gauss(TMatrixT< AReal > &B)
static void Dropout(TMatrixT< AReal > &A, AReal dropoutProbability)
Apply dropout with activation probability p to the given matrix A and scale the result by reciprocal ...
Definition: Dropout.cxx:29