Functions.h
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

/////////////////////////////////////////////////////////////////////
// Contains function enums for activation and output functions, as //
// well as generic evaluation functions that delegate the call to  //
// the corresponding evaluation kernel.                            //
/////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_FUNCTIONS
#define TMVA_DNN_FUNCTIONS

namespace TMVA
{
namespace DNN
{
//______________________________________________________________________________
//
// Enum Definitions
//______________________________________________________________________________

/*! Enum that represents layer activation functions. */
enum class EActivationFunction
{
   kIdentity = 0,
   kRelu     = 1,
   kSigmoid  = 2,
   kTanh     = 3,
   kSymmRelu = 4,
   kSoftSign = 5,
   kGauss    = 6
};

/*! Enum that represents output functions */
enum class EOutputFunction
{
   kIdentity = 'I',
   kSigmoid  = 'S',
   kSoftmax  = 'M'
};

/*! Enum that represents objective functions for the net, i.e. functions
 *  that take the output from the last layer in the net together with the
 *  truth and return the objective function value that is to be minimized
 *  in the training process. */
enum class ELossFunction
{
   kCrossEntropy        = 'C',
   kMeanSquaredError    = 'R',
   kSoftmaxCrossEntropy = 'S'
};

/*! Enum representing the regularization type applied for a given layer */
enum class ERegularization
{
   kNone = '0',
   kL1   = '1',
   kL2   = '2'
};

/* Enum representing the initialization method used for this layer. */
enum class EInitialization {
   kGauss         = 'G',
   kUniform       = 'U',
   kIdentity      = 'I',
   kZero          = 'Z',
   kGlorotNormal  = 'X',
   kGlorotUniform = 'F',
};

//______________________________________________________________________________
//
// Activation Functions
//______________________________________________________________________________

/*! Apply the given activation function to each value in the given
 *  matrix A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Matrix_t &A,
                     EActivationFunction f)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : break;
   case EActivationFunction::kRelu     : Architecture_t::Relu(A);
      break;
   case EActivationFunction::kSigmoid  : Architecture_t::Sigmoid(A);
      break;
   case EActivationFunction::kTanh     : Architecture_t::Tanh(A);
      break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricRelu(A);
      break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSign(A);
      break;
   case EActivationFunction::kGauss    : Architecture_t::Gauss(A);
      break;
   }
}
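
/* Example (illustrative sketch, not part of the original header): evaluate<>
   only requires that Architecture_t expose static kernels with matching
   signatures. A hypothetical minimal backend, assuming <vector>, <cmath> and
   <algorithm>, could look like this:

      struct ToyArchitecture {
         using Scalar_t = double;
         using Matrix_t = std::vector<double>;   // flat buffer standing in for a matrix
         static void Relu(Matrix_t &A)          { for (auto &x : A) x = std::max(x, 0.); }
         static void Sigmoid(Matrix_t &A)       { for (auto &x : A) x = 1. / (1. + std::exp(-x)); }
         static void Tanh(Matrix_t &A)          { for (auto &x : A) x = std::tanh(x); }
         static void SymmetricRelu(Matrix_t &A) { for (auto &x : A) x = std::abs(x); }
         static void SoftSign(Matrix_t &A)      { for (auto &x : A) x = x / (1. + std::abs(x)); }
         static void Gauss(Matrix_t &A)         { for (auto &x : A) x = std::exp(-x * x); }
      };

      std::vector<double> a{-1., 2.};
      evaluate<ToyArchitecture>(a, EActivationFunction::kRelu);   // a == {0., 2.}
*/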


/*! Compute the first partial derivative of the activation function for
 *  the values given in matrix A and write the results into B. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void evaluateDerivative(typename Architecture_t::Matrix_t & B,
                               EActivationFunction f,
                               const typename Architecture_t::Matrix_t & A)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : Architecture_t::IdentityDerivative(B, A);
      break;
   case EActivationFunction::kRelu     : Architecture_t::ReluDerivative(B, A);
      break;
   case EActivationFunction::kSigmoid  : Architecture_t::SigmoidDerivative(B, A);
      break;
   case EActivationFunction::kTanh     : Architecture_t::TanhDerivative(B, A);
      break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricReluDerivative(B, A);
      break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSignDerivative(B, A);
      break;
   case EActivationFunction::kGauss    : Architecture_t::GaussDerivative(B, A);
      break;
   }
}
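
/* Example (illustrative sketch): during backpropagation the derivative matrix
   produced here is combined element-wise (Hadamard product) with the gradients
   arriving from the following layer. Assuming the hypothetical ToyArchitecture
   above, extended with one derivative kernel per activation, e.g.

      static void ReluDerivative(Matrix_t &B, const Matrix_t &A)
      { for (std::size_t i = 0; i < A.size(); ++i) B[i] = (A[i] > 0.) ? 1. : 0.; }

   a call would look like:

      std::vector<double> a{-1., 2.}, b(2);
      evaluateDerivative<ToyArchitecture>(b, EActivationFunction::kRelu, a);
      // b == {0., 1.}
*/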

//______________________________________________________________________________
//
// Output Functions
//______________________________________________________________________________

/*! Apply the given output function to the values in the matrix X and
 *  write the results into matrix A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Matrix_t &A,
                     EOutputFunction f,
                     const typename Architecture_t::Matrix_t &X)
{
   switch(f)
   {
   case EOutputFunction::kIdentity : Architecture_t::Copy(A, X);
      break;
   case EOutputFunction::kSigmoid  : Architecture_t::Sigmoid(A, X);
      break;
   case EOutputFunction::kSoftmax  : Architecture_t::Softmax(A, X);
      break;
   }
}
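
/* For orientation (standard definitions; the exact kernels live in the
   architecture backends): with X the raw output of the last layer,

      kIdentity : A_ij = X_ij
      kSigmoid  : A_ij = 1 / (1 + exp(-X_ij))
      kSoftmax  : A_ij = exp(X_ij) / sum_k exp(X_ik)

   i.e. softmax normalizes each row of X (one row per event in the batch) to a
   probability distribution over the output units. */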

//______________________________________________________________________________
//
// Loss Functions
//______________________________________________________________________________

/*! Compute the value of the objective function f for given activations
 *  of the output layer and the truth Y. */
template <typename Architecture_t>
inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y,
                     const typename Architecture_t::Matrix_t &output,
                     const typename Architecture_t::Matrix_t &weights)
   -> decltype(Architecture_t::CrossEntropy(Y, output, weights))
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy: return Architecture_t::CrossEntropy(Y, output, weights);
   case ELossFunction::kMeanSquaredError: return Architecture_t::MeanSquaredError(Y, output, weights);
   case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights);
   }
   return 0.0;
}
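
/* For orientation (up to the normalization and weighting conventions of the
   individual backends): kMeanSquaredError corresponds to the weighted mean of
   (Y - output)^2; kCrossEntropy applies a sigmoid to the raw output internally
   and evaluates the binary cross entropy -[Y log(s) + (1 - Y) log(1 - s)] with
   s = sigmoid(output); kSoftmaxCrossEntropy combines a row-wise softmax with
   the corresponding multi-class cross entropy. */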

/*! Compute the gradient of the given loss function f for the activations
 *  output of the output layer and the truth Y, and write the results into dY. */
//______________________________________________________________________________
template <typename Architecture_t>
inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFunction f,
                              const typename Architecture_t::Matrix_t &Y,
                              const typename Architecture_t::Matrix_t &output,
                              const typename Architecture_t::Matrix_t &weights)
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy: Architecture_t::CrossEntropyGradients(dY, Y, output, weights); break;
   case ELossFunction::kMeanSquaredError: Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); break;
   case ELossFunction::kSoftmaxCrossEntropy:
      Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights);
      break;
   }
}
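
/* Example (illustrative sketch; the variable names are hypothetical): a
   training step typically evaluates the loss and then its gradients with the
   same loss-function tag, so that dY seeds the backward pass:

      auto loss = evaluate<Architecture_t>(ELossFunction::kMeanSquaredError,
                                           Y, output, weights);
      evaluateGradients<Architecture_t>(dY, ELossFunction::kMeanSquaredError,
                                        Y, output, weights);
*/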


//______________________________________________________________________________
//
// Regularization
//______________________________________________________________________________

/*! Evaluate the regularization functional for a given weight matrix. */
template<typename Architecture_t>
inline auto regularization(const typename Architecture_t::Matrix_t &A,
                           ERegularization R)
   -> decltype(Architecture_t::L1Regularization(A))
{
   switch(R)
   {
   case ERegularization::kNone :
      return 0.0;
   case ERegularization::kL1 :
      return Architecture_t::L1Regularization(A);
   case ERegularization::kL2 :
      return Architecture_t::L2Regularization(A);
   }
   return 0.0;
}
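
/* For orientation: the functionals are the usual ones, L1(W) = sum_ij |W_ij|
   and L2(W) = sum_ij W_ij^2; the weight-decay factor is applied by the caller
   when the term is added to the total loss. */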

/*! Add the regularization gradient corresponding to weight matrix W, to
 *  the matrix A. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void addRegularizationGradients(typename Architecture_t::Matrix_t &A,
                                       const typename Architecture_t::Matrix_t &W,
                                       typename Architecture_t::Scalar_t weightDecay,
                                       ERegularization R)
{
   switch(R)
   {
   case ERegularization::kNone :
      break;
   case ERegularization::kL1 :
      Architecture_t::AddL1RegularizationGradients(A, W, weightDecay);
      break;
   case ERegularization::kL2 :
      Architecture_t::AddL2RegularizationGradients(A, W, weightDecay);
      break;
   }
}
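
/* For orientation: up to the factor conventions of the individual backends,
   this adds the derivative of the regularization term to the weight gradients,
   i.e. A += weightDecay * sign(W) for L1 and A += 2 * weightDecay * W for L2. */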

//______________________________________________________________________________
//
// Initialization
//______________________________________________________________________________

template<typename Architecture_t>
inline void initialize(typename Architecture_t::Matrix_t & A,
                       EInitialization m)
{
   switch(m) {
   case EInitialization::kGauss    : Architecture_t::InitializeGauss(A);
      break;
   case EInitialization::kUniform  : Architecture_t::InitializeUniform(A);
      break;
   case EInitialization::kIdentity : Architecture_t::InitializeIdentity(A);
      break;
   case EInitialization::kZero     : Architecture_t::InitializeZero(A);
      break;
   case EInitialization::kGlorotNormal  : Architecture_t::InitializeGlorotNormal(A);
      break;
   case EInitialization::kGlorotUniform : Architecture_t::InitializeGlorotUniform(A);
      break;
   }
}
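
/* Example (illustrative sketch): weight matrices are prepared once before
   training, e.g.

      typename Architecture_t::Matrix_t W(nRows, nCols);   // nRows, nCols hypothetical
      initialize<Architecture_t>(W, EInitialization::kGlorotUniform);

   The Glorot variants follow the usual Xavier prescriptions: a normal
   distribution with standard deviation sqrt(2 / (nIn + nOut)) for
   kGlorotNormal, and a uniform distribution on [-r, r] with
   r = sqrt(6 / (nIn + nOut)) for kGlorotUniform, where nIn and nOut are the
   fan-in and fan-out of the layer. */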

} // namespace DNN
} // namespace TMVA

#endif