// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

///////////////////////////////////////////////////////////////////
// Contains function enums for activation and output functions,  //
// as well as generic evaluation functions that delegate the call //
// to the corresponding evaluation kernel.                        //
///////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_FUNCTIONS
#define TMVA_DNN_FUNCTIONS

namespace TMVA
{
namespace DNN
{
//______________________________________________________________________________
//
// Enum Definitions
//______________________________________________________________________________

/*! Enum that represents layer activation functions. */
enum class EActivationFunction
{
   kIdentity = 0,
   kRelu     = 1,
   kSigmoid  = 2,
   kTanh     = 3,
   kSymmRelu = 4,
   kSoftSign = 5,
   kGauss    = 6,
   kFastTanh = 7
};

/*! Enum that represents output functions. */
enum class EOutputFunction
{
   kIdentity = 'I',
   kSigmoid  = 'S',
   kSoftmax  = 'M'
};

/*! Enum that represents objective functions for the net, i.e. functions
 *  that take the output from the last layer in the net together with the
 *  truths and return the objective function value that is to be minimized
 *  in the training process. */
enum class ELossFunction
{
   kCrossEntropy        = 'C',
   kMeanSquaredError    = 'R',
   kSoftmaxCrossEntropy = 'S'
};

/*! Enum representing the regularization type applied for a given layer. */
enum class ERegularization
{
   kNone = '0',
   kL1   = '1',
   kL2   = '2'
};

/* Enum representing the initialization method used for this layer. */
enum class EInitialization {
   kGauss         = 'G',
   kUniform       = 'U',
   kIdentity      = 'I',
   kZero          = 'Z',
   kGlorotNormal  = 'X',
   kGlorotUniform = 'F',
};

/// Enum representing the optimizer used for training.
enum class EOptimizer {
   kSGD      = 0,
   kAdam     = 1,
   kAdagrad  = 2,
   kRMSProp  = 3,
   kAdadelta = 4,
};

//______________________________________________________________________________
//
// Activation Functions
//______________________________________________________________________________

/*! Apply the given activation function to each value in the given
 *  tensor A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Tensor_t &A,
                     EActivationFunction f)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : break;
   case EActivationFunction::kRelu :     Architecture_t::Relu(A);
      break;
   case EActivationFunction::kSigmoid :  Architecture_t::Sigmoid(A);
      break;
   case EActivationFunction::kTanh :     Architecture_t::Tanh(A);
      break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricRelu(A);
      break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSign(A);
      break;
   case EActivationFunction::kGauss :    Architecture_t::Gauss(A);
      break;
   case EActivationFunction::kFastTanh : Architecture_t::FastTanh(A);
      break;
   }
}
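
/* Usage sketch (assumptions: a concrete architecture backend such as the CPU
 * backend TMVA::DNN::TCpu<double>; tensor allocation is backend-specific and
 * only indicated schematically here). The kernel is applied in place:
 *
 *    using Arch = TMVA::DNN::TCpu<double>;
 *    Arch::Tensor_t A = ...;                          // layer pre-activations
 *    evaluate<Arch>(A, EActivationFunction::kRelu);   // A <- max(0, A) elementwise
 */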

/*! Compute the first partial derivative of the activation function for
 *  the values given in tensor A and write the results into B. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void evaluateDerivative(typename Architecture_t::Tensor_t & B,
                               EActivationFunction f,
                               const typename Architecture_t::Tensor_t & A)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : Architecture_t::IdentityDerivative(B, A);
      break;
   case EActivationFunction::kRelu :     Architecture_t::ReluDerivative(B, A);
      break;
   case EActivationFunction::kSigmoid :  Architecture_t::SigmoidDerivative(B, A);
      break;
   case EActivationFunction::kTanh :     Architecture_t::TanhDerivative(B, A);
      break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricReluDerivative(B, A);
      break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSignDerivative(B, A);
      break;
   case EActivationFunction::kGauss :    Architecture_t::GaussDerivative(B, A);
      break;
   case EActivationFunction::kFastTanh : Architecture_t::FastTanhDerivative(B, A);
      break;
   }
}
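
/* Usage sketch (a minimal sketch, reusing Arch and A from the example above):
 * the derivative kernel reads the original input values A and writes f'(A)
 * elementwise into B, which must have the same shape as A:
 *
 *    Arch::Tensor_t B = ...;                                     // same shape as A
 *    evaluateDerivative<Arch>(B, EActivationFunction::kRelu, A); // B_i = f'(A_i)
 */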

// Matrix versions of the evaluation functions (kept for backward compatibility).
template<typename Architecture_t>
inline void evaluateMatrix( typename Architecture_t::Matrix_t &A,
                            EActivationFunction f)
{
   typename Architecture_t::Tensor_t t(A);
   evaluate<Architecture_t>(t, f);
}

template<typename Architecture_t>
inline void evaluateDerivativeMatrix( typename Architecture_t::Matrix_t &B,
                                      EActivationFunction f,
                                      const typename Architecture_t::Matrix_t & A)
{
   typename Architecture_t::Tensor_t t(B);
   evaluateDerivative<Architecture_t>(t, f, typename Architecture_t::Tensor_t(A));
}
//______________________________________________________________________________
//
// Output Functions
//______________________________________________________________________________

/*! Apply the given output function to each value in the given
 *  matrix A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Matrix_t &A,
                     EOutputFunction f,
                     const typename Architecture_t::Matrix_t &X)
{
   switch(f)
   {
   case EOutputFunction::kIdentity : Architecture_t::Copy(A, X);
      break;
   case EOutputFunction::kSigmoid :  Architecture_t::Sigmoid(A, X);
      break;
   case EOutputFunction::kSoftmax :  Architecture_t::Softmax(A, X);
      break;
   }
}
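
/* Usage sketch (a minimal sketch, with Arch an architecture backend as
 * above): X holds the raw outputs of the last layer and A receives the
 * transformed predictions; both are batchSize x nOutputs matrices:
 *
 *    evaluate<Arch>(A, EOutputFunction::kSoftmax, X); // each row of A sums to 1
 */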

//______________________________________________________________________________
//
// Loss Functions
//______________________________________________________________________________

/*! Compute the value of the objective function f for given activations
 *  of the output layer and the truth Y. */
template <typename Architecture_t>
inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y,
                     const typename Architecture_t::Matrix_t &output,
                     const typename Architecture_t::Matrix_t &weights)
   -> decltype(Architecture_t::CrossEntropy(Y, output, weights))
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy:        return Architecture_t::CrossEntropy(Y, output, weights);
   case ELossFunction::kMeanSquaredError:    return Architecture_t::MeanSquaredError(Y, output, weights);
   case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights);
   }
   return 0.0;
}
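
/* Usage sketch (a minimal sketch): Y holds the truth values, output the
 * network predictions and weights the per-event weights; for unweighted data
 * a matrix of ones can be passed:
 *
 *    auto loss = evaluate<Arch>(ELossFunction::kMeanSquaredError, Y, output, weights);
 */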

/*! Compute the gradient of the given loss function f for the activations
 *  output of the output layer and the truth Y and write the results into dY. */
//______________________________________________________________________________
template <typename Architecture_t>
inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFunction f,
                              const typename Architecture_t::Matrix_t &Y,
                              const typename Architecture_t::Matrix_t &output,
                              const typename Architecture_t::Matrix_t &weights)
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy:     Architecture_t::CrossEntropyGradients(dY, Y, output, weights); break;
   case ELossFunction::kMeanSquaredError: Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); break;
   case ELossFunction::kSoftmaxCrossEntropy:
      Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights);
      break;
   }
}
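
/* Usage sketch (a minimal sketch; dY must have the same shape as output):
 * together with the forward evaluation above, this provides one
 * loss-plus-gradient step:
 *
 *    evaluateGradients<Arch>(dY, ELossFunction::kCrossEntropy, Y, output, weights);
 *    // dY now holds the gradient of the loss w.r.t. the network output
 */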


//______________________________________________________________________________
//
// Regularization
//______________________________________________________________________________

/*! Evaluate the regularization functional for a given weight matrix. */
template<typename Architecture_t>
inline auto regularization(const typename Architecture_t::Matrix_t &A,
                           ERegularization R)
   -> decltype(Architecture_t::L1Regularization(A))
{
   switch(R)
   {
   case ERegularization::kNone :
      return 0.0;
   case ERegularization::kL1 :
      return Architecture_t::L1Regularization(A);
   case ERegularization::kL2 :
      return Architecture_t::L2Regularization(A);
   }
   return 0.0;
}
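
/* Usage sketch (a minimal sketch; W is one layer's weight matrix and lambda
 * a caller-chosen weight-decay factor): the returned penalty is added to the
 * loss value:
 *
 *    auto penalty = lambda * regularization<Arch>(W, ERegularization::kL2);
 */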

/*! Add the regularization gradient corresponding to weight matrix W, to
 *  the matrix A. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void addRegularizationGradients(typename Architecture_t::Matrix_t &A,
                                       const typename Architecture_t::Matrix_t &W,
                                       typename Architecture_t::Scalar_t weightDecay,
                                       ERegularization R)
{
   switch(R)
   {
   case ERegularization::kNone :
      break;
   case ERegularization::kL1 :
      Architecture_t::AddL1RegularizationGradients(A, W, weightDecay);
      break;
   case ERegularization::kL2 :
      Architecture_t::AddL2RegularizationGradients(A, W, weightDecay);
      break;
   }
}
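
/* Usage sketch (a minimal sketch; dW holds the loss gradients of W): applied
 * to the weight gradients after backpropagation, so that the optimizer step
 * also descends the regularization term, scaled by weightDecay:
 *
 *    addRegularizationGradients<Arch>(dW, W, weightDecay, ERegularization::kL2);
 */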

//______________________________________________________________________________
//
// Initialization
//______________________________________________________________________________

template<typename Architecture_t>
inline void initialize(typename Architecture_t::Matrix_t & A,
                       EInitialization m)
{
   switch(m) {
   case EInitialization::kGauss :         Architecture_t::InitializeGauss(A);
      break;
   case EInitialization::kUniform :       Architecture_t::InitializeUniform(A);
      break;
   case EInitialization::kIdentity :      Architecture_t::InitializeIdentity(A);
      break;
   case EInitialization::kZero :          Architecture_t::InitializeZero(A);
      break;
   case EInitialization::kGlorotNormal :  Architecture_t::InitializeGlorotNormal(A);
      break;
   case EInitialization::kGlorotUniform : Architecture_t::InitializeGlorotUniform(A);
      break;
   }
}
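
/* Usage sketch (a minimal sketch): fills a freshly allocated weight matrix W
 * in place before training starts:
 *
 *    initialize<Arch>(W, EInitialization::kGlorotUniform);
 */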

} // namespace DNN
} // namespace TMVA

#endif