Functions.h
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

/////////////////////////////////////////////////////////////////////
// Contains function enums for activation and output functions, as //
// well as generic evaluation functions, that delegate the call to //
// the corresponding evaluation kernel.                            //
/////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_FUNCTIONS
#define TMVA_DNN_FUNCTIONS

namespace TMVA
{
namespace DNN
{
//______________________________________________________________________________
//
// Enum Definitions
//______________________________________________________________________________

/*! Enum that represents layer activation functions. */
enum class EActivationFunction
{
   kIdentity = 0,
   kRelu     = 1,
   kSigmoid  = 2,
   kTanh     = 3,
   kSymmRelu = 4,
   kSoftSign = 5,
   kGauss    = 6
};

/*! Enum that represents output functions */
enum class EOutputFunction
{
   kIdentity = 'I',
   kSigmoid  = 'S',
   kSoftmax  = 'M'
};

/*! Enum that represents objective functions for the net, i.e. functions
 *  that take the output from the last layer in the net together with the
 *  truths and return the objective function value that is to be minimized
 *  in the training process. */
enum class ELossFunction
{
   kCrossEntropy        = 'C',
   kMeanSquaredError    = 'R',
   kSoftmaxCrossEntropy = 'S'
};

/*! Enum representing the regularization type applied for a given layer */
enum class ERegularization
{
   kNone = '0',
   kL1   = '1',
   kL2   = '2'
};

/* Enum representing the initialization method used for this layer. */
enum class EInitialization {
   kGauss         = 'G',
   kUniform       = 'U',
   kIdentity      = 'I',
   kZero          = 'Z',
   kGlorotNormal  = 'X',
   kGlorotUniform = 'F',
};

/// Enum representing the optimizer used for training.
enum class EOptimizer {
   kSGD      = 0,
   kAdam     = 1,
   kAdagrad  = 2,
   kRMSProp  = 3,
   kAdadelta = 4,
};

//______________________________________________________________________________
//
// Activation Functions
//______________________________________________________________________________

/*! Apply the given activation function to each value in the given
 *  matrix A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Matrix_t &A,
                     EActivationFunction f)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : break;
   case EActivationFunction::kRelu :     Architecture_t::Relu(A);
      break;
   case EActivationFunction::kSigmoid :  Architecture_t::Sigmoid(A);
      break;
   case EActivationFunction::kTanh :     Architecture_t::Tanh(A);
      break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricRelu(A);
      break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSign(A);
      break;
   case EActivationFunction::kGauss :    Architecture_t::Gauss(A);
      break;
   }
}
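
// Usage sketch (illustrative, not part of the original header): assuming the
// reference architecture TReference<double> from
// "TMVA/DNN/Architectures/Reference.h", whose Matrix_t is TMatrixT<double>,
// an activation is applied to a matrix in place:
//
//    using Arch_t = TMVA::DNN::TReference<double>;
//    Arch_t::Matrix_t A(2, 2);                       // pre-activations
//    A(0, 0) = -1.0; A(0, 1) =  0.5;
//    A(1, 0) =  2.0; A(1, 1) = -0.3;
//    evaluate<Arch_t>(A, EActivationFunction::kRelu);
//    // A now holds max(0, x) of each entry: 0.0, 0.5, 2.0, 0.0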


/*! Compute the first partial derivative of the activation function for
 *  the values given in matrix A and write the results into B. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void evaluateDerivative(typename Architecture_t::Matrix_t & B,
                               EActivationFunction f,
                               const typename Architecture_t::Matrix_t & A)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : Architecture_t::IdentityDerivative(B, A);
      break;
   case EActivationFunction::kRelu :     Architecture_t::ReluDerivative(B, A);
      break;
   case EActivationFunction::kSigmoid :  Architecture_t::SigmoidDerivative(B, A);
      break;
   case EActivationFunction::kTanh :     Architecture_t::TanhDerivative(B, A);
      break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricReluDerivative(B, A);
      break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSignDerivative(B, A);
      break;
   case EActivationFunction::kGauss :    Architecture_t::GaussDerivative(B, A);
      break;
   }
}
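
// Backpropagation sketch (assumed usage, same Arch_t convention as above):
// B receives the element-wise derivative f'(A), which a layer multiplies
// into the incoming gradient during the backward pass:
//
//    Arch_t::Matrix_t df(A.GetNrows(), A.GetNcols());
//    evaluateDerivative<Arch_t>(df, EActivationFunction::kTanh, A);
//    // for kTanh, df(i,j) == 1 - tanh(A(i,j))^2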

//______________________________________________________________________________
//
// Output Functions
//______________________________________________________________________________

/*! Apply the given output function to each value in the given
 *  matrix A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Matrix_t &A,
                     EOutputFunction f,
                     const typename Architecture_t::Matrix_t &X)
{
   switch(f)
   {
   case EOutputFunction::kIdentity : Architecture_t::Copy(A, X);
      break;
   case EOutputFunction::kSigmoid :  Architecture_t::Sigmoid(A, X);
      break;
   case EOutputFunction::kSoftmax :  Architecture_t::Softmax(A, X);
      break;
   }
}
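
// Usage sketch (illustrative): unlike the in-place activation overload, this
// overload reads the raw layer output X and writes the transformed values
// into A; for kSoftmax the raw scores of each event are mapped to class
// probabilities that sum to one:
//
//    Arch_t::Matrix_t X(1, 3);   // raw scores for one event, three classes
//    Arch_t::Matrix_t P(1, 3);   // receives the class probabilities
//    evaluate<Arch_t>(P, EOutputFunction::kSoftmax, X);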

//______________________________________________________________________________
//
// Loss Functions
//______________________________________________________________________________

/*! Compute the value of the objective function f for given activations
 *  of the output layer and the truth Y. */
template <typename Architecture_t>
inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y,
                     const typename Architecture_t::Matrix_t &output, const typename Architecture_t::Matrix_t &weights)
   -> decltype(Architecture_t::CrossEntropy(Y, output, weights))
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy:        return Architecture_t::CrossEntropy(Y, output, weights);
   case ELossFunction::kMeanSquaredError:    return Architecture_t::MeanSquaredError(Y, output, weights);
   case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights);
   }
   return 0.0;
}
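
// Usage sketch (illustrative): Y holds the truth, output the network
// prediction, and weights the per-event weights; the returned value has the
// architecture's scalar type, as deduced by the decltype return:
//
//    auto loss = evaluate<Arch_t>(ELossFunction::kMeanSquaredError,
//                                 Y, output, weights);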

/*! Compute the gradient of the given output function f for given activations
 *  output of the output layer and truth Y and write the results into dY. */
//______________________________________________________________________________
template <typename Architecture_t>
inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFunction f,
                              const typename Architecture_t::Matrix_t &Y,
                              const typename Architecture_t::Matrix_t &output,
                              const typename Architecture_t::Matrix_t &weights)
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy:     Architecture_t::CrossEntropyGradients(dY, Y, output, weights); break;
   case ELossFunction::kMeanSquaredError: Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); break;
   case ELossFunction::kSoftmaxCrossEntropy:
      Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights);
      break;
   }
}
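
// Usage sketch (illustrative): the gradient of the loss with respect to the
// network output is written into dY, which seeds the backward pass through
// the layers:
//
//    Arch_t::Matrix_t dY(output.GetNrows(), output.GetNcols());
//    evaluateGradients<Arch_t>(dY, ELossFunction::kCrossEntropy,
//                              Y, output, weights);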


//______________________________________________________________________________
//
// Regularization
//______________________________________________________________________________

/*! Evaluate the regularization functional for a given weight matrix. */
template<typename Architecture_t>
inline auto regularization(const typename Architecture_t::Matrix_t &A,
                           ERegularization R)
-> decltype(Architecture_t::L1Regularization(A))
{
   switch(R)
   {
   case ERegularization::kNone :
      return 0.0;
   case ERegularization::kL1 :
      return Architecture_t::L1Regularization(A);
   case ERegularization::kL2 :
      return Architecture_t::L2Regularization(A);
   }
   return 0.0;
}
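
// Usage sketch (illustrative): the returned penalty is typically scaled by a
// user-chosen weight-decay factor and added to the loss:
//
//    auto penalty = regularization<Arch_t>(W, ERegularization::kL2);
//    loss += weightDecay * penalty;   // weightDecay: hypothetical scalar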

/*! Add the regularization gradient corresponding to weight matrix W, to
 *  the matrix A. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void addRegularizationGradients(typename Architecture_t::Matrix_t &A,
                                       const typename Architecture_t::Matrix_t &W,
                                       typename Architecture_t::Scalar_t weightDecay,
                                       ERegularization R)
{
   switch(R)
   {
   case ERegularization::kNone :
      break;
   case ERegularization::kL1 :
      Architecture_t::AddL1RegularizationGradients(A, W, weightDecay);
      break;
   case ERegularization::kL2 :
      Architecture_t::AddL2RegularizationGradients(A, W, weightDecay);
      break;
   }
}
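
// Usage sketch (illustrative): called during the backward pass to add the
// decay term to the accumulated weight gradients in place:
//
//    // dW: gradient of the loss w.r.t. W, filled by backpropagation
//    addRegularizationGradients<Arch_t>(dW, W, weightDecay,
//                                       ERegularization::kL2);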

//______________________________________________________________________________
//
// Initialization
//______________________________________________________________________________

template<typename Architecture_t>
inline void initialize(typename Architecture_t::Matrix_t & A,
                       EInitialization m)
{
   switch(m) {
   case EInitialization::kGauss :         Architecture_t::InitializeGauss(A);
      break;
   case EInitialization::kUniform :       Architecture_t::InitializeUniform(A);
      break;
   case EInitialization::kIdentity :      Architecture_t::InitializeIdentity(A);
      break;
   case EInitialization::kZero :          Architecture_t::InitializeZero(A);
      break;
   case EInitialization::kGlorotNormal :  Architecture_t::InitializeGlorotNormal(A);
      break;
   case EInitialization::kGlorotUniform : Architecture_t::InitializeGlorotUniform(A);
      break;
   }
}
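
// Usage sketch (illustrative): typically invoked once per weight matrix
// before training starts:
//
//    Arch_t::Matrix_t W(nOutputs, nInputs);   // nOutputs/nInputs: layer dims
//    initialize<Arch_t>(W, EInitialization::kGlorotUniform);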

} // namespace DNN
} // namespace TMVA

#endif