Functions.h
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

/////////////////////////////////////////////////////////////////////
// Contains function enums for activation and output functions, as //
// well as generic evaluation functions, that delegate the call to //
// the corresponding evaluation kernel.                            //
/////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_FUNCTIONS
#define TMVA_DNN_FUNCTIONS

namespace TMVA
{
namespace DNN
{
//______________________________________________________________________________
//
// Enum Definitions
//______________________________________________________________________________

/*! Enum that represents layer activation functions. */
enum class EActivationFunction
{
   kIdentity = 0,
   kRelu     = 1,
   kSigmoid  = 2,
   kTanh     = 3,
   kSymmRelu = 4,
   kSoftSign = 5,
   kGauss    = 6
};

/*! Enum that represents output functions. */
enum class EOutputFunction
{
   kIdentity = 'I',
   kSigmoid  = 'S',
   kSoftmax  = 'M'
};

/*! Enum that represents objective functions for the net, i.e. functions
*  that take the output from the last layer in the net together with the
*  truths and return the objective function value that is to be minimized
*  in the training process. */
enum class ELossFunction
{
   kCrossEntropy        = 'C',
   kMeanSquaredError    = 'R',
   kSoftmaxCrossEntropy = 'S'
};

/*! Enum representing the regularization type applied for a given layer. */
enum class ERegularization
{
   kNone = '0',
   kL1   = '1',
   kL2   = '2'
};

/*! Enum representing the initialization method used for this layer. */
enum class EInitialization {
   kGauss         = 'G',
   kUniform       = 'U',
   kIdentity      = 'I',
   kZero          = 'Z',
   kGlorotNormal  = 'X',
   kGlorotUniform = 'F',
};

/// Enum representing the optimizer used for training.
enum class EOptimizer {
   kSGD = 0,
   kAdam = 1,
   kAdagrad = 2,
   kRMSProp = 3,
   kAdadelta = 4,
};

//______________________________________________________________________________
//
// Activation Functions
//______________________________________________________________________________

/*! Apply the given activation function to each value in the given
*  tensor A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Tensor_t &A,
                     EActivationFunction f)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : break;
   case EActivationFunction::kRelu     : Architecture_t::Relu(A);
      break;
   case EActivationFunction::kSigmoid  : Architecture_t::Sigmoid(A);
      break;
   case EActivationFunction::kTanh     : Architecture_t::Tanh(A);
      break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricRelu(A);
      break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSign(A);
      break;
   case EActivationFunction::kGauss    : Architecture_t::Gauss(A);
      break;
   }
}
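
/*! Example: a minimal sketch of how a backend plugs into evaluate(). The
*  ToyArch type below is hypothetical (real backends are e.g. TCpu or TCuda);
*  it only needs the type aliases and static kernels that the switch above
*  dispatches to.
\code{.cpp}
#include <algorithm>
#include <cmath>
#include <vector>
#include "TMVA/DNN/Functions.h"

struct ToyArch {
   using Tensor_t = std::vector<double>; // flat tensor, for illustration only
   using Matrix_t = std::vector<double>;
   using Scalar_t = double;

   static void Relu(Tensor_t &A)          { for (auto &x : A) x = std::max(0.0, x); }
   static void Sigmoid(Tensor_t &A)       { for (auto &x : A) x = 1.0 / (1.0 + std::exp(-x)); }
   static void Tanh(Tensor_t &A)          { for (auto &x : A) x = std::tanh(x); }
   static void SymmetricRelu(Tensor_t &A) { for (auto &x : A) x = std::fabs(x); }
   static void SoftSign(Tensor_t &A)      { for (auto &x : A) x = x / (1.0 + std::fabs(x)); }
   static void Gauss(Tensor_t &A)         { for (auto &x : A) x = std::exp(-x * x); }
};

int main() {
   ToyArch::Tensor_t t{-1.0, 0.5, 2.0};
   TMVA::DNN::evaluate<ToyArch>(t, TMVA::DNN::EActivationFunction::kRelu);
   // t is now {0.0, 0.5, 2.0}
}
\endcode */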

/*! Compute the first partial derivative of the activation function for
*  the values given in tensor A and write the results into B. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void evaluateDerivative(typename Architecture_t::Tensor_t & B,
                               EActivationFunction f,
                               const typename Architecture_t::Tensor_t & A)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : Architecture_t::IdentityDerivative(B, A);
      break;
   case EActivationFunction::kRelu     : Architecture_t::ReluDerivative(B, A);
      break;
   case EActivationFunction::kSigmoid  : Architecture_t::SigmoidDerivative(B, A);
      break;
   case EActivationFunction::kTanh     : Architecture_t::TanhDerivative(B, A);
      break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricReluDerivative(B, A);
      break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSignDerivative(B, A);
      break;
   case EActivationFunction::kGauss    : Architecture_t::GaussDerivative(B, A);
      break;
   }
}
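
/*! For intuition, one derivative kernel written out on a flat std::vector
*  tensor (an illustrative sketch only; the production kernels live in the
*  architecture backends).
\code{.cpp}
#include <vector>

// B_i = f'(A_i); for ReLU the derivative is the step function, with the
// value at A_i == 0 conventionally taken to be 0.
void reluDerivative(std::vector<double> &B, const std::vector<double> &A)
{
   for (std::size_t i = 0; i < A.size(); ++i)
      B[i] = (A[i] > 0.0) ? 1.0 : 0.0;
}
\endcode */
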
//______________________________________________________________________________
//
// Output Functions
//______________________________________________________________________________

/*! Apply the given output function to each value in the given
*  matrix A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Matrix_t &A,
                     EOutputFunction f,
                     const typename Architecture_t::Matrix_t &X)
{
   switch(f)
   {
   case EOutputFunction::kIdentity : Architecture_t::Copy(A, X);
                                     break;
   case EOutputFunction::kSigmoid  : Architecture_t::Sigmoid(A, X);
                                     break;
   case EOutputFunction::kSoftmax  : Architecture_t::Softmax(A, X);
                                     break;
   }
}
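
/*! As an illustration, a numerically stable row-wise softmax of the kind an
*  architecture's Softmax(A, X) kernel computes (a sketch over a row-major
*  buffer, not the production code).
\code{.cpp}
#include <algorithm>
#include <cmath>
#include <vector>

// Each row of A becomes exp(x_j - max_row) / sum_k exp(x_k - max_row);
// subtracting the row maximum avoids overflow in exp.
void softmaxRows(std::vector<double> &A, const std::vector<double> &X,
                 std::size_t nRows, std::size_t nCols)
{
   for (std::size_t i = 0; i < nRows; ++i) {
      const double *x = &X[i * nCols];
      double *a = &A[i * nCols];
      const double xmax = *std::max_element(x, x + nCols);
      double sum = 0.0;
      for (std::size_t j = 0; j < nCols; ++j) { a[j] = std::exp(x[j] - xmax); sum += a[j]; }
      for (std::size_t j = 0; j < nCols; ++j) a[j] /= sum;
   }
}
\endcode */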

//______________________________________________________________________________
//
// Loss Functions
//______________________________________________________________________________

/*! Compute the value of the objective function f for given activations
*  of the output layer and the truth Y. */
template <typename Architecture_t>
inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y,
                     const typename Architecture_t::Matrix_t &output, const typename Architecture_t::Matrix_t &weights)
   -> decltype(Architecture_t::CrossEntropy(Y, output, weights))
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy:        return Architecture_t::CrossEntropy(Y, output, weights);
   case ELossFunction::kMeanSquaredError:    return Architecture_t::MeanSquaredError(Y, output, weights);
   case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights);
   }
   return 0.0;
}
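
/*! For reference, a sketch of a weighted (sigmoid) cross entropy of the kind
*  kCrossEntropy dispatches to. The per-row event weights, the sigmoid applied
*  to raw scores, and the normalization by the number of entries are assumed
*  conventions, not necessarily what a given backend implements.
\code{.cpp}
#include <cmath>
#include <vector>

// L = -(1/(nRows*nCols)) * sum_ij w_i * (y_ij log s_ij + (1 - y_ij) log(1 - s_ij)),
// where s_ij = sigmoid(out_ij).
double crossEntropy(const std::vector<double> &Y, const std::vector<double> &out,
                    const std::vector<double> &rowWeights,
                    std::size_t nRows, std::size_t nCols)
{
   double loss = 0.0;
   for (std::size_t i = 0; i < nRows; ++i)
      for (std::size_t j = 0; j < nCols; ++j) {
         const double s = 1.0 / (1.0 + std::exp(-out[i * nCols + j]));
         const double y = Y[i * nCols + j];
         loss -= rowWeights[i] * (y * std::log(s) + (1.0 - y) * std::log(1.0 - s));
      }
   return loss / static_cast<double>(nRows * nCols);
}
\endcode */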

/*! Compute the gradient of the given loss function f for the activations
*  output of the output layer and truth Y and write the results into dY. */
//______________________________________________________________________________
template <typename Architecture_t>
inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFunction f,
                              const typename Architecture_t::Matrix_t &Y,
                              const typename Architecture_t::Matrix_t &output,
                              const typename Architecture_t::Matrix_t &weights)
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy:     Architecture_t::CrossEntropyGradients(dY, Y, output, weights); break;
   case ELossFunction::kMeanSquaredError: Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); break;
   case ELossFunction::kSoftmaxCrossEntropy:
      Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights);
      break;
   }
}
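
/*! For the mean squared error, the gradient written into dY follows from
*  differentiating L = (1/(nRows*nCols)) * sum_ij w_i (out_ij - y_ij)^2; the
*  weighting and normalization conventions below are assumptions.
\code{.cpp}
#include <vector>

// dL/d(out_ij) = 2 * w_i * (out_ij - y_ij) / (nRows * nCols)
void mseGradients(std::vector<double> &dY, const std::vector<double> &Y,
                  const std::vector<double> &out, const std::vector<double> &rowWeights,
                  std::size_t nRows, std::size_t nCols)
{
   const double norm = 2.0 / static_cast<double>(nRows * nCols);
   for (std::size_t i = 0; i < nRows; ++i)
      for (std::size_t j = 0; j < nCols; ++j)
         dY[i * nCols + j] = norm * rowWeights[i] * (out[i * nCols + j] - Y[i * nCols + j]);
}
\endcode */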

//______________________________________________________________________________
//
// Regularization
//______________________________________________________________________________

/*! Evaluate the regularization functional for a given weight matrix. */
template<typename Architecture_t>
inline auto regularization(const typename Architecture_t::Matrix_t &A,
                           ERegularization R)
   -> decltype(Architecture_t::L1Regularization(A))
{
   switch(R)
   {
   case ERegularization::kNone :
      return 0.0;
   case ERegularization::kL1 :
      return Architecture_t::L1Regularization(A);
   case ERegularization::kL2 :
      return Architecture_t::L2Regularization(A);
   }
   return 0.0;
}
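
/*! The two functionals dispatched to above are the usual ones: L1(W) is the
*  sum of |W_ij| and L2(W) the sum of W_ij^2 (any extra scale factor is a
*  backend convention). A sketch on a flat weight buffer:
\code{.cpp}
#include <cmath>
#include <vector>

double l1Regularization(const std::vector<double> &W)
{
   double s = 0.0;
   for (double w : W) s += std::fabs(w); // sum_ij |W_ij|
   return s;
}

double l2Regularization(const std::vector<double> &W)
{
   double s = 0.0;
   for (double w : W) s += w * w;        // sum_ij W_ij^2
   return s;
}
\endcode */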

/*! Add the regularization gradient corresponding to weight matrix W, to
*  the matrix A. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void addRegularizationGradients(typename Architecture_t::Matrix_t &A,
                                       const typename Architecture_t::Matrix_t &W,
                                       typename Architecture_t::Scalar_t weightDecay,
                                       ERegularization R)
{
   switch(R)
   {
   case ERegularization::kNone :
      break;
   case ERegularization::kL1 :
      Architecture_t::AddL1RegularizationGradients(A, W, weightDecay);
      break;
   case ERegularization::kL2 :
      Architecture_t::AddL2RegularizationGradients(A, W, weightDecay);
      break;
   }
}
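
/*! The added gradients follow from differentiating the functionals above:
*  the L2 term weightDecay * L2(W) contributes 2 * weightDecay * W_ij, and the
*  L1 term contributes weightDecay * sign(W_ij). A sketch of the L2 case:
\code{.cpp}
#include <vector>

// A += 2 * weightDecay * W, elementwise
void addL2Gradients(std::vector<double> &A, const std::vector<double> &W,
                    double weightDecay)
{
   for (std::size_t i = 0; i < W.size(); ++i)
      A[i] += 2.0 * weightDecay * W[i];
}
\endcode */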

//______________________________________________________________________________
//
// Initialization
//______________________________________________________________________________

/*! Initialize the matrix A using the initialization method m. */
template<typename Architecture_t>
inline void initialize(typename Architecture_t::Matrix_t & A,
                       EInitialization m)
{
   switch(m) {
   case EInitialization::kGauss         : Architecture_t::InitializeGauss(A);
      break;
   case EInitialization::kUniform       : Architecture_t::InitializeUniform(A);
      break;
   case EInitialization::kIdentity      : Architecture_t::InitializeIdentity(A);
      break;
   case EInitialization::kZero          : Architecture_t::InitializeZero(A);
      break;
   case EInitialization::kGlorotNormal  : Architecture_t::InitializeGlorotNormal(A);
      break;
   case EInitialization::kGlorotUniform : Architecture_t::InitializeGlorotUniform(A);
      break;
   }
}
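
/*! For instance, Glorot (Xavier) normal initialization, selected by
*  EInitialization::kGlorotNormal, conventionally draws each weight from a
*  zero-mean normal with standard deviation sqrt(2 / (fanIn + fanOut)); the
*  exact fan convention used by a given backend is an assumption here.
\code{.cpp}
#include <cmath>
#include <random>
#include <vector>

void glorotNormal(std::vector<double> &W, std::size_t fanIn, std::size_t fanOut)
{
   std::mt19937 gen{std::random_device{}()};
   std::normal_distribution<double> dist(0.0, std::sqrt(2.0 / (fanIn + fanOut)));
   for (auto &w : W) w = dist(gen);
}
\endcode */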

} // namespace DNN
} // namespace TMVA

#endif