Logo ROOT  
Reference Guide
Layer.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 20/06/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12//////////////////////////////////////////////////////////////////////
13// Contains Layer and SharedLayer classes, that represent layers in //
14// neural networks. //
15//////////////////////////////////////////////////////////////////////
16
17#ifndef TMVA_DNN_LAYER
18#define TMVA_DNN_LAYER
19
20#include <iostream>
21
22#include "TMatrix.h"
23#include "Functions.h"
24
25namespace TMVA
26{
27namespace DNN
28{
29
30//______________________________________________________________________________
31//
32// The Layer Class
33//______________________________________________________________________________
34
35/** \class TLayer
36
37 Generic layer class.
38
39 This generic layer class represents a layer of a neural network with
40 a given width n and activation function f. The activations of each
41 layer are given by \f$\mathbf{u} = f(\mathbf{W}\mathbf{x} +
42 \boldsymbol{\theta})\f$.
43
44 In addition to the weight and bias matrices, each layer allocates memory
45 for its activations and the corresponding first partial derivatives of
46 the activation function as well as the gradients of the weights and biases.
47
48 The layer provides member functions for the forward propagation of
49 activations through the given layer.
50*/
51template<typename Architecture_t>
52 class TLayer
53{
54
55public:
56 using Scalar_t = typename Architecture_t::Scalar_t;
57 using Matrix_t = typename Architecture_t::Matrix_t;
58 using Tensor_t = typename Architecture_t::Tensor_t;
59
60
61private:
62
63 size_t fBatchSize; ///< Batch size used for training and evaluation.
64 size_t fInputWidth; ///< Number of neurons of the previous layer.
65 size_t fWidth; ///< Number of neurons of this layer.
66
67 Scalar_t fDropoutProbability; ///< Probability that an input is active.
68
69 Matrix_t fWeights; ///< The weights of this layer.
70 Matrix_t fBiases; ///< The bias values of this layer.
71 Matrix_t fOutput; ///< Activations of this layer.
72 Matrix_t fDerivatives; ///< First derivatives of the activations of this layer.
73 Matrix_t fWeightGradients; ///< Gradients w.r.t. the weights of this layer.
74 Matrix_t fBiasGradients; ///< Gradients w.r.t. the bias values of this layer.
75 Matrix_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.
76
77 EActivationFunction fF; ///< Activation function of the layer.
78
79public:
80
81 TLayer(size_t BatchSize,
82 size_t InputWidth,
83 size_t Width,
85 Scalar_t dropoutProbability);
86 TLayer(const TLayer &);
87
88 /*! Initialize the weights according to the given initialization
89 * method and set the biases to zero. */
91 /*! Compute activation of the layer for the given input. The input
92 * must be in matrix form with the different rows corresponding to
93 * different events in the batch. Computes activations as well as
94 * the first partial derivative of the activation function at those
95 * activations. */
96 void inline Forward(Matrix_t & input, bool applyDropout = false);
97 /*! Compute weight, bias and activation gradients. Uses the precomputed
98 * first partial derivatives of the activation function computed during
99 * forward propagation and modifies them. Must only be called directly
100 * after the corresponding call to Forward(...). */
101 void inline Backward(Matrix_t & gradients_backward,
102 const Matrix_t & activations_backward,
105
106 void Print() const;
107
108 size_t GetBatchSize() const {return fBatchSize;}
109 size_t GetInputWidth() const {return fInputWidth;}
110 size_t GetWidth() const {return fWidth;}
112
114
116
118 const Matrix_t & GetOutput() const {return fOutput;}
120 const Matrix_t & GetWeights() const {return fWeights;}
122 const Matrix_t & GetBiases() const {return fBiases;}
126 const Matrix_t & GetBiasGradients() const {return fBiasGradients;}
129
130};
131
132//______________________________________________________________________________
133//
134// The Shared Layer Class
135//______________________________________________________________________________
136
137/** \class TSharedLayer
138
139 Layer class with shared weight and bias layers.
140
141 Like the Layer class only that weight matrices are shared between
142 different instances of the net, which can be used to implement
143 multithreading 'Hogwild' style.
144*/
145
146template<typename Architecture_t>
148{
149
150public:
151
152 using Scalar_t = typename Architecture_t::Scalar_t;
153 using Matrix_t = typename Architecture_t::Matrix_t;
154 using Tensor_t = typename Architecture_t::Tensor_t;
155
156
157private:
158
159 size_t fBatchSize; ///< Batch size used for training and evaluation.
160 size_t fInputWidth; ///< Number of neurons of the previous layer.
161 size_t fWidth; ///< Number of neurons of this layer.
162
163 Scalar_t fDropoutProbability; ///< Probability that an input is active.
164
165 Matrix_t & fWeights; ///< Reference to the weight matrix of this layer.
166 Matrix_t & fBiases; ///< Reference to the bias vectors of this layer.
167 Matrix_t fOutput; ///< Activations of this layer.
168 Matrix_t fDerivatives; ///< First derivatives of the activations of this layer.
169 Matrix_t fWeightGradients; ///< Gradients w.r.t. the weights of this layer.
170 Matrix_t fBiasGradients; ///< Gradients w.r.t. the bias values of this layer.
171 Matrix_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.
172
173 EActivationFunction fF; ///< Activation function of the layer.
174
175public:
176
178 TLayer<Architecture_t> & layer);
179 TSharedLayer(const TSharedLayer & layer);
180
181 /*! Compute activation of the layer for the given input. The input
182 * must be in matrix form with the different rows corresponding to
183 * different events in the batch. Computes activations as well as
184 * the first partial derivative of the activation function at those
185 * activations. */
186 void inline Forward(Matrix_t & input, bool applyDropout = false);
187 /*! Compute weight, bias and activation gradients. Uses the precomputed
188 * first partial derivatives of the activation function computed during
189 * forward propagation and modifies them. Must only be called directly
190 * after the corresponding call to Forward(...). */
191 void inline Backward(Matrix_t & gradients_backward,
192 const Matrix_t & activations_backward,
195
196 void Print() const;
197
198 size_t GetBatchSize() const {return fBatchSize;}
199 size_t GetInputWidth() const {return fInputWidth;}
200 size_t GetWidth() const {return fWidth;}
202
204
206
208 const Matrix_t & GetOutput() const {return fOutput;}
209 Matrix_t & GetWeights() const {return fWeights;}
211 const Matrix_t & GetBiases() const {return fBiases;}
215 const Matrix_t & GetBiasGradients() const {return fBiasGradients;}
218
219};
220
221//______________________________________________________________________________
222//
223// The Layer Class - Implementation
224//______________________________________________________________________________
225
226template<typename Architecture_t>
228 size_t inputWidth,
229 size_t width,
231 Scalar_t dropoutProbability)
232 : fBatchSize(batchSize), fInputWidth(inputWidth), fWidth(width),
233 fDropoutProbability(dropoutProbability), fWeights(width, fInputWidth),
234 fBiases(width, 1), fOutput(fBatchSize, width), fDerivatives(fBatchSize, width),
235 fWeightGradients(width, fInputWidth), fBiasGradients(width, 1),
236 fActivationGradients(fBatchSize, width), fF(f)
237{
238 // Nothing to do here.
239}
240
241//______________________________________________________________________________
242template<typename Architecture_t>
244 : fBatchSize(layer.fBatchSize), fInputWidth(layer.fInputWidth),
245 fWidth(layer.fWidth), fDropoutProbability(layer.fDropoutProbability),
246 fWeights(layer.fWidth, layer.fInputWidth), fBiases(layer.fWidth, 1),
247 fOutput(layer.fBatchSize, layer.fWidth),
248 fDerivatives(layer.fBatchSize, layer.fWidth),
249 fWeightGradients(layer.fWidth, layer.fInputWidth),
250 fBiasGradients(layer.fWidth, 1),
251 fActivationGradients(layer.fBatchSize, layer.fWidth),
252 fF(layer.fF)
253{
256}
257
258//______________________________________________________________________________
259template<typename Architecture_t>
261-> void
262{
263 initialize<Architecture_t>(fWeights, m);
264 initialize<Architecture_t>(fBiases, EInitialization::kZero);
265}
266
267//______________________________________________________________________________
268template<typename Architecture_t>
270 bool applyDropout)
271-> void
272{
273 if (applyDropout && (fDropoutProbability != 1.0)) {
274 Architecture_t::DropoutForward(input, fDropoutProbability);
275 }
276 Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
277 Architecture_t::AddRowWise(fOutput, fBiases);
278 Tensor_t tOutput(fOutput);
279 Tensor_t tDerivatives(fDerivatives);
280 evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);
281
282 evaluate<Architecture_t>(tOutput, fF);
283}
284
285//______________________________________________________________________________
286template<typename Architecture_t>
288 const Matrix_t & activations_backward,
291-> void
292{
293
294 Tensor_t tGradBw(gradients_backward);
295 Tensor_t tActBw(activations_backward);
296 Tensor_t tActGrad(fActivationGradients);
297 Tensor_t tDeriv(fDerivatives);
298
299 Architecture_t::Hadamard( tDeriv, tActGrad);
300 Architecture_t::Backward( tGradBw,
301 fWeightGradients,
302 fBiasGradients,
303 tDeriv,
304 tActGrad,
305 fWeights,
306 tActBw);
307 addRegularizationGradients<Architecture_t>(fWeightGradients,
308 fWeights,
309 weightDecay, r);
310}
311
312//______________________________________________________________________________
313template<typename Architecture_t>
315{
316 std::cout << "Width = " << fWeights.GetNrows();
317 std::cout << ", Activation Function = ";
318 std::cout << static_cast<int>(fF) << std::endl;
319}
320
321//______________________________________________________________________________
322//
323// The Shared Layer Class - Implementation
324//______________________________________________________________________________
325
326//______________________________________________________________________________
327template<typename Architecture_t>
330: fBatchSize(BatchSize),
331fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
332fDropoutProbability(layer.GetDropoutProbability()),
333fWeights(layer.GetWeights()), fBiases(layer.GetBiases()),
334fOutput(fBatchSize, fWidth), fDerivatives(fBatchSize, fWidth),
335fWeightGradients(fWidth, fInputWidth), fBiasGradients(fWidth, 1),
336fActivationGradients(fBatchSize, fWidth), fF(layer.GetActivationFunction())
337{
338 // Nothing to do here.
339}
340
341//______________________________________________________________________________
342template<typename Architecture_t>
344 : fBatchSize(layer.fBatchSize),
345 fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
346 fDropoutProbability(layer.fDropoutProbability), fWeights(layer.fWeights),
347 fBiases(layer.fBiases), fOutput(layer.fBatchSize, fWidth),
348 fDerivatives(layer.fBatchSize, fWidth), fWeightGradients(fWidth, fInputWidth),
349 fBiasGradients(fWidth, 1), fActivationGradients(layer.fBatchSize, fWidth),
350 fF(layer.fF)
351{
352}
353
354//______________________________________________________________________________
355template<typename Architecture_t>
357 bool applyDropout)
358-> void
359{
360 if (applyDropout && (fDropoutProbability != 1.0)) {
361 Architecture_t::DropoutForward(input, fDropoutProbability);
362 }
363 Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
364 Architecture_t::AddRowWise(fOutput, fBiases);
365 Tensor_t tOutput(fOutput);
366 Tensor_t tDerivatives(fDerivatives);
367 evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);
368 evaluate<Architecture_t>(tOutput, fF);
369}
370
371//______________________________________________________________________________
372template<typename Architecture_t>
373auto inline TSharedLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
374 const Matrix_t & activations_backward,
377-> void
378{
379 Architecture_t::Backward(gradients_backward,
380 fWeightGradients,
381 fBiasGradients,
382 fDerivatives,
383 fActivationGradients,
384 fWeights,
385 activations_backward);
386 addRegularizationGradients<Architecture_t>(fWeightGradients,
387 fWeights,
388 weightDecay, r);
389}
390
391//______________________________________________________________________________
392template<typename Architecture_t>
394{
395 std::cout << "Width = " << fWeights.GetNrows();
396 std::cout << ", Activation Function = ";
397 std::cout << static_cast<int>(fF) << std::endl;
398}
399
400} // namespace DNN
401} // namespace TMVA
402
403#endif
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
Generic layer class.
Definition: Layer.h:53
void Backward(Matrix_t &gradients_backward, const Matrix_t &activations_backward, ERegularization r, Scalar_t weightDecay)
Compute weight, bias and activation gradients.
Definition: Layer.h:287
const Matrix_t & GetBiasGradients() const
Definition: Layer.h:126
void SetDropoutProbability(Scalar_t p)
Definition: Layer.h:113
const Matrix_t & GetActivationGradients() const
Definition: Layer.h:124
EActivationFunction fF
Activation function of the layer.
Definition: Layer.h:77
Matrix_t & GetWeights()
Definition: Layer.h:119
Matrix_t fActivationGradients
Gradients w.r.t. the activations of this layer.
Definition: Layer.h:75
TLayer(size_t BatchSize, size_t InputWidth, size_t Width, EActivationFunction f, Scalar_t dropoutProbability)
Definition: Layer.h:227
size_t fInputWidth
Number of neurons of the previous layer.
Definition: Layer.h:64
Matrix_t fBiasGradients
Gradients w.r.t. the bias values of this layer.
Definition: Layer.h:74
const Matrix_t & GetBiases() const
Definition: Layer.h:122
const Matrix_t & GetOutput() const
Definition: Layer.h:118
Matrix_t fOutput
Activations of this layer.
Definition: Layer.h:71
EActivationFunction GetActivationFunction() const
Definition: Layer.h:115
const Matrix_t & GetWeightGradients() const
Definition: Layer.h:128
size_t GetBatchSize() const
Definition: Layer.h:108
size_t GetInputWidth() const
Definition: Layer.h:109
Matrix_t & GetBiasGradients()
Definition: Layer.h:125
Scalar_t fDropoutProbability
Probability that an input is active.
Definition: Layer.h:67
Matrix_t fBiases
The bias values of this layer.
Definition: Layer.h:70
Matrix_t & GetActivationGradients()
Definition: Layer.h:123
size_t fWidth
Number of neurons of this layer.
Definition: Layer.h:65
typename Architecture_t::Matrix_t Matrix_t
Definition: Layer.h:57
Matrix_t & GetOutput()
Definition: Layer.h:117
typename Architecture_t::Scalar_t Scalar_t
Definition: Layer.h:56
size_t GetDropoutProbability() const
Definition: Layer.h:111
void Print() const
Definition: Layer.h:314
Matrix_t & GetBiases()
Definition: Layer.h:121
typename Architecture_t::Tensor_t Tensor_t
Definition: Layer.h:58
Matrix_t fWeightGradients
Gradients w.r.t. the weights of this layer.
Definition: Layer.h:73
size_t fBatchSize
Batch size used for training and evaluation.
Definition: Layer.h:63
void Initialize(EInitialization m)
Initialize the weights according to the given initialization method.
Definition: Layer.h:260
Matrix_t fWeights
The weights of this layer.
Definition: Layer.h:69
Matrix_t fDerivatives
First derivatives of the activations of this layer.
Definition: Layer.h:72
Matrix_t & GetWeightGradients()
Definition: Layer.h:127
size_t GetWidth() const
Definition: Layer.h:110
const Matrix_t & GetWeights() const
Definition: Layer.h:120
void Forward(Matrix_t &input, bool applyDropout=false)
Compute activation of the layer for the given input.
Definition: Layer.h:269
Layer class with shared weight and bias layers.
Definition: Layer.h:148
Matrix_t & fBiases
Reference to the bias vectors of this layer.
Definition: Layer.h:166
Matrix_t fOutput
Activations of this layer.
Definition: Layer.h:167
Matrix_t & GetBiases()
Definition: Layer.h:210
TSharedLayer(size_t fBatchSize, TLayer< Architecture_t > &layer)
Definition: Layer.h:328
Matrix_t & fWeights
Reference to the weight matrix of this layer.
Definition: Layer.h:165
Matrix_t & GetActivationGradients()
Definition: Layer.h:212
const Matrix_t & GetBiasGradients() const
Definition: Layer.h:215
size_t GetWidth() const
Definition: Layer.h:200
Matrix_t fBiasGradients
Gradients w.r.t. the bias values of this layer.
Definition: Layer.h:170
Matrix_t & GetWeightGradients()
Definition: Layer.h:216
typename Architecture_t::Tensor_t Tensor_t
Definition: Layer.h:154
Matrix_t fDerivatives
First derivatives of the activations of this layer.
Definition: Layer.h:168
const Matrix_t & GetBiases() const
Definition: Layer.h:211
typename Architecture_t::Matrix_t Matrix_t
Definition: Layer.h:153
size_t fInputWidth
Number of neurons of the previous layer.
Definition: Layer.h:160
Matrix_t & GetOutput()
Definition: Layer.h:207
size_t fWidth
Number of neurons of this layer.
Definition: Layer.h:161
const Matrix_t & GetWeightGradients() const
Definition: Layer.h:217
size_t fBatchSize
Batch size used for training and evaluation.
Definition: Layer.h:159
Matrix_t fWeightGradients
Gradients w.r.t. the weights of this layer.
Definition: Layer.h:169
EActivationFunction GetActivationFunction() const
Definition: Layer.h:205
Matrix_t fActivationGradients
Gradients w.r.t. the activations of this layer.
Definition: Layer.h:171
size_t GetDropoutProbability() const
Definition: Layer.h:201
size_t GetInputWidth() const
Definition: Layer.h:199
void Print() const
Definition: Layer.h:393
size_t GetBatchSize() const
Definition: Layer.h:198
void Forward(Matrix_t &input, bool applyDropout=false)
Compute activation of the layer for the given input.
Definition: Layer.h:356
typename Architecture_t::Scalar_t Scalar_t
Definition: Layer.h:152
EActivationFunction fF
Activation function of the layer.
Definition: Layer.h:173
Scalar_t fDropoutProbability
Probability that an input is active.
Definition: Layer.h:163
Matrix_t & GetWeights() const
Definition: Layer.h:209
const Matrix_t & GetOutput() const
Definition: Layer.h:208
const Matrix_t & GetActivationGradients() const
Definition: Layer.h:213
Matrix_t & GetBiasGradients()
Definition: Layer.h:214
void SetDropoutProbability(Scalar_t p)
Definition: Layer.h:203
void Backward(Matrix_t &gradients_backward, const Matrix_t &activations_backward, ERegularization r, Scalar_t weightDecay)
Compute weight, bias and activation gradients.
Definition: Layer.h:373
void Copy(void *source, void *dest)
EInitialization
Definition: Functions.h:72
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:65
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
create variable transformations
auto * m
Definition: textangle.C:8