Layer.h
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

//////////////////////////////////////////////////////////////////////
// Contains Layer and SharedLayer classes that represent layers in  //
// neural networks.                                                 //
//////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_LAYER
#define TMVA_DNN_LAYER

#include <iostream>

#include "TMatrix.h"
#include "Functions.h"

namespace TMVA
{
namespace DNN
{

//______________________________________________________________________________
//
// The Layer Class
//______________________________________________________________________________

/** \class TLayer

    Generic layer class.

    This generic layer class represents a layer of a neural network with
    a given width n and activation function f. Each layer first computes
    the linear pre-activations \f$\mathbf{u} =
    \mathbf{W}\mathbf{x} + \boldsymbol{\theta}\f$, to which the activation
    function f is then applied element-wise.

    In addition to the weight and bias matrices, each layer allocates memory
    for its activations and the corresponding first partial derivatives of
    the activation function, as well as the gradients of the weights and biases.

    The layer provides member functions for the forward and backward
    propagation of activations through the given layer.
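
    A minimal usage sketch (illustration only: `TReference<double>` from
    "TMVA/DNN/Architectures/Reference.h" is one concrete backend, and the
    batch size, input width and layer width below are arbitrary):

    \code
    TLayer<TReference<double>> layer(32, 64, 16, EActivationFunction::kTanh, 1.0);
    layer.Initialize(EInitialization::kGauss);
    TReference<double>::Matrix_t input(32, 64); // one row per event in the batch
    layer.Forward(input);                       // activations in layer.GetOutput()
    \endcode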
*/
template<typename Architecture_t>
   class TLayer
{

public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;


private:

   size_t fBatchSize;  ///< Batch size used for training and evaluation.
   size_t fInputWidth; ///< Number of neurons of the previous layer.
   size_t fWidth;      ///< Number of neurons of this layer.

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   Matrix_t fWeights;             ///< The weights of this layer.
   Matrix_t fBiases;              ///< The bias values of this layer.
   Matrix_t fOutput;              ///< Activations of this layer.
   Matrix_t fDerivatives;         ///< First derivatives of the activations of this layer.
   Matrix_t fWeightGradients;     ///< Gradients w.r.t. the weights of this layer.
   Matrix_t fBiasGradients;       ///< Gradients w.r.t. the bias values of this layer.
   Matrix_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.

   EActivationFunction fF; ///< Activation function of the layer.

public:

   TLayer(size_t BatchSize,
          size_t InputWidth,
          size_t Width,
          EActivationFunction f,
          Scalar_t dropoutProbability);
   TLayer(const TLayer &);

   /*! Initialize fWeights according to the given initialization
    *  method. */
   void Initialize(EInitialization m);
   /*! Compute activation of the layer for the given input. The input
    *  must be in matrix form with the different rows corresponding to
    *  different events in the batch. Computes activations as well as
    *  the first partial derivative of the activation function at those
    *  activations. */
   void inline Forward(Matrix_t & input, bool applyDropout = false);
   /*! Compute weight, bias and activation gradients. Uses the precomputed
    *  first partial derivatives of the activation function computed during
    *  forward propagation and modifies them. Must only be called directly
    *  after the corresponding call to Forward(...). */
   void inline Backward(Matrix_t & gradients_backward,
                        const Matrix_t & activations_backward,
                        ERegularization r,
                        Scalar_t weightDecay);

   void Print() const;

   size_t GetBatchSize()  const {return fBatchSize;}
   size_t GetInputWidth() const {return fInputWidth;}
   size_t GetWidth()      const {return fWidth;}
   Scalar_t GetDropoutProbability() const {return fDropoutProbability;}

   void SetDropoutProbability(Scalar_t p) {fDropoutProbability = p;}

   EActivationFunction GetActivationFunction() const {return fF;}

   Matrix_t       & GetOutput()        {return fOutput;}
   const Matrix_t & GetOutput()  const {return fOutput;}
   Matrix_t       & GetWeights()       {return fWeights;}
   const Matrix_t & GetWeights() const {return fWeights;}
   Matrix_t       & GetBiases()        {return fBiases;}
   const Matrix_t & GetBiases()  const {return fBiases;}
   Matrix_t       & GetActivationGradients()       {return fActivationGradients;}
   const Matrix_t & GetActivationGradients() const {return fActivationGradients;}
   Matrix_t       & GetBiasGradients()       {return fBiasGradients;}
   const Matrix_t & GetBiasGradients() const {return fBiasGradients;}
   Matrix_t       & GetWeightGradients()       {return fWeightGradients;}
   const Matrix_t & GetWeightGradients() const {return fWeightGradients;}

};

//______________________________________________________________________________
//
// The Shared Layer Class
//______________________________________________________________________________

/** \class TSharedLayer

    Layer class with shared weight and bias matrices.

    Like the TLayer class, except that the weight and bias matrices are
    shared between different instances of the net, which can be used to
    implement multithreaded, 'Hogwild'-style training.
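
    A sketch of the intended use (illustration only; backend and sizes are
    placeholders): each worker wraps the same master layer, so the weight
    and bias matrices are shared by reference while activations and
    gradients remain per-instance:

    \code
    TLayer<TReference<double>> master(32, 64, 16, EActivationFunction::kTanh, 1.0);
    master.Initialize(EInitialization::kGauss);
    TSharedLayer<TReference<double>> worker(32, master); // shares master's weights
    \endcode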
*/

template<typename Architecture_t>
class TSharedLayer
{

public:

   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;


private:

   size_t fBatchSize;  ///< Batch size used for training and evaluation.
   size_t fInputWidth; ///< Number of neurons of the previous layer.
   size_t fWidth;      ///< Number of neurons of this layer.

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   Matrix_t & fWeights;           ///< Reference to the weight matrix of this layer.
   Matrix_t & fBiases;            ///< Reference to the bias vectors of this layer.
   Matrix_t fOutput;              ///< Activations of this layer.
   Matrix_t fDerivatives;         ///< First derivatives of the activations of this layer.
   Matrix_t fWeightGradients;     ///< Gradients w.r.t. the weights of this layer.
   Matrix_t fBiasGradients;       ///< Gradients w.r.t. the bias values of this layer.
   Matrix_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.

   EActivationFunction fF; ///< Activation function of the layer.

public:

   TSharedLayer(size_t BatchSize,
                TLayer<Architecture_t> & layer);
   TSharedLayer(const TSharedLayer & layer);

   /*! Compute activation of the layer for the given input. The input
    *  must be in matrix form with the different rows corresponding to
    *  different events in the batch. Computes activations as well as
    *  the first partial derivative of the activation function at those
    *  activations. */
   void inline Forward(Matrix_t & input, bool applyDropout = false);
   /*! Compute weight, bias and activation gradients. Uses the precomputed
    *  first partial derivatives of the activation function computed during
    *  forward propagation and modifies them. Must only be called directly
    *  after the corresponding call to Forward(...). */
   void inline Backward(Matrix_t & gradients_backward,
                        const Matrix_t & activations_backward,
                        ERegularization r,
                        Scalar_t weightDecay);

   void Print() const;

   size_t GetBatchSize()  const {return fBatchSize;}
   size_t GetInputWidth() const {return fInputWidth;}
   size_t GetWidth()      const {return fWidth;}
   Scalar_t GetDropoutProbability() const {return fDropoutProbability;}

   void SetDropoutProbability(Scalar_t p) {fDropoutProbability = p;}

   EActivationFunction GetActivationFunction() const {return fF;}

   Matrix_t       & GetOutput()        {return fOutput;}
   const Matrix_t & GetOutput()  const {return fOutput;}
   Matrix_t       & GetWeights() const {return fWeights;}
   Matrix_t       & GetBiases()        {return fBiases;}
   const Matrix_t & GetBiases()  const {return fBiases;}
   Matrix_t       & GetActivationGradients()       {return fActivationGradients;}
   const Matrix_t & GetActivationGradients() const {return fActivationGradients;}
   Matrix_t       & GetBiasGradients()       {return fBiasGradients;}
   const Matrix_t & GetBiasGradients() const {return fBiasGradients;}
   Matrix_t       & GetWeightGradients()       {return fWeightGradients;}
   const Matrix_t & GetWeightGradients() const {return fWeightGradients;}

};

//______________________________________________________________________________
//
// The Layer Class - Implementation
//______________________________________________________________________________

template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(size_t batchSize,
                               size_t inputWidth,
                               size_t width,
                               EActivationFunction f,
                               Scalar_t dropoutProbability)
   : fBatchSize(batchSize), fInputWidth(inputWidth), fWidth(width),
     fDropoutProbability(dropoutProbability), fWeights(width, fInputWidth),
     fBiases(width, 1), fOutput(fBatchSize, width), fDerivatives(fBatchSize, width),
     fWeightGradients(width, fInputWidth), fBiasGradients(width, 1),
     fActivationGradients(fBatchSize, width), fF(f)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(const TLayer &layer)
   : fBatchSize(layer.fBatchSize), fInputWidth(layer.fInputWidth),
     fWidth(layer.fWidth), fDropoutProbability(layer.fDropoutProbability),
     fWeights(layer.fWidth, layer.fInputWidth), fBiases(layer.fWidth, 1),
     fOutput(layer.fBatchSize, layer.fWidth),
     fDerivatives(layer.fBatchSize, layer.fWidth),
     fWeightGradients(layer.fWidth, layer.fInputWidth),
     fBiasGradients(layer.fWidth, 1),
     fActivationGradients(layer.fBatchSize, layer.fWidth),
     fF(layer.fF)
{
   Architecture_t::Copy(fWeights, layer.GetWeights());
   Architecture_t::Copy(fBiases, layer.GetBiases());
}

//______________________________________________________________________________
template<typename Architecture_t>
auto TLayer<Architecture_t>::Initialize(EInitialization m)
-> void
{
   initialize<Architecture_t>(fWeights, m);
   // Biases are always initialized to zero, independently of the chosen
   // weight-initialization method.
   initialize<Architecture_t>(fBiases, EInitialization::kZero);
}

//______________________________________________________________________________
template<typename Architecture_t>
auto inline TLayer<Architecture_t>::Forward(Matrix_t & input,
                                            bool applyDropout)
-> void
{
   if (applyDropout && (fDropoutProbability != 1.0)) {
      Architecture_t::DropoutForward(input, fDropoutProbability);
   }
   Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
   Architecture_t::AddRowWise(fOutput, fBiases);
   Tensor_t tOutput(fOutput);
   Tensor_t tDerivatives(fDerivatives);
   // Evaluate the derivatives at the pre-activations first: evaluate()
   // below overwrites tOutput in place with f(u).
   evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);

   evaluate<Architecture_t>(tOutput, fF);
}
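
// In matrix notation the forward pass above computes (a sketch; X is the
// input batch with one event per row, W the weight matrix, theta the bias
// vector, and 1 a column of ones):
//
//    u = X W^T + 1 theta^T,   fDerivatives = f'(u),   fOutput = f(u),
//
// where AddRowWise performs the row-wise broadcast of theta and f is
// applied element-wise.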

//______________________________________________________________________________
template<typename Architecture_t>
auto inline TLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
                                             const Matrix_t & activations_backward,
                                             ERegularization r,
                                             Scalar_t weightDecay)
-> void
{

   Tensor_t tGradBw(gradients_backward);
   Tensor_t tActBw(activations_backward);
   Tensor_t tActGrad(fActivationGradients);
   Tensor_t tDeriv(fDerivatives);

   Architecture_t::Hadamard(tDeriv, tActGrad);
   Architecture_t::Backward(tGradBw,
                            fWeightGradients,
                            fBiasGradients,
                            tDeriv,
                            tActGrad,
                            fWeights,
                            tActBw);
   addRegularizationGradients<Architecture_t>(fWeightGradients,
                                              fWeights,
                                              weightDecay, r);
}
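
// Backpropagation sketch (the standard dense-layer equations; this assumes
// the architecture kernels follow the reference implementation): with
// delta = f'(u) * dL/dY formed element-wise by the Hadamard call above,
//
//    fWeightGradients   = delta^T X   (X = activations_backward)
//    fBiasGradients     = column sums of delta
//    gradients_backward = delta W
//
// after which addRegularizationGradients adds the weight-decay term.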

//______________________________________________________________________________
template<typename Architecture_t>
void TLayer<Architecture_t>::Print() const
{
   std::cout << "Width = " << fWeights.GetNrows();
   std::cout << ", Activation Function = ";
   std::cout << static_cast<int>(fF) << std::endl;
}

//______________________________________________________________________________
//
// The Shared Layer Class - Implementation
//______________________________________________________________________________

//______________________________________________________________________________
template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(size_t BatchSize,
                                           TLayer<Architecture_t> &layer)
: fBatchSize(BatchSize),
  fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
  fDropoutProbability(layer.GetDropoutProbability()),
  fWeights(layer.GetWeights()), fBiases(layer.GetBiases()),
  fOutput(fBatchSize, fWidth), fDerivatives(fBatchSize, fWidth),
  fWeightGradients(fWidth, fInputWidth), fBiasGradients(fWidth, 1),
  fActivationGradients(fBatchSize, fWidth), fF(layer.GetActivationFunction())
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(const TSharedLayer &layer)
   : fBatchSize(layer.fBatchSize),
     fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
     fDropoutProbability(layer.fDropoutProbability), fWeights(layer.fWeights),
     fBiases(layer.fBiases), fOutput(layer.fBatchSize, fWidth),
     fDerivatives(layer.fBatchSize, fWidth), fWeightGradients(fWidth, fInputWidth),
     fBiasGradients(fWidth, 1), fActivationGradients(layer.fBatchSize, fWidth),
     fF(layer.fF)
{
}

//______________________________________________________________________________
template<typename Architecture_t>
auto inline TSharedLayer<Architecture_t>::Forward(Matrix_t & input,
                                                  bool applyDropout)
-> void
{
   if (applyDropout && (fDropoutProbability != 1.0)) {
      Architecture_t::DropoutForward(input, fDropoutProbability);
   }
   Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
   Architecture_t::AddRowWise(fOutput, fBiases);
   Tensor_t tOutput(fOutput);
   Tensor_t tDerivatives(fDerivatives);
   evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);
   evaluate<Architecture_t>(tOutput, fF);
}

//______________________________________________________________________________
template<typename Architecture_t>
auto inline TSharedLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
                                                   const Matrix_t & activations_backward,
                                                   ERegularization r,
                                                   Scalar_t weightDecay)
-> void
{
   Architecture_t::Backward(gradients_backward,
                            fWeightGradients,
                            fBiasGradients,
                            fDerivatives,
                            fActivationGradients,
                            fWeights,
                            activations_backward);
   addRegularizationGradients<Architecture_t>(fWeightGradients,
                                              fWeights,
                                              weightDecay, r);
}

//______________________________________________________________________________
template<typename Architecture_t>
void TSharedLayer<Architecture_t>::Print() const
{
   std::cout << "Width = " << fWeights.GetNrows();
   std::cout << ", Activation Function = ";
   std::cout << static_cast<int>(fF) << std::endl;
}

} // namespace DNN
} // namespace TMVA

#endif