// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

//////////////////////////////////////////////////////////////////////
// Contains Layer and SharedLayer classes that represent layers in  //
// neural networks.                                                 //
//////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_LAYER
#define TMVA_DNN_LAYER

#include <iostream>

#include "TMatrix.h"
#include "Functions.h"

namespace TMVA
{
namespace DNN
{

//______________________________________________________________________________
//
// The Layer Class
//______________________________________________________________________________

/** \class TLayer

    Generic layer class.

    This generic layer class represents a layer of a neural network with
    a given width n and activation function f. The activations of the
    layer are computed as \f$\mathbf{a} = f(\mathbf{u})\f$, where the
    linear activations \f$\mathbf{u} = \mathbf{W}\mathbf{x} +
    \boldsymbol{\theta}\f$ are formed from the weight matrix
    \f$\mathbf{W}\f$ and the bias vector \f$\boldsymbol{\theta}\f$, and
    \f$f\f$ is applied element-wise.

    In addition to the weight and bias matrices, each layer allocates memory
    for its activations and the corresponding first partial derivatives of
    the activation function as well as the gradients of the weights and biases.

    The layer provides member functions for the forward and backward
    propagation of activations through the given layer.
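
    A minimal usage sketch, assuming the reference architecture
    TReference<double> from "TMVA/DNN/Architectures/Reference.h"; the
    sizes and the gradients filled in by a subsequent layer are
    hypothetical:

    \code
    using namespace TMVA::DNN;
    using Arch_t = TReference<double>;

    // A layer mapping 8 inputs to 4 neurons for a batch of 32 events,
    // with tanh activations and no dropout (keep probability 1.0).
    TLayer<Arch_t> layer(32, 8, 4, EActivationFunction::kTanh, 1.0);
    layer.Initialize(EInitialization::kGauss);

    Arch_t::Matrix_t input(32, 8); // one event per row
    layer.Forward(input);          // fills layer.GetOutput()

    // Once GetActivationGradients() has been filled by the next layer (or
    // by the loss), gradients can be propagated back to the input:
    Arch_t::Matrix_t gradientsBackward(32, 8);
    layer.Backward(gradientsBackward, input, ERegularization::kNone, 0.0);
    \endcode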
*/
template<typename Architecture_t>
   class TLayer
{

public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;

private:

   size_t fBatchSize;  ///< Batch size used for training and evaluation.
   size_t fInputWidth; ///< Number of neurons of the previous layer.
   size_t fWidth;      ///< Number of neurons of this layer.

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   Matrix_t fWeights;             ///< The weights of this layer.
   Matrix_t fBiases;              ///< The bias values of this layer.
   Matrix_t fOutput;              ///< Activations of this layer.
   Matrix_t fDerivatives;         ///< First derivatives of the activations of this layer.
   Matrix_t fWeightGradients;     ///< Gradients w.r.t. the weights of this layer.
   Matrix_t fBiasGradients;       ///< Gradients w.r.t. the bias values of this layer.
   Matrix_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.

   EActivationFunction fF; ///< Activation function of the layer.

public:

   TLayer(size_t BatchSize,
          size_t InputWidth,
          size_t Width,
          EActivationFunction f,
          Scalar_t dropoutProbability);
   TLayer(const TLayer &);

   /*! Initialize fWeights according to the given initialization
    *  method. */
   void Initialize(EInitialization m);
   /*! Compute activation of the layer for the given input. The input
    *  must be in matrix form with the different rows corresponding to
    *  different events in the batch. Computes activations as well as
    *  the first partial derivatives of the activation function at those
    *  activations. */
   void inline Forward(Matrix_t & input, bool applyDropout = false);
   /*! Compute weight, bias and activation gradients. Uses the precomputed
    *  first partial derivatives of the activation function computed during
    *  forward propagation and modifies them. Must only be called directly
    *  after the corresponding call to Forward(...). */
   void inline Backward(Matrix_t & gradients_backward,
                        const Matrix_t & activations_backward,
                        ERegularization r,
                        Scalar_t weightDecay);

   void Print() const;

   size_t GetBatchSize() const {return fBatchSize;}
   size_t GetInputWidth() const {return fInputWidth;}
   size_t GetWidth() const {return fWidth;}
   Scalar_t GetDropoutProbability() const {return fDropoutProbability;}

   void SetDropoutProbability(Scalar_t p) {fDropoutProbability = p;}

   EActivationFunction GetActivationFunction() const {return fF;}

   Matrix_t & GetOutput() {return fOutput;}
   const Matrix_t & GetOutput() const {return fOutput;}
   Matrix_t & GetWeights() {return fWeights;}
   const Matrix_t & GetWeights() const {return fWeights;}
   Matrix_t & GetBiases() {return fBiases;}
   const Matrix_t & GetBiases() const {return fBiases;}
   Matrix_t & GetActivationGradients() {return fActivationGradients;}
   const Matrix_t & GetActivationGradients() const {return fActivationGradients;}
   Matrix_t & GetBiasGradients() {return fBiasGradients;}
   const Matrix_t & GetBiasGradients() const {return fBiasGradients;}
   Matrix_t & GetWeightGradients() {return fWeightGradients;}
   const Matrix_t & GetWeightGradients() const {return fWeightGradients;}

};

//______________________________________________________________________________
//
// The Shared Layer Class
//______________________________________________________________________________

/** \class TSharedLayer

    Layer class with shared weight and bias matrices.

    Like the TLayer class, except that its weight and bias matrices are
    references to those of another layer, so that they are shared between
    different instances of the net. This can be used to implement
    multithreading in the 'Hogwild' style.
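
    A minimal sketch of the intended use, assuming the reference
    architecture TReference<double> and one worker layer per thread (the
    sizes are hypothetical):

    \code
    using namespace TMVA::DNN;
    TLayer<TReference<double>> master(32, 8, 4,
                                      EActivationFunction::kRelu, 1.0);
    master.Initialize(EInitialization::kGauss);

    // Each worker owns its activations and gradients but references the
    // master's fWeights and fBiases, so updates are visible to all workers.
    TSharedLayer<TReference<double>> worker(32, master);
    \endcode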
*/

template<typename Architecture_t>
class TSharedLayer
{

public:

   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;

private:

   size_t fBatchSize;  ///< Batch size used for training and evaluation.
   size_t fInputWidth; ///< Number of neurons of the previous layer.
   size_t fWidth;      ///< Number of neurons of this layer.

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   Matrix_t & fWeights;           ///< Reference to the weight matrix of this layer.
   Matrix_t & fBiases;            ///< Reference to the bias vectors of this layer.
   Matrix_t fOutput;              ///< Activations of this layer.
   Matrix_t fDerivatives;         ///< First derivatives of the activations of this layer.
   Matrix_t fWeightGradients;     ///< Gradients w.r.t. the weights of this layer.
   Matrix_t fBiasGradients;       ///< Gradients w.r.t. the bias values of this layer.
   Matrix_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.

   EActivationFunction fF; ///< Activation function of the layer.

public:

   TSharedLayer(size_t fBatchSize,
                TLayer<Architecture_t> & layer);
   TSharedLayer(const TSharedLayer & layer);

   /*! Compute activation of the layer for the given input. The input
    *  must be in matrix form with the different rows corresponding to
    *  different events in the batch. Computes activations as well as
    *  the first partial derivatives of the activation function at those
    *  activations. */
   void inline Forward(Matrix_t & input, bool applyDropout = false);
   /*! Compute weight, bias and activation gradients. Uses the precomputed
    *  first partial derivatives of the activation function computed during
    *  forward propagation and modifies them. Must only be called directly
    *  after the corresponding call to Forward(...). */
   void inline Backward(Matrix_t & gradients_backward,
                        const Matrix_t & activations_backward,
                        ERegularization r,
                        Scalar_t weightDecay);

   void Print() const;

   size_t GetBatchSize() const {return fBatchSize;}
   size_t GetInputWidth() const {return fInputWidth;}
   size_t GetWidth() const {return fWidth;}
   Scalar_t GetDropoutProbability() const {return fDropoutProbability;}

   void SetDropoutProbability(Scalar_t p) {fDropoutProbability = p;}

   EActivationFunction GetActivationFunction() const {return fF;}

   Matrix_t & GetOutput() {return fOutput;}
   const Matrix_t & GetOutput() const {return fOutput;}
   Matrix_t & GetWeights() const {return fWeights;}
   Matrix_t & GetBiases() {return fBiases;}
   const Matrix_t & GetBiases() const {return fBiases;}
   Matrix_t & GetActivationGradients() {return fActivationGradients;}
   const Matrix_t & GetActivationGradients() const {return fActivationGradients;}
   Matrix_t & GetBiasGradients() {return fBiasGradients;}
   const Matrix_t & GetBiasGradients() const {return fBiasGradients;}
   Matrix_t & GetWeightGradients() {return fWeightGradients;}
   const Matrix_t & GetWeightGradients() const {return fWeightGradients;}

};

//______________________________________________________________________________
//
// The Layer Class - Implementation
//______________________________________________________________________________

template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(size_t batchSize,
                               size_t inputWidth,
                               size_t width,
                               EActivationFunction f,
                               Scalar_t dropoutProbability)
   : fBatchSize(batchSize), fInputWidth(inputWidth), fWidth(width),
     fDropoutProbability(dropoutProbability), fWeights(width, fInputWidth),
     fBiases(width, 1), fOutput(fBatchSize, width), fDerivatives(fBatchSize, width),
     fWeightGradients(width, fInputWidth), fBiasGradients(width, 1),
     fActivationGradients(fBatchSize, width), fF(f)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(const TLayer &layer)
   : fBatchSize(layer.fBatchSize), fInputWidth(layer.fInputWidth),
     fWidth(layer.fWidth), fDropoutProbability(layer.fDropoutProbability),
     fWeights(layer.fWidth, layer.fInputWidth), fBiases(layer.fWidth, 1),
     fOutput(layer.fBatchSize, layer.fWidth),
     fDerivatives(layer.fBatchSize, layer.fWidth),
     fWeightGradients(layer.fWidth, layer.fInputWidth),
     fBiasGradients(layer.fWidth, 1),
     fActivationGradients(layer.fBatchSize, layer.fWidth),
     fF(layer.fF)
{
   Architecture_t::Copy(fWeights, layer.GetWeights());
   Architecture_t::Copy(fBiases, layer.GetBiases());
}

//______________________________________________________________________________
template<typename Architecture_t>
auto TLayer<Architecture_t>::Initialize(EInitialization m)
-> void
{
   initialize<Architecture_t>(fWeights, m);
   initialize<Architecture_t>(fBiases, EInitialization::kZero);
}

//______________________________________________________________________________
template<typename Architecture_t>
auto inline TLayer<Architecture_t>::Forward(Matrix_t & input,
                                            bool applyDropout)
-> void
{
   if (applyDropout && (fDropoutProbability != 1.0)) {
      Architecture_t::Dropout(input, fDropoutProbability);
   }
   Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
   Architecture_t::AddRowWise(fOutput, fBiases);
   evaluateDerivative<Architecture_t>(fDerivatives, fF, fOutput);
   evaluate<Architecture_t>(fOutput, fF);
}
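
// For illustration (an addition, not part of the original interface): the
// MultiplyTranspose / AddRowWise pair above computes fOutput = input * W^T
// with the bias vector added to every row. Written as explicit loops over a
// TMatrixT-like matrix type (GetNrows()/GetNcols() and operator()(i,j)),
// the equivalent computation is:
template<typename AMatrix>
void ForwardNaiveSketch(AMatrix &output, const AMatrix &input,
                        const AMatrix &weights, const AMatrix &biases)
{
   for (Int_t b = 0; b < input.GetNrows(); b++) {       // event b in the batch
      for (Int_t i = 0; i < weights.GetNrows(); i++) {  // neuron i of the layer
         double sum = 0.0;
         for (Int_t j = 0; j < weights.GetNcols(); j++) {
            sum += weights(i, j) * input(b, j);         // u = W x ...
         }
         output(b, i) = sum + biases(i, 0);             // ... + theta
      }
   }
}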

//______________________________________________________________________________
template<typename Architecture_t>
auto inline TLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
                                             const Matrix_t & activations_backward,
                                             ERegularization r,
                                             Scalar_t weightDecay)
-> void
{
   Architecture_t::Backward(gradients_backward,
                            fWeightGradients,
                            fBiasGradients,
                            fDerivatives,
                            fActivationGradients,
                            fWeights,
                            activations_backward);
   addRegularizationGradients<Architecture_t>(fWeightGradients,
                                              fWeights,
                                              weightDecay, r);
}
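
// For reference, the relations Architecture_t::Backward is expected to
// implement (a sketch of the standard backpropagation step, stated here as
// an assumption about the architecture contract): with D = fDerivatives,
// G = fActivationGradients (dL/da, filled by the following layer) and
// X = activations_backward (the previous layer's activations),
//
//    t                  = D .* G             (elementwise product, t = dL/du)
//    fWeightGradients   = t^T * X            (dL/dW)
//    fBiasGradients     = column sums of t   (dL/dtheta)
//    gradients_backward = t * W              (dL/dX, for the previous layer)
//
// addRegularizationGradients then adds weightDecay * dR(W)/dW to the weight
// gradients for the chosen regularization type r (none, L1 or L2).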

//______________________________________________________________________________
template<typename Architecture_t>
void TLayer<Architecture_t>::Print() const
{
   std::cout << "Width = " << fWeights.GetNrows();
   std::cout << ", Activation Function = ";
   std::cout << static_cast<int>(fF) << std::endl;
}

//______________________________________________________________________________
//
// The Shared Layer Class - Implementation
//______________________________________________________________________________

//______________________________________________________________________________
template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(size_t BatchSize,
                                           TLayer<Architecture_t> &layer)
: fBatchSize(BatchSize),
  fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
  fDropoutProbability(layer.GetDropoutProbability()),
  fWeights(layer.GetWeights()), fBiases(layer.GetBiases()),
  fOutput(fBatchSize, fWidth), fDerivatives(fBatchSize, fWidth),
  fWeightGradients(fWidth, fInputWidth), fBiasGradients(fWidth, 1),
  fActivationGradients(fBatchSize, fWidth), fF(layer.GetActivationFunction())
{
   // Nothing to do here.
}
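
// Note that fWeights and fBiases above bind to the master layer's matrices:
// several TSharedLayer objects constructed from the same TLayer therefore
// read and update one set of parameters, without synchronization, which is
// the lock-free 'Hogwild' scheme referred to in the class documentation.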

//______________________________________________________________________________
template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(const TSharedLayer &layer)
   : fBatchSize(layer.fBatchSize),
     fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
     fDropoutProbability(layer.fDropoutProbability), fWeights(layer.fWeights),
     fBiases(layer.fBiases), fOutput(layer.fBatchSize, fWidth),
     fDerivatives(layer.fBatchSize, fWidth), fWeightGradients(fWidth, fInputWidth),
     fBiasGradients(fWidth, 1), fActivationGradients(layer.fBatchSize, fWidth),
     fF(layer.fF)
{
}

//______________________________________________________________________________
template<typename Architecture_t>
auto inline TSharedLayer<Architecture_t>::Forward(Matrix_t & input,
                                                  bool applyDropout)
-> void
{
   if (applyDropout && (fDropoutProbability != 1.0)) {
      Architecture_t::Dropout(input, fDropoutProbability);
   }
   Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
   Architecture_t::AddRowWise(fOutput, fBiases);
   evaluateDerivative<Architecture_t>(fDerivatives, fF, fOutput);
   evaluate<Architecture_t>(fOutput, fF);
}

//______________________________________________________________________________
template<typename Architecture_t>
auto inline TSharedLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
                                                   const Matrix_t & activations_backward,
                                                   ERegularization r,
                                                   Scalar_t weightDecay)
-> void
{
   Architecture_t::Backward(gradients_backward,
                            fWeightGradients,
                            fBiasGradients,
                            fDerivatives,
                            fActivationGradients,
                            fWeights,
                            activations_backward);
   addRegularizationGradients<Architecture_t>(fWeightGradients,
                                              fWeights,
                                              weightDecay, r);
}

//______________________________________________________________________________
template<typename Architecture_t>
void TSharedLayer<Architecture_t>::Print() const
{
   std::cout << "Width = " << fWeights.GetNrows();
   std::cout << ", Activation Function = ";
   std::cout << static_cast<int>(fF) << std::endl;
}

} // namespace DNN
} // namespace TMVA

#endif