// @(#)root/tmva: $Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

#ifndef TMVA_DNN_NET
#define TMVA_DNN_NET

#include <vector>
#include <iostream>

#include "Layer.h"

namespace TMVA {
namespace DNN {

/** \class TNet

 Generic neural network class.

 This generic neural network class represents a concrete neural
 network through a vector of layers and coordinates the forward
 and backward propagation through the net.

 The net takes as input a batch from the training data given in
 matrix form, with each row corresponding to a certain training
 event.

 On construction, the neural network allocates all the memory
 required for the training of the neural net and keeps it until
 its destruction.

 The Architecture type argument simply holds the
 architecture-specific data types, which are just the matrix type
 Matrix_t and the used scalar type Scalar_t.

 \tparam Architecture The Architecture type that holds the
 datatypes for a given architecture.
 \tparam Layer_t The type used for the layers. Can be either
 Layer<Architecture> or SharedWeightLayer<Architecture>.
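
 A rough usage sketch (illustrative only; the reference architecture
 TReference<double>, the layer sizes, and the batches X and Y are
 assumptions, not part of this documentation):

 \code
 using Net_t = TNet<TReference<double>>;

 Net_t net(32, 4, ELossFunction::kMeanSquaredError);  // batch size 32, 4 input features
 net.AddLayer(8, EActivationFunction::kTanh);         // hidden layer
 net.AddLayer(1, EActivationFunction::kIdentity);     // linear output layer
 net.Initialize(EInitialization::kGauss);

 net.Forward(X);      // X: 32 x 4 input batch
 net.Backward(X, Y);  // fills the per-layer weight and bias gradients
 \endcode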
*/
template<typename Architecture_t, typename Layer_t = TLayer<Architecture_t>>
   class TNet {

public:
   using Matrix_t        = typename Architecture_t::Matrix_t;
   using Scalar_t        = typename Architecture_t::Scalar_t;
   using LayerIterator_t = typename std::vector<Layer_t>::iterator;

private:
   size_t fBatchSize;  ///< Batch size for training and evaluation of the Network.
   size_t fInputWidth; ///< Number of features in a single input event.

   std::vector<Layer_t> fLayers; ///< Layers in the network.

   Matrix_t        fDummy;       ///< Empty matrix for last step in back propagation.
   ELossFunction   fJ;           ///< The loss function of the network.
   ERegularization fR;           ///< The regularization used for the network.
   Scalar_t        fWeightDecay; ///< The weight decay factor.

public:
   TNet();
   TNet(const TNet & other);
   template<typename OtherArchitecture_t>
   TNet(size_t batchSize, const TNet<OtherArchitecture_t> &);
   /*! Construct a neural net for a given batch size with
    *  given loss function and regularization. */
   TNet(size_t batchSize,
        size_t inputWidth,
        ELossFunction fJ,
        ERegularization fR = ERegularization::kNone,
        Scalar_t fWeightDecay = 0.0);
   /*! Create a clone that uses the same weight and biases matrices but
    *  potentially a different batch size. */
   TNet<Architecture_t, TSharedLayer<Architecture_t>> CreateClone(size_t batchSize);

   /*! Add a layer of the given size to the neural net. */
   void AddLayer(size_t width, EActivationFunction f,
                 Scalar_t dropoutProbability = 1.0);

   /*! Remove all layers from the network. */
   void Clear();

   /*! Add a layer which shares its weights with another TNet instance. */
   template <typename SharedLayer>
   void AddLayer(SharedLayer & layer);

   /*! Iterator to the first layer of the net. */
   LayerIterator_t LayersBegin() {return fLayers.begin();}

   /*! Iterator past the last layer of the net. */
   LayerIterator_t LayersEnd() {return fLayers.end();}

   /*! Initialize the weights in the net with the given
    *  initialization method. */
   inline void Initialize(EInitialization m);

   /*! Initialize the gradients in the net to zero. Required if the net is
    *  used to store velocities of momentum-based minimization techniques. */
   inline void InitializeGradients();

   /*! Forward a given input through the neural net. Computes
    *  all layer activations up to the output layer. */
   inline void Forward(Matrix_t& X, bool applyDropout = false);

   /*! Compute the weight gradients in the net from the given training
    *  samples X and training labels Y. */
   inline void Backward(const Matrix_t &X, const Matrix_t &Y);

   /*! Evaluate the loss function of the net using the activations
    *  that are currently stored in the output layer. */
   inline Scalar_t Loss(const Matrix_t &Y, bool includeRegularization = true) const;

   /*! Propagate the input batch X through the net and evaluate the
    *  error function for the resulting activations of the output
    *  layer. */
   inline Scalar_t Loss(Matrix_t &X, const Matrix_t &Y, bool applyDropout = false);

   /*! Compute the neural network prediction obtained from forwarding the
    *  batch X through the neural network and applying the output function
    *  f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f);

   /*! Compute the neural network prediction obtained from applying the output
    *  function f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, EOutputFunction f) const;

   Scalar_t GetNFlops();

   size_t GetDepth() const                  {return fLayers.size();}
   size_t GetBatchSize() const              {return fBatchSize;}
   Layer_t & GetLayer(size_t i)             {return fLayers[i];}
   const Layer_t & GetLayer(size_t i) const {return fLayers[i];}
   ELossFunction GetLossFunction() const    {return fJ;}
   Matrix_t & GetOutput()                   {return fLayers.back().GetOutput();}
   size_t GetInputWidth() const             {return fInputWidth;}
   size_t GetOutputWidth() const            {return fLayers.back().GetWidth();}
   ERegularization GetRegularization() const {return fR;}
   Scalar_t GetWeightDecay() const          {return fWeightDecay;}

   void SetBatchSize(size_t batchSize)       {fBatchSize = batchSize;}
   void SetInputWidth(size_t inputWidth)     {fInputWidth = inputWidth;}
   void SetRegularization(ERegularization R) {fR = R;}
   void SetLossFunction(ELossFunction J)     {fJ = J;}
   void SetWeightDecay(Scalar_t weightDecay) {fWeightDecay = weightDecay;}
   void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);

   void Print();
};

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   TNet<Architecture_t, Layer_t>::TNet()
    : fBatchSize(0), fInputWidth(0), fLayers(), fDummy(0,0),
    fJ(ELossFunction::kMeanSquaredError), fR(ERegularization::kNone),
    fWeightDecay(0.0)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   TNet<Architecture_t, Layer_t>::TNet(const TNet & other)
    : fBatchSize(other.fBatchSize), fInputWidth(other.fInputWidth),
    fLayers(other.fLayers), fDummy(0,0), fJ(other.fJ), fR(other.fR),
    fWeightDecay(other.fWeightDecay)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
template<typename OtherArchitecture_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    const TNet<OtherArchitecture_t> & other)
    : fBatchSize(batchSize), fInputWidth(other.GetInputWidth()), fLayers(),
    fDummy(0,0), fJ(other.GetLossFunction()), fR(other.GetRegularization()),
    fWeightDecay(other.GetWeightDecay())
{
   fLayers.reserve(other.GetDepth());
   for (size_t i = 0; i < other.GetDepth(); i++) {
      AddLayer(other.GetLayer(i).GetWidth(),
               other.GetLayer(i).GetActivationFunction(),
               other.GetLayer(i).GetDropoutProbability());
      fLayers[i].GetWeights() = (TMatrixT<Double_t>) other.GetLayer(i).GetWeights();
      fLayers[i].GetBiases()  = (TMatrixT<Double_t>) other.GetLayer(i).GetBiases();
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                       size_t inputWidth,
                                       ELossFunction J,
                                       ERegularization R,
                                       Scalar_t weightDecay)
    : fBatchSize(batchSize), fInputWidth(inputWidth), fLayers(), fDummy(0,0),
    fJ(J), fR(R), fWeightDecay(weightDecay)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::CreateClone(size_t BatchSize)
    -> TNet<Architecture_t, TSharedLayer<Architecture_t>>
{
   TNet<Architecture_t, TSharedLayer<Architecture_t>> other(BatchSize, fInputWidth,
                                                            fJ, fR);
   for (auto &l : fLayers) {
      other.AddLayer(l);
   }
   return other;
}
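
// Usage sketch (illustrative, not from the original source): share the trained
// weights with a second net that evaluates one event at a time, e.g.
//
//    auto evalNet = trainNet.CreateClone(1);
//    evalNet.Prediction(Yhat, X, EOutputFunction::kSigmoid);
//
// Since the clone holds TSharedLayer instances, it references the original
// weight and bias matrices rather than copying them.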

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   void TNet<Architecture_t, Layer_t>::AddLayer(size_t width,
                                                EActivationFunction f,
                                                Scalar_t dropoutProbability)
{
   if (fLayers.size() == 0) {
      fLayers.emplace_back(fBatchSize, fInputWidth, width, f, dropoutProbability);
   } else {
      size_t prevWidth = fLayers.back().GetWidth();
      fLayers.emplace_back(fBatchSize, prevWidth, width, f, dropoutProbability);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   void TNet<Architecture_t, Layer_t>::Clear()
{
   fLayers.clear();
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
template<typename SharedLayer_t>
inline void TNet<Architecture_t, Layer_t>::AddLayer(SharedLayer_t & layer)
{
   fLayers.emplace_back(fBatchSize, layer);
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Initialize(EInitialization m)
{
   for (auto &l : fLayers) {
      l.Initialize(m);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::InitializeGradients()
{
   for (auto &l : fLayers) {
      initialize<Architecture_t>(l.GetWeightGradients(), EInitialization::kZero);
      initialize<Architecture_t>(l.GetBiasGradients(),   EInitialization::kZero);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Forward(Matrix_t &input,
                                                   bool applyDropout)
{
   fLayers.front().Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i].Forward(fLayers[i-1].GetOutput(), applyDropout);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Backward(const Matrix_t &X,
                                                    const Matrix_t &Y)
{
   // Seed the backward pass with the gradient of the loss with respect to
   // the output-layer activations.
   evaluateGradients<Architecture_t>(fLayers.back().GetActivationGradients(),
                                     fJ, Y, fLayers.back().GetOutput());

   // Propagate gradients layer by layer from the output towards the input.
   for (size_t i = fLayers.size()-1; i > 0; i--) {
      auto & activation_gradient_backward = fLayers[i-1].GetActivationGradients();
      auto & activations_backward         = fLayers[i-1].GetOutput();
      fLayers[i].Backward(activation_gradient_backward,
                          activations_backward, fR, fWeightDecay);
   }
   // The first layer consumes the input batch directly; fDummy absorbs the
   // unused activation gradients of the (non-existent) previous layer.
   fLayers[0].Backward(fDummy, X, fR, fWeightDecay);
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(const Matrix_t &Y,
                                                bool includeRegularization) const
    -> Scalar_t
{
   auto loss = evaluate<Architecture_t>(fJ, Y, fLayers.back().GetOutput());
   includeRegularization &= (fR != ERegularization::kNone);
   if (includeRegularization) {
      for (auto &l : fLayers) {
         loss += fWeightDecay * regularization<Architecture_t>(l.GetWeights(), fR);
      }
   }
   return loss;
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(Matrix_t &X,
                                                const Matrix_t &Y,
                                                bool applyDropout)
    -> Scalar_t
{
   Forward(X, applyDropout);
   return Loss(Y);
}
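
// Taken together with Backward, one full gradient evaluation amounts to the
// following sketch (illustrative only):
//
//    auto loss = net.Loss(X, Y, /*applyDropout=*/ true); // forward pass + loss
//    net.Backward(X, Y);                                 // gradient pass
//
// after which an external minimizer can consume the per-layer weight and
// bias gradients.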

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Yhat,
                                                      Matrix_t &X,
                                                      EOutputFunction f)
{
   Forward(X, false);
   evaluate<Architecture_t>(Yhat, f, fLayers.back().GetOutput());
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Y_hat,
                                                      EOutputFunction f) const
{
   evaluate<Architecture_t>(Y_hat, f, fLayers.back().GetOutput());
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::GetNFlops()
    -> Scalar_t
{
   Scalar_t flops = 0;

   Scalar_t nb  = (Scalar_t) fBatchSize;
   Scalar_t nlp = (Scalar_t) fInputWidth;

   for(size_t i = 0; i < fLayers.size(); i++) {
      Layer_t & layer = fLayers[i];
      Scalar_t nl = (Scalar_t) layer.GetWidth();

      // Forward propagation.
      flops += nb * nl * (2.0 * nlp - 1);   // Matrix mult.
      flops += nb * nl;                     // Add bias values.
      flops += 2 * nb * nl;                 // Apply activation function and
                                            // compute derivative.
      // Backward propagation.
      flops += nb * nl;                     // Hadamard.
      flops += nlp * nl * (2.0 * nb - 1.0); // Weight gradients.
      flops += nl * (nb - 1);               // Bias gradients.
      if (i > 0) {
         flops += nlp * nb * (2.0 * nl - 1.0); // Previous layer gradients.
      }
      nlp = nl;
   }
   return flops;
}
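
// As a sanity check of the count above (hypothetical numbers): for one layer
// with nb = 32, nlp = 4 inputs and nl = 8 units, the forward pass alone costs
// 32*8*(2*4-1) = 1792 flops for the matrix multiplication, 32*8 = 256 for the
// bias addition, and 2*32*8 = 512 for the activation and its derivative.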

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::SetDropoutProbabilities(
    const std::vector<Double_t> & probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i].SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i].SetDropoutProbability(1.0);
      }
   }
}
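
// For example (illustrative): keep each input of the first layer with
// probability 0.8 and disable dropout everywhere else:
//
//    net.SetDropoutProbabilities({0.8});  // remaining layers default to 1.0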

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::Print()
{
   std::cout << "DEEP NEURAL NETWORK:";
   std::cout << " Loss function = " << static_cast<char>(fJ);
   std::cout << ", Depth = " << fLayers.size() << std::endl;

   size_t i = 1;
   for (auto & l : fLayers) {
      std::cout << "DNN Layer " << i << ":" << std::endl;
      l.Print();
      i++;
   }
}

} // namespace DNN
} // namespace TMVA

#endif
Definition: Net.h:140