Net.h
// @(#)root/tmva: $Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

#ifndef TMVA_DNN_NET
#define TMVA_DNN_NET

#include <vector>
#include <iostream>

#include "Layer.h"

namespace TMVA {
namespace DNN {

/** \class TNet

 Generic neural network class.

 This generic neural network class represents a concrete neural
 network through a vector of layers and coordinates the forward
 and backward propagation through the net.

 The net takes as input a batch from the training data given in
 matrix form, with each row corresponding to a single training
 event.

 On construction, the neural network allocates all the memory
 required for the training of the neural net and keeps it until
 its destruction.

 The Architecture type argument holds the architecture-specific
 data types, namely the matrix type Matrix_t and the scalar type
 Scalar_t.

 \tparam Architecture_t The Architecture type that holds the
 data types for a given architecture.
 \tparam Layer_t The type used for the layers. Can be either
 Layer<Architecture> or SharedWeightLayer<Architecture>.
*/
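
// A minimal usage sketch (not part of the original header). The
// TReference<double> architecture and the particular enum values are
// assumptions for illustration:
//
//   TNet<TReference<double>> net(32, 16, ELossFunction::kMeanSquaredError,
//                                ERegularization::kL2, 0.01);
//   net.AddLayer(64, EActivationFunction::kTanh);
//   net.AddLayer(1,  EActivationFunction::kIdentity);
//   net.Initialize(EInitialization::kGauss);
//
//   net.Forward(X);               // X: 32 x 16 batch of input events
//   auto loss = net.Loss(X, Y);   // forward pass + loss evaluation
//   net.Backward(X, Y);           // fills weight and bias gradients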
template<typename Architecture_t, typename Layer_t = TLayer<Architecture_t>>
   class TNet {

public:
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;
   using LayerIterator_t = typename std::vector<Layer_t>::iterator;

private:
   size_t fBatchSize;  ///< Batch size for training and evaluation of the network.
   size_t fInputWidth; ///< Number of features in a single input event.

   std::vector<Layer_t> fLayers; ///< Layers in the network.

   Matrix_t fDummy;       ///< Empty matrix for the last step in back propagation.
   ELossFunction fJ;      ///< The loss function of the network.
   ERegularization fR;    ///< The regularization used for the network.
   Scalar_t fWeightDecay; ///< The weight decay factor.

public:
   TNet();
   TNet(const TNet & other);
   template<typename OtherArchitecture_t>
   TNet(size_t batchSize, const TNet<OtherArchitecture_t> &);
   /*! Construct a neural net for a given batch size with the
    *  given loss function and regularization. */
   TNet(size_t batchSize,
        size_t inputWidth,
        ELossFunction fJ,
        ERegularization fR = ERegularization::kNone,
        Scalar_t fWeightDecay = 0.0);
   /*! Create a clone that uses the same weight and biases matrices but
    *  potentially a different batch size. */
   TNet<Architecture_t, TSharedLayer<Architecture_t>> CreateClone(size_t batchSize);

   /*! Add a layer of the given size to the neural net. */
   void AddLayer(size_t width, EActivationFunction f,
                 Scalar_t dropoutProbability = 1.0);

   /*! Remove all layers from the network. */
   void Clear();

   /*! Add a layer which shares its weights with another TNet instance. */
   template <typename SharedLayer>
   void AddLayer(SharedLayer & layer);

   /*! Iterator to the first layer of the net. */
   LayerIterator_t LayersBegin() {return fLayers.begin();}

   /*! Iterator past the last layer of the net. */
   LayerIterator_t LayersEnd() {return fLayers.end();}

   /*! Initialize the weights in the net with the given
    *  initialization method. */
   inline void Initialize(EInitialization m);

   /*! Initialize the gradients in the net to zero. Required if the net is
    *  used to store velocities for momentum-based minimization techniques. */
   inline void InitializeGradients();

   /*! Forward a given input through the neural net. Computes
    *  all layer activations up to the output layer. */
   inline void Forward(Matrix_t& X, bool applyDropout = false);

   /*! Compute the weight gradients in the net from the given training
    *  samples X and training labels Y. */
   inline void Backward(const Matrix_t &X, const Matrix_t &Y);

   /*! Evaluate the loss function of the net using the activations
    *  that are currently stored in the output layer. */
   inline Scalar_t Loss(const Matrix_t &Y) const;

   /*! Propagate the input batch X through the net and evaluate the
    *  error function for the resulting activations of the output
    *  layer. */
   inline Scalar_t Loss(Matrix_t &X, const Matrix_t &Y, bool applyDropout = false);

   /*! Compute the neural network prediction obtained from forwarding the
    *  batch X through the neural network and applying the output function
    *  f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f);

   /*! Compute the neural network prediction obtained from applying the output
    *  function f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, EOutputFunction f) const;

   /*! Return an estimate of the number of floating point operations performed
    *  in one forward and backward pass through the net. */
   Scalar_t GetNFlops();

   size_t GetDepth() const {return fLayers.size();}
   size_t GetBatchSize() const {return fBatchSize;}
   Layer_t & GetLayer(size_t i) {return fLayers[i];}
   const Layer_t & GetLayer(size_t i) const {return fLayers[i];}
   ELossFunction GetLossFunction() const {return fJ;}
   Matrix_t & GetOutput() {return fLayers.back().GetOutput();}
   size_t GetInputWidth() const {return fInputWidth;}
   size_t GetOutputWidth() const {return fLayers.back().GetWidth();}
   ERegularization GetRegularization() const {return fR;}
   Scalar_t GetWeightDecay() const {return fWeightDecay;}

   void SetBatchSize(size_t batchSize) {fBatchSize = batchSize;}
   void SetInputWidth(size_t inputWidth) {fInputWidth = inputWidth;}
   void SetRegularization(ERegularization R) {fR = R;}
   void SetLossFunction(ELossFunction J) {fJ = J;}
   void SetWeightDecay(Scalar_t weightDecay) {fWeightDecay = weightDecay;}
   void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);

   void Print();
};

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet()
    : fBatchSize(0), fInputWidth(0), fLayers(), fDummy(0,0),
    fJ(ELossFunction::kMeanSquaredError), fR(ERegularization::kNone),
    fWeightDecay(0.0)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(const TNet & other)
    : fBatchSize(other.fBatchSize), fInputWidth(other.fInputWidth),
    fLayers(other.fLayers), fDummy(0,0), fJ(other.fJ), fR(other.fR),
    fWeightDecay(other.fWeightDecay)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
template<typename OtherArchitecture_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    const TNet<OtherArchitecture_t> & other)
    : fBatchSize(batchSize), fInputWidth(other.GetInputWidth()), fLayers(),
    fDummy(0,0), fJ(other.GetLossFunction()), fR(other.GetRegularization()),
    fWeightDecay(other.GetWeightDecay())
{
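   // Rebuild the layer structure of the other net, then copy its weights and
   // biases through TMatrixT<Double_t>, which acts as the architecture-
   // independent interchange format.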
   fLayers.reserve(other.GetDepth());
   for (size_t i = 0; i < other.GetDepth(); i++) {
      AddLayer(other.GetLayer(i).GetWidth(),
               other.GetLayer(i).GetActivationFunction(),
               other.GetLayer(i).GetDropoutProbability());
      fLayers[i].GetWeights() = (TMatrixT<Double_t>) other.GetLayer(i).GetWeights();
      fLayers[i].GetBiases()  = (TMatrixT<Double_t>) other.GetLayer(i).GetBiases();
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    size_t inputWidth,
                                    ELossFunction J,
                                    ERegularization R,
                                    Scalar_t weightDecay)
    : fBatchSize(batchSize), fInputWidth(inputWidth), fLayers(), fDummy(0,0),
    fJ(J), fR(R), fWeightDecay(weightDecay)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::CreateClone(size_t BatchSize)
    -> TNet<Architecture_t, TSharedLayer<Architecture_t>>
{
   TNet<Architecture_t, TSharedLayer<Architecture_t>> other(BatchSize, fInputWidth,
                                                            fJ, fR);
   for (auto &l : fLayers) {
      other.AddLayer(l);
   }
   return other;
}
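
// A clone shares the weight and bias matrices of the original net, so it can
// evaluate the same model with a different batch size without duplicating the
// parameters. Hypothetical usage, e.g. for single-event evaluation:
//
//   auto evalNet = net.CreateClone(1);
//   evalNet.Prediction(Y_hat, x, EOutputFunction::kSigmoid);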

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::AddLayer(size_t width,
                                             EActivationFunction f,
                                             Scalar_t dropoutProbability)
{
   if (fLayers.size() == 0) {
      // The first layer connects to the input and therefore has fInputWidth
      // input units; subsequent layers connect to the previous layer.
      fLayers.emplace_back(fBatchSize, fInputWidth, width, f, dropoutProbability);
   } else {
      size_t prevWidth = fLayers.back().GetWidth();
      fLayers.emplace_back(fBatchSize, prevWidth, width, f, dropoutProbability);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Clear()
{
   fLayers.clear();
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
template<typename SharedLayer_t>
inline void TNet<Architecture_t, Layer_t>::AddLayer(SharedLayer_t & layer)
{
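   // The shared layer references the weight and bias matrices of the given
   // layer rather than copying them, so both nets train the same parameters.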
   fLayers.emplace_back(fBatchSize, layer);
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Initialize(EInitialization m)
{
   for (auto &l : fLayers) {
      l.Initialize(m);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::InitializeGradients()
{
   for (auto &l : fLayers) {
      initialize<Architecture_t>(l.GetWeightGradients(), EInitialization::kZero);
      initialize<Architecture_t>(l.GetBiasGradients(), EInitialization::kZero);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Forward(Matrix_t &input,
                                                   bool applyDropout)
{
   fLayers.front().Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i].Forward(fLayers[i-1].GetOutput(), applyDropout);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Backward(const Matrix_t &X,
                                                    const Matrix_t &Y)
{
   evaluateGradients<Architecture_t>(fLayers.back().GetActivationGradients(),
                                     fJ, Y, fLayers.back().GetOutput());

   for (size_t i = fLayers.size()-1; i > 0; i--) {
      auto & activation_gradient_backward = fLayers[i-1].GetActivationGradients();
      auto & activations_backward         = fLayers[i-1].GetOutput();
      fLayers[i].Backward(activation_gradient_backward,
                          activations_backward, fR, fWeightDecay);
   }
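   // The first layer consumes the input batch directly; fDummy absorbs the
   // (unused) gradients with respect to the input.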
   fLayers[0].Backward(fDummy, X, fR, fWeightDecay);
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(const Matrix_t &Y) const
    -> Scalar_t
{
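   // Total loss: the objective J evaluated on the current output activations,
   // plus the weight-decay-scaled regularization terms of all layers:
   // L = J(Y, Y_hat) + fWeightDecay * sum_l R(W_l).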
   auto loss = evaluate<Architecture_t>(fJ, Y, fLayers.back().GetOutput());
   for (auto &l : fLayers) {
      loss += fWeightDecay * regularization<Architecture_t>(l.GetWeights(), fR);
   }
   return loss;
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(Matrix_t &X,
                                                const Matrix_t &Y,
                                                bool applyDropout)
    -> Scalar_t
{
   Forward(X, applyDropout);
   return Loss(Y);
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Yhat,
                                                      Matrix_t &X,
                                                      EOutputFunction f)
{
   Forward(X, false);
   evaluate<Architecture_t>(Yhat, f, fLayers.back().GetOutput());
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Y_hat,
                                                      EOutputFunction f) const
{
   evaluate<Architecture_t>(Y_hat, f, fLayers.back().GetOutput());
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::GetNFlops()
    -> Scalar_t
{
   Scalar_t flops = 0;

   Scalar_t nb  = (Scalar_t) fBatchSize;
   Scalar_t nlp = (Scalar_t) fInputWidth;

   for(size_t i = 0; i < fLayers.size(); i++) {
      Layer_t & layer = fLayers[i];
      Scalar_t nl = (Scalar_t) layer.GetWidth();

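      // A dense layer multiplies the (nb x nlp) input batch with an
      // (nlp x nl) weight matrix: each output element costs nlp
      // multiplications and nlp - 1 additions, hence 2 * nlp - 1 flops.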
      // Forward propagation.
      flops += nb * nl * (2.0 * nlp - 1);   // Matrix multiplication.
      flops += nb * nl;                     // Add bias values.
      flops += 2 * nb * nl;                 // Apply activation function and
                                            // compute derivative.
      // Backward propagation.
      flops += nb * nl;                     // Hadamard product.
      flops += nlp * nl * (2.0 * nb - 1.0); // Weight gradients.
      flops += nl * (nb - 1);               // Bias gradients.
      if (i > 0) {
         flops += nlp * nb * (2.0 * nl - 1.0); // Previous layer gradients.
      }
      nlp = nl;
   }
   return flops;
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::SetDropoutProbabilities(
    const std::vector<Double_t> & probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i].SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i].SetDropoutProbability(1.0);
      }
   }
}
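
// Note that a probability of 1.0 corresponds to no dropout: consistent with
// the default dropoutProbability = 1.0 in AddLayer, each value appears to be
// the probability of keeping a unit active. Layers beyond the end of the
// given vector fall back to 1.0.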

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Print()
{
   std::cout << "DEEP NEURAL NETWORK:";
   std::cout << " Loss function = " << static_cast<char>(fJ);
   std::cout << ", Depth = " << fLayers.size() << std::endl;

   size_t i = 1;
   for (auto & l : fLayers) {
      std::cout << "DNN Layer " << i << ":" << std::endl;
      l.Print();
      i++;
   }
}

} // namespace DNN
} // namespace TMVA

#endif