Logo ROOT   6.12/07
Reference Guide
Go to the documentation of this file.
1 // @(#)root/tmva: $Id$
2 // Author: Simon Pfreundschuh 20/06/16
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
12 #ifndef TMVA_DNN_NET
13 #define TMVA_DNN_NET
15 #include <vector>
16 #include <iostream>
18 #include "Layer.h"
20 namespace TMVA {
21 namespace DNN {
23 /** \class TNet
25  Generic neural network class.
27  This generic neural network class represents a concrete neural
28  network through a vector of layers and coordinates the forward
29  and backward propagation through the net.
31  The net takes as input a batch from the training data given in
32  matrix form, with each row corresponding to a certain training
33  event.
35  On construction, the neural network allocates all the memory
36  required for the training of the neural net and keeps it until
37  its destruction.
39  The Architecture type argument simply holds the
40  architecture-specific data types, which are just the matrix type
41  Matrix_t and the used scalar type Scalar_t.
43  \tparam Architecture_t The Architecture type that holds the
44  datatypes for a given architecture.
45  \tparam Layer_t The type used for the layers. Can be either
46  TLayer<Architecture_t> or TSharedLayer<Architecture_t>.
47 */
48 template<typename Architecture_t, typename Layer_t = TLayer<Architecture_t>>
49  class TNet {
51 public:
52  using Matrix_t = typename Architecture_t::Matrix_t;
53  using Scalar_t = typename Architecture_t::Scalar_t;
54  using LayerIterator_t = typename std::vector<Layer_t>::iterator;
56 private:
57  size_t fBatchSize; ///< Batch size for training and evaluation of the Network.
58  size_t fInputWidth; ///< Number of features in a single input event.
60  std::vector<Layer_t> fLayers; ///< Layers in the network.
62  Matrix_t fDummy; ///< Empty matrix for last step in back propagation.
63  ELossFunction fJ; ///< The loss function of the network.
64  ERegularization fR; ///< The regularization used for the network.
65  Scalar_t fWeightDecay; ///< The weight decay factor.
67 public:
    /*! Construct an empty net: no layers, batch size and input width 0. */
68  TNet();
    /*! Copy constructor: copies batch size, input width, layers, loss
     *  function, regularization and weight decay from other. */
69  TNet(const TNet & other);
    /*! Construct a net with the given batch size whose layer structure,
     *  weights and biases are taken from a net on another architecture. */
70  template<typename OtherArchitecture_t>
71  TNet(size_t batchSize, const TNet<OtherArchitecture_t> &);
72  /*! Construct a neural net for a given batch size and input width with
73  * the given loss function and regularization. */
74  TNet(size_t batchSize,
75  size_t inputWidth,
76  ELossFunction fJ,
78  Scalar_t fWeightDecay = 0.0);
79  /*! Create a clone that uses the same weight and biases matrices but
80  * potentially a different batch size. */
83  /*! Add a layer of the given size to the neural net. The input width of
    * the new layer is the width of the current last layer, or the input
    * width of the net if this is the first layer. */
84  void AddLayer(size_t width, EActivationFunction f,
85  Scalar_t dropoutProbability = 1.0);
87  /*! Remove all layers from the network.*/
88  void Clear();
90  /*! Add a layer which shares its weights with another TNet instance. */
91  template <typename SharedLayer>
92  void AddLayer(SharedLayer & layer);
94  /*! Iterator to the first layer of the net. */
97  /*! Iterator to the last layer of the net. */
100  /*! Initialize the weights in the net with the
101  * initialization method. */
102  inline void Initialize(EInitialization m);
104  /*! Initialize the gradients in the net to zero. Required if net is
105  * used to store velocities of momentum-based minimization techniques. */
106  inline void InitializeGradients();
108  /*! Forward a given input through the neural net. Computes
109  * all layer activations up to the output layer */
110  inline void Forward(Matrix_t& X, bool applyDropout = false);
112  /*! Compute the weight gradients in the net from the given training
113  * samples X and training labels Y. */
114  inline void Backward(const Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights);
116  /*! Evaluate the loss function of the net using the activations
117  * that are currently stored in the output layer. */
118  inline Scalar_t Loss(const Matrix_t &Y, const Matrix_t &weights, bool includeRegularization = true) const;
120  /*! Propagate the input batch X through the net and evaluate the
121  * error function for the resulting activations of the output
122  * layer */
123  inline Scalar_t Loss(Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights, bool applyDropout = false,
124  bool includeRegularization = true);
126  /*! Compute the neural network prediction obtained from forwarding the
127  * batch X through the neural network and applying the output function
128  * f to the activation of the last layer in the network. */
129  inline void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f);
131  /*! Compute the neural network prediction obtained from applying the output
132  * function f to the activation of the last layer in the network. */
133  inline void Prediction(Matrix_t &Y_hat, EOutputFunction f) const;
    /*! Accessors. GetOutput() and GetOutputWidth() call fLayers.back() and
     *  therefore require a net with at least one layer. GetLayer(i) indexes
     *  fLayers directly and performs no bounds check. */
137  size_t GetDepth() const {return fLayers.size();}
138  size_t GetBatchSize() const {return fBatchSize;}
139  Layer_t & GetLayer(size_t i) {return fLayers[i];}
140  const Layer_t & GetLayer(size_t i) const {return fLayers[i];}
141  ELossFunction GetLossFunction() const {return fJ;}
142  Matrix_t & GetOutput() {return fLayers.back().GetOutput();}
143  size_t GetInputWidth() const {return fInputWidth;}
144  size_t GetOutputWidth() const {return fLayers.back().GetWidth();}
    /*! Setters. These only overwrite the stored member; layers that were
     *  already added are not touched by them. */
148  void SetBatchSize(size_t batchSize) {fBatchSize = batchSize;}
149  void SetInputWidth(size_t inputWidth) {fInputWidth = inputWidth;}
151  void SetLossFunction(ELossFunction J) {fJ = J;}
    /*! Set the dropout probability of layer i to probabilities[i]. Layers
     *  with no corresponding entry in the vector are set to 1.0. */
153  void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);
    /*! Print the loss function, the depth and every layer to std::cout. */
155  void Print();
156 };
158 //______________________________________________________________________________
// Constructor that creates an empty net: batch size, input width and weight
// decay are zero, the layer vector is empty and fDummy is a 0x0 matrix.
// NOTE(review): the signature line (160) and initializer line 162 are missing
// from this listing; presumably this is the default constructor TNet() and
// line 162 initializes fJ and fR. Confirm against Net.h.
159 template<typename Architecture_t, typename Layer_t>
161  : fBatchSize(0), fInputWidth(0), fLayers(), fDummy(0,0),
163  fWeightDecay(0.0)
164 {
165  // Nothing to do here.
166 }
168 //______________________________________________________________________________
// Copy constructor: takes batch size, input width, the layer vector, loss
// function, regularization and weight decay from `other`. fDummy is not
// copied but constructed fresh as a 0x0 matrix.
// NOTE(review): the signature line (170) is missing from this listing.
169 template<typename Architecture_t, typename Layer_t>
171  : fBatchSize(other.fBatchSize), fInputWidth(other.fInputWidth),
172  fLayers(other.fLayers), fDummy(0,0), fJ(other.fJ), fR(other.fR),
173  fWeightDecay(other.fWeightDecay)
174 {
175  // Nothing to do here.
176 }
178 //______________________________________________________________________________
// Converting constructor: builds a net with the given batch size from a net
// defined on another architecture. Input width, loss function, regularization
// and weight decay are read through the public getters of `other`.
// NOTE(review): the first signature line (181) is missing from this listing.
179 template<typename Architecture_t, typename Layer_t>
180 template<typename OtherArchitecture_t>
182  const TNet<OtherArchitecture_t> & other)
183  : fBatchSize(batchSize), fInputWidth(other.GetInputWidth()), fLayers(),
184  fDummy(0,0), fJ(other.GetLossFunction()), fR(other.GetRegularization()),
185  fWeightDecay(other.GetWeightDecay())
186 {
    // Allocate room for all layers up front so AddLayer does not reallocate.
187  fLayers.reserve(other.GetDepth());
188  for (size_t i = 0; i < other.GetDepth(); i++) {
    // Recreate layer i with the same width, activation function and dropout
    // probability as the corresponding layer of `other`.
189  AddLayer(other.GetLayer(i).GetWidth(),
190  other.GetLayer(i).GetActivationFunction(),
191  other.GetLayer(i).GetDropoutProbability());
    // Copy weights and biases, converting through TMatrixT<Double_t>.
192  fLayers[i].GetWeights() = (TMatrixT<Double_t>) other.GetLayer(i).GetWeights();
193  fLayers[i].GetBiases() = (TMatrixT<Double_t>) other.GetLayer(i).GetBiases();
194  }
195 }
197 //______________________________________________________________________________
// Constructor that stores batch size, input width, loss function J,
// regularization R and weight decay. The net starts with no layers; they are
// added afterwards through AddLayer.
// NOTE(review): signature lines 199, 202 and 203 are missing from this
// listing; presumably they declare batchSize, R and weightDecay.
198 template<typename Architecture_t, typename Layer_t>
200  size_t inputWidth,
201  ELossFunction J,
204  : fBatchSize(batchSize), fInputWidth(inputWidth), fLayers(), fDummy(0,0),
205  fJ(J), fR(R), fWeightDecay(weightDecay)
206 {
207  // Nothing to do here.
208 }
210 //______________________________________________________________________________
// Creates a TNet over TSharedLayer with the given batch size, the same input
// width, loss function and regularization, and one shared layer added for
// each layer of this net.
// NOTE(review): the signature lines (212-213) are missing from this listing.
// NOTE(review): fWeightDecay is not passed to the clone's constructor, so the
// clone looks like it gets the constructor's default weight decay. Confirm
// whether that is intended.
211 template<typename Architecture_t, typename Layer_t>
214 {
215  TNet<Architecture_t, TSharedLayer<Architecture_t>> other(BatchSize, fInputWidth,
216  fJ, fR);
217  for (auto &l : fLayers) {
218  other.AddLayer(l);
219  }
220  return other;
221 }
223 //______________________________________________________________________________
// Appends a layer of the given width, activation function f and dropout
// probability, constructed in place at the end of fLayers.
// NOTE(review): the signature lines (225-226) are missing from this listing.
224 template<typename Architecture_t, typename Layer_t>
227  Scalar_t dropoutProbability)
228 {
229  if (fLayers.size() == 0) {
    // First layer: its input width is the input width of the net.
230  fLayers.emplace_back(fBatchSize, fInputWidth, width, f, dropoutProbability);
231  } else {
    // Later layers: the input width is the width of the current last layer.
232  size_t prevWidth = fLayers.back().GetWidth();
233  fLayers.emplace_back(fBatchSize, prevWidth, width, f, dropoutProbability);
234  }
235 }
237 //______________________________________________________________________________
// Removes all layers from the net by clearing fLayers. Batch size, input
// width, loss function, regularization and weight decay are left unchanged.
// NOTE(review): the signature line (239) is missing from this listing.
238 template<typename Architecture_t, typename Layer_t>
240 {
241  fLayers.clear();
242 }
244 //______________________________________________________________________________
// Appends a layer constructed in place from the batch size of this net and
// the given existing layer.
// NOTE(review): presumably the Layer_t constructor taking (batchSize, layer)
// shares the weights of `layer` rather than copying them; verify in Layer.h.
245 template<typename Architecture_t, typename Layer_t>
246  template<typename SharedLayer_t>
247  inline void TNet<Architecture_t, Layer_t>::AddLayer(SharedLayer_t & layer)
248 {
249  fLayers.emplace_back(fBatchSize, layer);
250 }
252 //______________________________________________________________________________
// Calls Initialize(m) on every layer of the net, in order.
// NOTE(review): the signature line (254) is missing from this listing.
253 template<typename Architecture_t, typename Layer_t>
255 {
256  for (auto &l : fLayers) {
257  l.Initialize(m);
258  }
259 }
261 //______________________________________________________________________________
// Initializes the weight-gradient and bias-gradient matrices of every layer
// with EInitialization::kZero.
// NOTE(review): the signature line (263) is missing from this listing.
262 template<typename Architecture_t, typename Layer_t>
264 {
265  for (auto &l : fLayers) {
266  initialize<Architecture_t>(l.GetWeightGradients(), EInitialization::kZero);
267  initialize<Architecture_t>(l.GetBiasGradients(), EInitialization::kZero);
268  }
269 }
271 //______________________________________________________________________________
// Forward pass: the first layer receives `input`, and every later layer
// receives the output of the layer before it. applyDropout is passed
// unchanged to every layer.
// The net must contain at least one layer, because fLayers.front() is called
// unconditionally.
// NOTE(review): the first signature line (273) is missing from this listing.
272 template<typename Architecture_t, typename Layer_t>
274  bool applyDropout)
275 {
276  fLayers.front().Forward(input, applyDropout);
278  for (size_t i = 1; i < fLayers.size(); i++) {
279  fLayers[i].Forward(fLayers[i-1].GetOutput(), applyDropout);
280  }
281 }
283 //______________________________________________________________________________
// Backward pass through the net.
//   X       - input batch, used as the backward activations of the first layer
//   Y       - training labels passed to evaluateGradients
//   weights - passed to evaluateGradients together with Y
// Uses the outputs currently stored in the layers, so a forward pass must
// have filled them beforehand. The net must contain at least one layer:
// fLayers.back() is called unconditionally and fLayers.size()-1 would wrap
// around for an empty net.
284 template <typename Architecture_t, typename Layer_t>
285 inline void TNet<Architecture_t, Layer_t>::Backward(const Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights)
286 {
    // Gradients of the loss fJ with respect to the output of the last layer.
288  evaluateGradients<Architecture_t>(fLayers.back().GetActivationGradients(), fJ, Y, fLayers.back().GetOutput(),
289  weights);
    // Walk from the last layer down to layer 1, handing each layer the
    // activation gradients and the output of the layer before it.
291  for (size_t i = fLayers.size()-1; i > 0; i--) {
292  auto & activation_gradient_backward
293  = fLayers[i-1].GetActivationGradients();
294  auto & activations_backward
295  = fLayers[i-1].GetOutput();
296  fLayers[i].Backward(activation_gradient_backward,
297  activations_backward, fR, fWeightDecay);
298  }
    // The first layer has no previous layer: the empty fDummy matrix takes the
    // place of the previous activation gradients and X is its input.
299  fLayers[0].Backward(fDummy, X, fR, fWeightDecay);
301 }
303 //______________________________________________________________________________
// Evaluates the loss fJ for labels Y and the output currently stored in the
// last layer; no forward pass is run here.
//   Y                     - training labels
//   weights               - passed to evaluate together with Y
//   includeRegularization - if true and fR is not ERegularization::kNone, the
//                           regularization term of every layer's weights,
//                           scaled by fWeightDecay, is added to the loss
// Returns the resulting loss value. Requires at least one layer, because
// fLayers.back() is called unconditionally.
304 template <typename Architecture_t, typename Layer_t>
305 inline auto TNet<Architecture_t, Layer_t>::Loss(const Matrix_t &Y, const Matrix_t &weights,
306  bool includeRegularization) const -> Scalar_t
307 {
308  auto loss = evaluate<Architecture_t>(fJ, Y, fLayers.back().GetOutput(), weights);
    // Regularization is skipped entirely when the net uses none.
309  includeRegularization &= (fR != ERegularization::kNone);
310  if (includeRegularization) {
311  for (auto &l : fLayers) {
312  loss += fWeightDecay * regularization<Architecture_t>(l.GetWeights(), fR);
313  }
314  }
315  return loss;
316 }
318 //______________________________________________________________________________
// Runs a forward pass of X through the net (with or without dropout) and then
// returns the loss of the resulting output for labels Y, as computed by
// Loss(Y, weights, includeRegularization).
319 template <typename Architecture_t, typename Layer_t>
320 inline auto TNet<Architecture_t, Layer_t>::Loss(Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights,
321  bool applyDropout, bool includeRegularization) -> Scalar_t
322 {
323  Forward(X, applyDropout);
324  return Loss(Y, weights, includeRegularization);
325 }
327 //______________________________________________________________________________
// Runs a forward pass of X with dropout disabled, then applies the output
// function f to the output of the last layer and stores the result in Yhat.
// NOTE(review): the first signature line (329) is missing from this listing;
// presumably it declares Yhat, which the class declaration names Y_hat.
328 template<typename Architecture_t, typename Layer_t>
330  Matrix_t &X,
331  EOutputFunction f)
332 {
333  Forward(X, false);
334  evaluate<Architecture_t>(Yhat, f, fLayers.back().GetOutput());
335 }
337 //______________________________________________________________________________
// Applies the output function f to the output currently stored in the last
// layer and stores the result in Y_hat. No forward pass is run here.
// Requires at least one layer, because fLayers.back() is called.
// NOTE(review): the first signature line (339) is missing from this listing.
338 template<typename Architecture_t, typename Layer_t>
340  EOutputFunction f) const
341 {
342  evaluate<Architecture_t>(Y_hat, f, fLayers.back().GetOutput());
343 }
345 //______________________________________________________________________________
// Returns an estimate of the number of floating point operations for one
// forward and one backward pass, summed over all layers. The estimate is
// computed from the batch size, the input width and the layer widths only.
// NOTE(review): the signature line (347) is missing from this listing.
346 template<typename Architecture_t, typename Layer_t>
348  -> Scalar_t
349 {
350  Scalar_t flops = 0;
    // nb: batch size; nlp: width of the previous layer (input width for the
    // first layer); nl: width of the current layer.
352  Scalar_t nb = (Scalar_t) fBatchSize;
353  Scalar_t nlp = (Scalar_t) fInputWidth;
355  for(size_t i = 0; i < fLayers.size(); i++) {
356  Layer_t & layer = fLayers[i];
357  Scalar_t nl = (Scalar_t) layer.GetWidth();
359  // Forward propagation.
360  flops += nb * nl * (2.0 * nlp - 1); // Matrix mult.
361  flops += nb * nl; // Add bias values.
362  flops += 2 * nb * nl; // Apply activation function and compute
363  // derivative.
364  // Backward propagation.
365  flops += nb * nl; // Hadamard
366  flops += nlp * nl * (2.0 * nb - 1.0); // Weight gradients
367  flops += nl * (nb - 1); // Bias gradients
    // The first layer has no previous layer, so this term is skipped for it.
368  if (i > 0) {
369  flops += nlp * nb * (2.0 * nl - 1.0); // Previous layer gradients.
370  }
    // The current width becomes the input width of the next layer.
371  nlp = nl;
372  }
373  return flops;
374 }
376 //______________________________________________________________________________
// Sets the dropout probability of layer i to probabilities[i]. If the vector
// has fewer entries than the net has layers, the remaining layers are set to
// 1.0; surplus entries are ignored.
// NOTE(review): the first signature line (378) is missing from this listing.
377 template<typename Architecture_t, typename Layer_t>
379  const std::vector<Double_t> & probabilities)
380 {
381  for (size_t i = 0; i < fLayers.size(); i++) {
382  if (i < probabilities.size()) {
383  fLayers[i].SetDropoutProbability(probabilities[i]);
384  } else {
385  fLayers[i].SetDropoutProbability(1.0);
386  }
387  }
388 }
390 //______________________________________________________________________________
// Prints a header line with the loss function and the depth of the net to
// std::cout, followed by each layer via its own Print(). The loss function is
// written as the enum value cast to char. Layers are numbered from 1.
// NOTE(review): the signature line (392) is missing from this listing.
391 template<typename Architecture_t, typename Layer_t>
393 {
394  std::cout << "DEEP NEURAL NETWORK:";
395  std::cout << " Loss function = " << static_cast<char>(fJ);
396  std::cout << ", Depth = " << fLayers.size() << std::endl;
398  size_t i = 1;
399  for (auto & l : fLayers) {
400  std::cout << "DNN Layer " << i << ":" << std::endl;
401  l.Print();
402  i++;
403  }
405 }
407 } // namespace DNN
408 } // namespace TMVA
410 #endif
Matrix_t & GetOutput()
Definition: Net.h:142
Scalar_t Loss(const Matrix_t &Y, const Matrix_t &weights, bool includeRegularization=true) const
Evaluate the loss function of the net using the activations that are currently stored in the output l...
Definition: Net.h:305
auto * m
Definition: textangle.C:8
Matrix_t fDummy
Empty matrix for last step in back propagation.
Definition: Net.h:62
void SetDropoutProbabilities(const std::vector< Double_t > &probabilities)
Definition: Net.h:378
Scalar_t GetNFlops()
Definition: Net.h:347
void Print()
Definition: Net.h:392
LayerIterator_t LayersBegin()
Iterator to the first layer of the net.
Definition: Net.h:95
std::vector< Layer_t > fLayers
Layers in the network.
Definition: Net.h:60
void SetWeightDecay(Scalar_t weightDecay)
Definition: Net.h:152
ELossFunction fJ
The loss function of the network.
Definition: Net.h:63
Scalar_t GetWeightDecay() const
Definition: Net.h:146
Definition: Functions.h:70
void AddLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Add a layer of the given size to the neural net.
Definition: Net.h:225
size_t fInputWidth
Number of features in a single input event.
Definition: Net.h:58
void Initialize(EInitialization m)
Initialize the weights in the net with the initialization method.
Definition: Net.h:254
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:497
typename std::vector< TLayer< Architecture_t > >::iterator LayerIterator_t
Definition: Net.h:54
Generic neural network class.
Definition: Net.h:49
void Forward(Matrix_t &X, bool applyDropout=false)
Forward a given input through the neural net.
Definition: Net.h:273
size_t fBatchSize
Batch size for training and evaluation of the Network.
Definition: Net.h:57
Scalar_t fWeightDecay
The weight decay factor.
Definition: Net.h:65
LayerIterator_t LayersEnd()
Iterator to the last layer of the net.
Definition: Net.h:98
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
typename Architecture_t::Matrix_t Matrix_t
Definition: Net.h:52
ERegularization GetRegularization() const
Definition: Net.h:145
void SetRegularization(ERegularization R)
Definition: Net.h:150
void Clear()
Remove all layers from the network.
Definition: Net.h:239
const Handle_t kNone
Definition: GuiTypes.h:87
size_t GetDepth() const
Definition: Net.h:137
void Backward(const Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights)
Compute the weight gradients in the net from the given training samples X and training labels Y...
Definition: Net.h:285
TNet< Architecture_t, TSharedLayer< Architecture_t > > CreateClone(size_t batchSize)
Create a clone that uses the same weight and biases matrices but potentially a different batch size...
Definition: Net.h:212
size_t GetOutputWidth() const
Definition: Net.h:144
typename Architecture_t::Scalar_t Scalar_t
Definition: Net.h:53
size_t GetInputWidth() const
Definition: Net.h:143
void Print(std::ostream &os, const OptionType &opt)
ERegularization fR
The regularization used for the network.
Definition: Net.h:64
Enum that represents output functions.
Definition: Functions.h:43
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:54
Abstract ClassifierFactory template that handles arbitrary types.
void InitializeGradients()
Initialize the gradients in the net to zero.
Definition: Net.h:263
auto * l
Definition: textangle.C:4
void SetLossFunction(ELossFunction J)
Definition: Net.h:151
void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f)
Compute the neural network prediction obtained from forwarding the batch X through the neural netw...
Definition: Net.h:329
void SetInputWidth(size_t inputWidth)
Definition: Net.h:149
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:62
Enum that represents layer activation functions.
Definition: Functions.h:31
size_t GetBatchSize() const
Definition: Net.h:138
constexpr Double_t R()
Definition: TMath.h:213
Layer_t & GetLayer(size_t i)
Definition: Net.h:139
void SetBatchSize(size_t batchSize)
Definition: Net.h:148
const Layer_t & GetLayer(size_t i) const
Definition: Net.h:140
ELossFunction GetLossFunction() const
Definition: Net.h:141