Logo ROOT   6.14/05
Reference Guide
DenseLayer.h
Go to the documentation of this file.
1 
2 // Author: Vladimir Ilievski
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TDenseLayer *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Dense Layer Class *
12  * *
13  * Authors (alphabetical): *
14  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005-2015: *
17  * CERN, Switzerland *
18  * U. of Victoria, Canada *
19  * MPI-K Heidelberg, Germany *
20  * U. of Bonn, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #ifndef TMVA_DNN_DENSELAYER
28 #define TMVA_DNN_DENSELAYER
29 
30 #include "TMatrix.h"
31 
32 #include "TMVA/DNN/GeneralLayer.h"
33 #include "TMVA/DNN/Functions.h"
34 
35 #include <iostream>
36 
37 namespace TMVA {
38 namespace DNN {
39 /** \class TDenseLayer
40 
41 Generic layer class.
42 
43 This generic layer class represents a dense layer of a neural network with
44 a given width n and activation function f. The activation function of each
45 layer is given by \f$\mathbf{u} = \mathbf{W}\mathbf{x} + \boldsymbol{\theta}\f$.
46 
47 In addition to the weight and bias matrices, each layer allocates memory
48  for its activations and the corresponding first partial derivatives of
49 the activation function as well as the gradients of the weights and biases.
50 
51 The layer provides member functions for the forward propagation of
52 activations through the given layer.
53 */
54 template <typename Architecture_t>
55 class TDenseLayer : public VGeneralLayer<Architecture_t> {
56 public:
57  using Scalar_t = typename Architecture_t::Scalar_t;
58  using Matrix_t = typename Architecture_t::Matrix_t;
59 
60 private:
61  std::vector<Matrix_t> fDerivatives; ///< First fDerivatives of the activations of this layer.
62 
63  Scalar_t fDropoutProbability; ///< Probability that an input is active.
64 
65  EActivationFunction fF; ///< Activation function of the layer.
66  ERegularization fReg; ///< The regularization method.
67  Scalar_t fWeightDecay; ///< The weight decay.
68 
69 public:
70  /*! Constructor */
71  TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
73 
74  /*! Copy the dense layer provided as a pointer */
76 
77  /*! Copy Constructor */
78  TDenseLayer(const TDenseLayer &);
79 
80  /*! Destructor */
81  ~TDenseLayer();
82 
83  /*! Compute activation of the layer for the given input. The input
84  * must be in 3D tensor form with the different matrices corresponding to
85  * different events in the batch. Computes activations as well as
86  * the first partial derivative of the activation function at those
87  * activations. */
88  void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);
89 
90  /*! Compute weight, bias and activation gradients. Uses the precomputed
91  * first partial derviatives of the activation function computed during
92  * forward propagation and modifies them. Must only be called directly
93  * a the corresponding call to Forward(...). */
94  void Backward(std::vector<Matrix_t> &gradients_backward, const std::vector<Matrix_t> &activations_backward,
95  std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2);
96 
97  /*! Printing the layer info. */
98  void Print() const;
99 
100  /*! Writes the information and the weights about the layer in an XML node. */
101  virtual void AddWeightsXMLTo(void *parent);
102 
103  /*! Read the information and the weights about the layer from XML node. */
104  virtual void ReadWeightsFromXML(void *parent);
105 
106 
107  /*! Getters */
109 
110  const std::vector<Matrix_t> &GetDerivatives() const { return fDerivatives; }
111  std::vector<Matrix_t> &GetDerivatives() { return fDerivatives; }
112 
113  Matrix_t &GetDerivativesAt(size_t i) { return fDerivatives[i]; }
114  const Matrix_t &GetDerivativesAt(size_t i) const { return fDerivatives[i]; }
115 
119 };
120 
121 //
122 //
123 // The Dense Layer Class - Implementation
124 //______________________________________________________________________________
125 template <typename Architecture_t>
126 TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
127  Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
129  : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
130  batchSize, width, init),
131  fDerivatives(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
132 {
133  fDerivatives.emplace_back(batchSize, width);
134 }
135 
136 //______________________________________________________________________________
137 template <typename Architecture_t>
139  : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer->GetDropoutProbability()),
141 {
142  fDerivatives.emplace_back(layer->GetBatchSize(), layer->GetWidth());
143 }
144 
145 //______________________________________________________________________________
146 template <typename Architecture_t>
148  : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer.fDropoutProbability), fF(layer.fF),
149  fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
150 {
151  fDerivatives.emplace_back(layer.fBatchSize, layer.fWidth);
152 }
153 
154 //______________________________________________________________________________
155 template <typename Architecture_t>
157 {
158  // Nothing to do here.
159 }
160 
161 //______________________________________________________________________________
162 template <typename Architecture_t>
163 auto TDenseLayer<Architecture_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
164 {
165  if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
166  Architecture_t::Dropout(input[0], this->GetDropoutProbability());
167  }
168  Architecture_t::MultiplyTranspose(this->GetOutputAt(0), input[0], this->GetWeightsAt(0));
169  Architecture_t::AddRowWise(this->GetOutputAt(0), this->GetBiasesAt(0));
170  evaluateDerivative<Architecture_t>(this->GetDerivativesAt(0), this->GetActivationFunction(), this->GetOutputAt(0));
171  evaluate<Architecture_t>(this->GetOutputAt(0), this->GetActivationFunction());
172 }
173 
174 //______________________________________________________________________________
175 template <typename Architecture_t>
176 auto TDenseLayer<Architecture_t>::Backward(std::vector<Matrix_t> &gradients_backward,
177  const std::vector<Matrix_t> &activations_backward,
178  std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> &
179  /*inp2*/) -> void
180 {
181  if (gradients_backward.size() == 0) {
182  Matrix_t dummy(0, 0);
183  Architecture_t::Backward(dummy, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
184  this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
185  activations_backward[0]);
186 
187  } else {
188  Architecture_t::Backward(gradients_backward[0], this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
189  this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
190  activations_backward[0]);
191  }
192 
193  addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
194  this->GetWeightDecay(), this->GetRegularization());
195 }
196 
197 //______________________________________________________________________________
198 template <typename Architecture_t>
200 {
201  std::cout << " DENSE Layer: \t ";
202  std::cout << " ( Input = " << this->GetWeightsAt(0).GetNcols(); // input size
203  std::cout << " , Width = " << this->GetWeightsAt(0).GetNrows() << " ) "; // layer width
204  if (this->GetOutput().size() > 0) {
205  std::cout << "\tOutput = ( " << this->GetOutput().size() << " , " << this->GetOutput()[0].GetNrows() << " , " << this->GetOutput()[0].GetNcols() << " ) ";
206  }
207  std::vector<std::string> activationNames = { "Identity","Relu","Sigmoid","Tanh","SymmRelu","SoftSign","Gauss" };
208  std::cout << "\t Activation Function = ";
209  std::cout << activationNames[ static_cast<int>(fF) ] << std::endl;
210 }
211 
212 //______________________________________________________________________________
213 
214 template <typename Architecture_t>
216 {
217  // write layer width activation function + weigbht and bias matrices
218 
219  auto layerxml = gTools().xmlengine().NewChild(parent, 0, "DenseLayer");
220 
221  gTools().xmlengine().NewAttr(layerxml, 0, "Width", gTools().StringFromInt(this->GetWidth()));
222 
223  int activationFunction = static_cast<int>(this -> GetActivationFunction());
224  gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
225  TString::Itoa(activationFunction, 10));
226  // write weights and bias matrix
227  this->WriteMatrixToXML(layerxml, "Weights", this -> GetWeightsAt(0));
228  this->WriteMatrixToXML(layerxml, "Biases", this -> GetBiasesAt(0));
229 }
230 
231 //______________________________________________________________________________
232 template <typename Architecture_t>
234 {
235  // Read layer weights and biases from XML
236  this->ReadMatrixXML(parent,"Weights", this -> GetWeightsAt(0));
237  this->ReadMatrixXML(parent,"Biases", this -> GetBiasesAt(0));
238 
239 }
240 
241 } // namespace DNN
242 } // namespace TMVA
243 
244 #endif
Generic General Layer class.
Definition: GeneralLayer.h:45
Matrix_t & GetDerivativesAt(size_t i)
Definition: DenseLayer.h:113
TXMLEngine & xmlengine()
Definition: Tools.h:270
static TString Itoa(Int_t value, Int_t base)
Converts an Int_t to a TString with respect to the base specified (2-36).
Definition: TString.cxx:2000
virtual void AddWeightsXMLTo(void *parent)
Writes the information and the weights about the layer in an XML node.
Definition: DenseLayer.h:215
size_t fWidth
The width of this layer.
Definition: GeneralLayer.h:58
void ReadMatrixXML(void *node, const char *name, Matrix_t &matrix)
Definition: GeneralLayer.h:502
typename Architecture_t::Scalar_t Scalar_t
Definition: DenseLayer.h:57
typename Architecture_t::Matrix_t Matrix_t
Definition: DenseLayer.h:58
const Matrix_t & GetBiasesAt(size_t i) const
Definition: GeneralLayer.h:158
EActivationFunction GetActivationFunction() const
Definition: DenseLayer.h:116
virtual void ReadWeightsFromXML(void *parent)
Read the information and the weights about the layer from XML node.
Definition: DenseLayer.h:233
image html pict1_TGaxis_012 png width
Define new text attributes for the label number "labNum".
Definition: TGaxis.cxx:2551
EActivationFunction fF
Activation function of the layer.
Definition: DenseLayer.h:65
void Forward(std::vector< Matrix_t > &input, bool applyDropout=false)
Compute activation of the layer for the given input.
Definition: DenseLayer.h:163
#define f(i)
Definition: RSha256.hxx:104
Scalar_t GetDropoutProbability() const
Getters.
Definition: DenseLayer.h:108
EInitialization
Definition: Functions.h:70
Matrix_t & GetOutputAt(size_t i)
Definition: GeneralLayer.h:179
~TDenseLayer()
Destructor.
Definition: DenseLayer.h:156
ERegularization GetRegularization() const
Definition: DenseLayer.h:117
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:496
size_t GetBatchSize() const
Getters.
Definition: GeneralLayer.h:140
Generic layer class.
Definition: DenseLayer.h:55
ERegularization fReg
The regularization method.
Definition: DenseLayer.h:66
std::vector< Matrix_t > fDerivatives
First fDerivatives of the activations of this layer.
Definition: DenseLayer.h:61
const Matrix_t & GetBiasGradientsAt(size_t i) const
Definition: GeneralLayer.h:170
std::vector< Matrix_t > & GetDerivatives()
Definition: DenseLayer.h:111
Tools & gTools()
void Print() const
Printing the layer info.
Definition: DenseLayer.h:199
const Matrix_t & GetWeightsAt(size_t i) const
Definition: GeneralLayer.h:152
Matrix_t & GetActivationGradientsAt(size_t i)
Definition: GeneralLayer.h:182
XMLAttrPointer_t NewAttr(XMLNodePointer_t xmlnode, XMLNsPointer_t, const char *name, const char *value)
creates new attribute for xmlnode, namespaces are not supported for attributes
Definition: TXMLEngine.cxx:578
static Int_t init()
size_t fBatchSize
Batch size used for training and evaluation.
Definition: GeneralLayer.h:50
static RooMathCoreReg dummy
Scalar_t GetWeightDecay() const
Definition: DenseLayer.h:118
void Backward(std::vector< Matrix_t > &gradients_backward, const std::vector< Matrix_t > &activations_backward, std::vector< Matrix_t > &inp1, std::vector< Matrix_t > &inp2)
Compute weight, bias and activation gradients.
Definition: DenseLayer.h:176
const Matrix_t & GetDerivativesAt(size_t i) const
Definition: DenseLayer.h:114
Abstract ClassifierFactory template that handles arbitrary types.
const Matrix_t & GetWeightGradientsAt(size_t i) const
Definition: GeneralLayer.h:164
Scalar_t fWeightDecay
The weight decay.
Definition: DenseLayer.h:67
Scalar_t fDropoutProbability
Probability that an input is active.
Definition: DenseLayer.h:63
TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability, EActivationFunction f, ERegularization reg, Scalar_t weightDecay)
Constructor.
Definition: DenseLayer.h:126
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=0)
create new child element for parent node
Definition: TXMLEngine.cxx:707
const std::vector< Matrix_t > & GetDerivatives() const
Definition: DenseLayer.h:110
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:62
void WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix)
Definition: GeneralLayer.h:479
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:31
const std::vector< Matrix_t > & GetOutput() const
Definition: GeneralLayer.h:173
size_t GetWidth() const
Definition: GeneralLayer.h:146