Logo ROOT  
Reference Guide
DenseLayer.h
Go to the documentation of this file.
1
2// Author: Vladimir Ilievski
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TDenseLayer *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Dense Layer Class *
12 * *
13 * Authors (alphabetical): *
14 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15 * *
16 * Copyright (c) 2005-2015: *
17 * CERN, Switzerland *
18 * U. of Victoria, Canada *
19 * MPI-K Heidelberg, Germany *
20 * U. of Bonn, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27#ifndef TMVA_DNN_DENSELAYER
28#define TMVA_DNN_DENSELAYER
29
#include "TMatrix.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/Functions.h"

#include <iostream>
#include <iomanip>
#include <vector>
#include <string>
40
41namespace TMVA {
42namespace DNN {
43/** \class TDenseLayer
44
45Generic layer class.
46
47This generic layer class represents a dense layer of a neural network with
48a given width n and activation function f. The activation function of each
49layer is given by \f$\mathbf{u} = \mathbf{W}\mathbf{x} + \boldsymbol{\theta}\f$.
50
51In addition to the weight and bias matrices, each layer allocates memory
52for its activations and the corresponding input tensor before evaluation of
53the activation function as well as the gradients of the weights and biases.
54
55The layer provides member functions for the forward propagation of
56activations through the given layer.
57*/
58template <typename Architecture_t>
59class TDenseLayer : public VGeneralLayer<Architecture_t> {
60public:
61
62 using Scalar_t = typename Architecture_t::Scalar_t;
63 using Matrix_t = typename Architecture_t::Matrix_t;
64 using Tensor_t = typename Architecture_t::Tensor_t;
65
66private:
67
68 Tensor_t fInputActivation; ///< output of GEMM and input to activation function
69 Tensor_t fDerivatives; ///< activation function gradient
70
71 Scalar_t fDropoutProbability; ///< Probability that an input is active.
72
73 EActivationFunction fF; ///< Activation function of the layer.
74 ERegularization fReg; ///< The regularization method.
75 Scalar_t fWeightDecay; ///< The weight decay.
76
77 typename Architecture_t::ActivationDescriptor_t fActivationDesc; // the descriptor for the activation function
78
79public:
80 /*! Constructor */
81 TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
83
84 /*! Copy the dense layer provided as a pointer */
86
87 /*! Copy Constructor */
88 TDenseLayer(const TDenseLayer &);
89
90 /*! Destructor */
92
93 /*! Compute activation of the layer for the given input. The input
94 * must be in 3D tensor form with the different matrices corresponding to
95 * different events in the batch. Computes activations as well as
96 * the first partial derivative of the activation function at those
97 * activations. */
98 void Forward(Tensor_t &input, bool applyDropout = false);
99
100 /*! Compute weight, bias and activation gradients. Uses the precomputed
101 * first partial derivatives of the activation function computed during
102 * forward propagation and modifies them. Must only be called directly
103 * a the corresponding call to Forward(...). */
104 void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward );
105 /// std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2);
106
107 /*! Printing the layer info. */
108 void Print() const;
109
110 /*! Writes the information and the weights about the layer in an XML node. */
111 virtual void AddWeightsXMLTo(void *parent);
112
113 /*! Read the information and the weights about the layer from XML node. */
114 virtual void ReadWeightsFromXML(void *parent);
115
116 /*! Set dropout probabilities */
117 virtual void SetDropoutProbability(Scalar_t dropoutProbability) { fDropoutProbability = dropoutProbability; }
118
119 /*! Getters */
121
122 /* return output of Gemm before computing the activation function */
125
129};
130
131//
132//
133// The Dense Layer Class - Implementation
134//______________________________________________________________________________
135template <typename Architecture_t>
136TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
137 Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
139 : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
140 batchSize, width, init),
141 fInputActivation(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
142{
143 // should be {1, batchSize, width} but take from output
144 fInputActivation = Tensor_t ( this->GetOutput().GetShape() );
145 fDerivatives = Tensor_t ( this->GetOutput().GetShape() );
146
147 Architecture_t::InitializeActivationDescriptor(fActivationDesc,fF);
148}
149
150//______________________________________________________________________________
151template <typename Architecture_t>
153 VGeneralLayer<Architecture_t>(layer),
154 fInputActivation( layer->GetInputActivation().GetShape() ),
155 fDropoutProbability(layer->GetDropoutProbability()),
156 fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
157{
158 fDerivatives = Tensor_t ( this->GetOutput().GetShape() );
159 Architecture_t::InitializeActivationDescriptor(fActivationDesc,fF);
160}
161
162//______________________________________________________________________________
163template <typename Architecture_t>
165 VGeneralLayer<Architecture_t>(layer),
166 fInputActivation( layer->GetInputActivation()),
167 fDropoutProbability(layer.fDropoutProbability),
168 fF(layer.fF), fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
169{
170 fDerivatives = Tensor_t ( this->GetOutput().GetShape() );
171 Architecture_t::InitializeActivationDescriptor(fActivationDesc,fF);
172}
173
174//______________________________________________________________________________
175template <typename Architecture_t>
177{
178 // release activation descriptor
179 Architecture_t::ReleaseDescriptor(fActivationDesc);
180}
181
182
183
184
185//______________________________________________________________________________
186template <typename Architecture_t>
187auto TDenseLayer<Architecture_t>::Forward( Tensor_t &input, bool applyDropout) -> void
188{
189 if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
190 //
191 Architecture_t::DropoutForward(input, static_cast<TDescriptors *> (nullptr),
192 static_cast<TWorkspace *> (nullptr),
193 this->GetDropoutProbability());
194 }
195 Architecture_t::MultiplyTranspose(this->GetOutput() , input, this->GetWeightsAt(0));
196 Architecture_t::AddRowWise(this->GetOutput(), this->GetBiasesAt(0));
197
198 //evaluate<Architecture_t>(this->GetOutput(), this->GetActivationFunction());
199 Architecture_t::Copy(this->GetInputActivation(),this->GetOutput());
200
201 Architecture_t::ActivationFunctionForward(this->GetOutput(), this->GetActivationFunction(), fActivationDesc);
202}
203
204//______________________________________________________________________________
205template <typename Architecture_t>
206auto TDenseLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) -> void
207/// std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> &
208//// /*inp2*/) -> void
209{
210
211 if (this->GetDropoutProbability() != 1.0) {
212 Architecture_t::DropoutBackward(this->GetActivationGradients(),
213 static_cast<TDescriptors *> (nullptr),
214 static_cast<TWorkspace *> (nullptr));
215 }
216
217 Architecture_t::ActivationFunctionBackward(fDerivatives, this->GetOutput(),
218 this->GetActivationGradients(), this->GetInputActivation(),
219 this->GetActivationFunction(), fActivationDesc);
220
221 Architecture_t::Backward(gradients_backward, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
222 fDerivatives, this->GetActivationGradients(), this->GetWeightsAt(0),
223 activations_backward);
224
225 addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
226 this->GetWeightDecay(), this->GetRegularization());
227}
228
229//______________________________________________________________________________
230template <typename Architecture_t>
232{
233 std::cout << " DENSE Layer: \t";
234 std::cout << " ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols(); // input size
235 std::cout << " , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() << " ) "; // layer width
236
237 std::cout << "\tOutput = ( " << std::setw(2) << this->GetOutput().GetFirstSize() << " ," << std::setw(6) << this->GetOutput().GetShape()[0] << " ," << std::setw(6) << this->GetOutput().GetShape()[1] << " ) ";
238
239 std::vector<std::string> activationNames = { "Identity","Relu","Sigmoid","Tanh","SymmRelu","SoftSign","Gauss" };
240 std::cout << "\t Activation Function = ";
241 std::cout << activationNames[ static_cast<int>(fF) ];
242 if (fDropoutProbability != 1.) std::cout << "\t Dropout prob. = " << fDropoutProbability;
243 std::cout << std::endl;
244}
245
246//______________________________________________________________________________
247
248template <typename Architecture_t>
250{
251 // write layer width activation function + weight and bias matrices
252
253 auto layerxml = gTools().xmlengine().NewChild(parent, 0, "DenseLayer");
254
255 gTools().xmlengine().NewAttr(layerxml, 0, "Width", gTools().StringFromInt(this->GetWidth()));
256
257 int activationFunction = static_cast<int>(this -> GetActivationFunction());
258 gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
259 TString::Itoa(activationFunction, 10));
260 // write weights and bias matrix
261 this->WriteMatrixToXML(layerxml, "Weights", this -> GetWeightsAt(0));
262 this->WriteMatrixToXML(layerxml, "Biases", this -> GetBiasesAt(0));
263}
264
265//______________________________________________________________________________
266template <typename Architecture_t>
268{
269 // Read layer weights and biases from XML
270 this->ReadMatrixXML(parent,"Weights", this -> GetWeightsAt(0));
271 this->ReadMatrixXML(parent,"Biases", this -> GetBiasesAt(0));
272
273}
274
275
276} // namespace DNN
277} // namespace TMVA
278
279#endif
#define f(i)
Definition: RSha256.hxx:104
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t width
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void reg
Generic layer class.
Definition: DenseLayer.h:59
ERegularization fReg
The regularization method.
Definition: DenseLayer.h:74
Tensor_t fDerivatives
activation function gradient
Definition: DenseLayer.h:69
ERegularization GetRegularization() const
Definition: DenseLayer.h:127
void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward)
Compute weight, bias and activation gradients.
Definition: DenseLayer.h:206
typename Architecture_t::Matrix_t Matrix_t
Definition: DenseLayer.h:63
const Tensor_t & GetInputActivation() const
Definition: DenseLayer.h:123
Scalar_t fWeightDecay
The weight decay.
Definition: DenseLayer.h:75
Tensor_t fInputActivation
output of GEMM and input to activation function
Definition: DenseLayer.h:68
Architecture_t::ActivationDescriptor_t fActivationDesc
Definition: DenseLayer.h:77
EActivationFunction fF
Activation function of the layer.
Definition: DenseLayer.h:73
TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability, EActivationFunction f, ERegularization reg, Scalar_t weightDecay)
Constructor.
Definition: DenseLayer.h:136
void Print() const
std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2);
Definition: DenseLayer.h:231
~TDenseLayer()
Destructor.
Definition: DenseLayer.h:176
virtual void AddWeightsXMLTo(void *parent)
Writes the information and the weights about the layer in an XML node.
Definition: DenseLayer.h:249
Scalar_t GetDropoutProbability() const
Getters.
Definition: DenseLayer.h:120
Tensor_t & GetInputActivation()
Definition: DenseLayer.h:124
EActivationFunction GetActivationFunction() const
Definition: DenseLayer.h:126
void Forward(Tensor_t &input, bool applyDropout=false)
Compute activation of the layer for the given input.
Definition: DenseLayer.h:187
virtual void SetDropoutProbability(Scalar_t dropoutProbability)
Set dropout probabilities.
Definition: DenseLayer.h:117
typename Architecture_t::Scalar_t Scalar_t
Definition: DenseLayer.h:62
virtual void ReadWeightsFromXML(void *parent)
Read the information and the weights about the layer from XML node.
Definition: DenseLayer.h:267
typename Architecture_t::Tensor_t Tensor_t
Definition: DenseLayer.h:64
Scalar_t fDropoutProbability
Probability that an input is active.
Definition: DenseLayer.h:71
Scalar_t GetWeightDecay() const
Definition: DenseLayer.h:128
Generic General Layer class.
Definition: GeneralLayer.h:51
const Tensor_t & GetOutput() const
Definition: GeneralLayer.h:196
TXMLEngine & xmlengine()
Definition: Tools.h:262
static TString Itoa(Int_t value, Int_t base)
Converts an Int_t to a TString with respect to the base specified (2-36).
Definition: TString.cxx:2050
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition: TXMLEngine.cxx:715
XMLAttrPointer_t NewAttr(XMLNodePointer_t xmlnode, XMLNsPointer_t, const char *name, const char *value)
creates new attribute for xmlnode, namespaces are not supported for attributes
Definition: TXMLEngine.cxx:586
void Copy(void *source, void *dest)
void init()
Inspect hardware capabilities, and load the optimal library for RooFit computations.
EInitialization
Definition: Functions.h:72
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:65
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
create variable transformations
Tools & gTools()