doc/hackathon/Net_8h_source.html

// @(#)root/tmva: $Id$

// Author: Simon Pfreundschuh 20/06/16


/*************************************************************************

 * Copyright (C) 2016, Simon Pfreundschuh                                *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


#ifndef TMVA_DNN_NET

#define TMVA_DNN_NET


#include <vector>

#include <iostream>


#include "Layer.h"


namespace TMVA {

namespace DNN  {


/** \class TNet


    Generic neural network class.


    This generic neural network class represents a concrete neural

    network through a vector of layers and coordinates the forward

    and backward propagation through the net.


    The net takes as input a batch from the training data given in

    matrix form, with each row corresponding to a certain training

    event.


    On construction, the neural network allocates all the memory

    required for the training of the neural net and keeps it until

    its destruction.


    The Architecture type argument simply holds the

    architecture-specific data types, which are just the matrix type

    Matrix_t and the used scalar type Scalar_t.


    \tparam Architecture_t The Architecture type that holds the
    datatypes for a given architecture.

    \tparam Layer_t The type used for the layers. Can be either
    TLayer<Architecture_t> or TSharedLayer<Architecture_t>.

*/

template<typename Architecture_t, typename Layer_t = TLayer<Architecture_t>>
class TNet {

public:
   using Matrix_t         = typename Architecture_t::Matrix_t;
   using Scalar_t         = typename Architecture_t::Scalar_t;
   using LayerIterator_t  = typename std::vector<Layer_t>::iterator;

private:
   size_t fBatchSize;  ///< Batch size for training and evaluation of the Network.
   size_t fInputWidth; ///< Number of features in a single input event.

   std::vector<Layer_t> fLayers; ///< Layers in the network.

   Matrix_t fDummy;       ///< Empty matrix for last step in back propagation.
   ELossFunction fJ;      ///< The loss function of the network.
   ERegularization fR;    ///< The regularization used for the network.
   Scalar_t fWeightDecay; ///< The weight decay factor.

public:
   /*! Construct an empty net: zero batch size and input width, no layers,
    *  mean squared error loss, no regularization and zero weight decay. */
   TNet();

   /*! Copy the configuration and the layers of another net. */
   TNet(const TNet & other);

   /*! Construct a net with the given batch size from a net that is defined
    *  on another architecture, replicating its layers, weights and biases. */
   template<typename OtherArchitecture_t>
   TNet(size_t batchSize, const TNet<OtherArchitecture_t> &);

   /*! Construct a neural net for a given batch size and input width with
    *  the given loss function, regularization and weight decay. The net
    *  has no layers until AddLayer is called. */
   TNet(size_t batchSize,
        size_t inputWidth,
        ELossFunction J,
        ERegularization R = ERegularization::kNone,
        Scalar_t weightDecay = 0.0);

   /*! Create a clone that uses the same weight and biases matrices but
    *  potentially a different batch size. */
   TNet<Architecture_t, TSharedLayer<Architecture_t>> CreateClone(size_t batchSize);

   /*! Add a layer of the given size to the neural net. */
   void AddLayer(size_t width, EActivationFunction f,
                 Scalar_t dropoutProbability = 1.0);

   /*! Remove all layers from the network.*/
   void Clear();

   /*! Add a layer which shares its weights with another TNet instance. */
   template <typename SharedLayer_t>
   void AddLayer(SharedLayer_t & layer);

   /*! Iterator to the first layer of the net. */
   LayerIterator_t LayersBegin() {return fLayers.begin();}

   /*! Past-the-end iterator of the layers of the net. */
   LayerIterator_t LayersEnd() {return fLayers.end();}

   /*! Initialize the weights in the net with the
    *  initialization method. */
   inline void Initialize(EInitialization m);

   /*! Initialize the gradients in the net to zero. Required if net is
    *  used to store velocities of momentum-based minimization techniques. */
   inline void InitializeGradients();

   /*! Forward a given input through the neural net. Computes
    *  all layer activations up to the output layer */
   inline void Forward(Matrix_t& X, bool applyDropout = false);

   /*! Compute the weight gradients in the net from the given training
    * samples X and training labels Y. */
   inline void Backward(const Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights);

   /*! Evaluate the loss function of the net using the activations
    *  that are currently stored in the output layer. */
   inline Scalar_t Loss(const Matrix_t &Y, const Matrix_t &weights, bool includeRegularization = true) const;

   /*! Propagate the input batch X through the net and evaluate the
    *  error function for the resulting activations of the output
    *  layer */
   inline Scalar_t Loss(Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights, bool applyDropout = false,
                        bool includeRegularization = true);

   /*! Compute the neural network prediction obtained from forwarding the
    *  batch X through the neural network and applying the output function
    *  f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f);

   /*! Compute the neural network prediction obtained from applying the output
    * function f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, EOutputFunction f) const;

   /*! Estimate of the number of floating point operations of one forward
    *  and backward pass through the net. */
   Scalar_t            GetNFlops();

   // Accessors. GetOutput() and GetOutputWidth() read fLayers.back() and
   // therefore require the net to contain at least one layer.
   size_t              GetDepth() const          {return fLayers.size();}
   size_t              GetBatchSize() const      {return fBatchSize;}
   Layer_t &           GetLayer(size_t i)        {return fLayers[i];}
   const Layer_t &     GetLayer(size_t i) const  {return fLayers[i];}
   ELossFunction       GetLossFunction() const   {return fJ;}
   Matrix_t &          GetOutput()               {return fLayers.back().GetOutput();}
   size_t              GetInputWidth() const     {return fInputWidth;}
   size_t              GetOutputWidth() const    {return fLayers.back().GetWidth();}
   ERegularization     GetRegularization() const {return fR;}
   Scalar_t            GetWeightDecay() const    {return fWeightDecay;}

   // Mutators. These only overwrite the stored value; layers that were
   // already added are not touched.
   void SetBatchSize(size_t batchSize)       {fBatchSize = batchSize;}
   void SetInputWidth(size_t inputWidth)     {fInputWidth = inputWidth;}
   void SetRegularization(ERegularization R) {fR = R;}
   void SetLossFunction(ELossFunction J)     {fJ = J;}
   void SetWeightDecay(Scalar_t weightDecay) {fWeightDecay = weightDecay;}

   /*! Set the dropout probability of layer i to probabilities[i]; layers
    *  without a corresponding entry get probability 1.0. */
   void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);

   /*! Print the loss function, the depth and each layer to std::cout. */
   void Print();
};


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet()
   : fBatchSize(0),
     fInputWidth(0),
     fLayers(),
     fDummy(0,0),
     fJ(ELossFunction::kMeanSquaredError),
     fR(ERegularization::kNone),
     fWeightDecay(0.0)
{
   // Default state: an empty net with mean squared error loss and no
   // regularization. Everything is set in the initializer list.
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(const TNet & other)
   : fBatchSize(other.fBatchSize),
     fInputWidth(other.fInputWidth),
     fLayers(other.fLayers),
     fDummy(0,0),
     fJ(other.fJ),
     fR(other.fR),
     fWeightDecay(other.fWeightDecay)
{
   // Copies sizes, layers and training configuration of `other`. fDummy is
   // not copied but constructed anew as a (0,0) matrix.
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
template<typename OtherArchitecture_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    const TNet<OtherArchitecture_t> & other)
   : fBatchSize(batchSize),
     fInputWidth(other.GetInputWidth()),
     fLayers(),
     fDummy(0,0),
     fJ(other.GetLossFunction()),
     fR(other.GetRegularization()),
     fWeightDecay(other.GetWeightDecay())
{
   // Rebuild the net of the other architecture layer by layer: same width,
   // activation function and dropout probability, with weights and biases
   // converted through TMatrixT<Scalar_t>.
   const size_t depth = other.GetDepth();
   fLayers.reserve(depth);

   for (size_t index = 0; index < depth; ++index) {
      const auto & source = other.GetLayer(index);
      AddLayer(source.GetWidth(),
               source.GetActivationFunction(),
               source.GetDropoutProbability());

      // The layer that was just appended is the one at position `index`.
      Layer_t & target = fLayers.back();
      target.GetWeights() = static_cast<TMatrixT<Scalar_t>>(source.GetWeights());
      target.GetBiases()  = static_cast<TMatrixT<Scalar_t>>(source.GetBiases());
   }
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    size_t inputWidth,
                                    ELossFunction J,
                                    ERegularization R,
                                    Scalar_t weightDecay)
   : fBatchSize(batchSize),
     fInputWidth(inputWidth),
     fLayers(),
     fDummy(0,0),
     fJ(J),
     fR(R),
     fWeightDecay(weightDecay)
{
   // Stores the configuration only; the net starts without any layers.
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::CreateClone(size_t batchSize)
   -> TNet<Architecture_t, TSharedLayer<Architecture_t>>
{
   // Create a net of shared layers with the given batch size. The clone
   // takes over input width, loss function, regularization AND the weight
   // decay factor of this net; without the latter the clone would fall back
   // to the constructor default of 0.0 and drop the regularization term.
   TNet<Architecture_t, TSharedLayer<Architecture_t>> clone(batchSize, fInputWidth,
                                                            fJ, fR, fWeightDecay);

   // Each layer of the clone is constructed from the corresponding layer of
   // this net via the shared-layer AddLayer overload.
   for (auto & layer : fLayers) {
      clone.AddLayer(layer);
   }
   return clone;
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::AddLayer(size_t width,
                                             EActivationFunction f,
                                             Scalar_t dropoutProbability)
{
   // The first layer is fed by the net input, every further layer by the
   // layer that currently is the last one.
   size_t inputWidth = fInputWidth;
   if (!fLayers.empty()) {
      inputWidth = fLayers.back().GetWidth();
   }
   fLayers.emplace_back(fBatchSize, inputWidth, width, f, dropoutProbability);
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::Clear()
{
   // Drop all layers; batch size, input width and the training
   // configuration are left unchanged.
   this->fLayers.clear();
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
template<typename SharedLayer_t>
inline void TNet<Architecture_t, Layer_t>::AddLayer(SharedLayer_t & layer)
{
   // Append a layer constructed from this net's batch size and the given
   // layer (the Layer_t constructor taking these two arguments is used).
   this->fLayers.emplace_back(this->fBatchSize, layer);
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Initialize(EInitialization m)
{
   // Delegate the initialization with method m to every layer, front to back.
   for (size_t index = 0; index < fLayers.size(); ++index) {
      fLayers[index].Initialize(m);
   }
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::InitializeGradients()
{
   // Apply the kZero initialization to the weight and bias gradients of
   // every layer.
   for (size_t index = 0; index < fLayers.size(); ++index) {
      Layer_t & layer = fLayers[index];
      initialize<Architecture_t>(layer.GetWeightGradients(), EInitialization::kZero);
      initialize<Architecture_t>(layer.GetBiasGradients(),   EInitialization::kZero);
   }
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Forward(Matrix_t &input,
                                                   bool applyDropout)
{
   // The first layer consumes the net input. The net must contain at least
   // one layer.
   fLayers[0].Forward(input, applyDropout);

   // Every following layer consumes the output of its predecessor.
   const size_t depth = fLayers.size();
   for (size_t next = 1; next < depth; ++next) {
      Layer_t & previous = fLayers[next - 1];
      fLayers[next].Forward(previous.GetOutput(), applyDropout);
   }
}


//______________________________________________________________________________

template <typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Backward(const Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights)
{
   // Seed the backward pass: gradients of the loss fJ evaluated on the
   // output currently stored in the last layer.
   Layer_t & outputLayer = fLayers.back();
   evaluateGradients<Architecture_t>(outputLayer.GetActivationGradients(), fJ, Y,
                                     outputLayer.GetOutput(), weights);

   // Walk from the last layer down to the second one. Each layer receives
   // the activation gradients and the output of the layer in front of it.
   for (size_t current = fLayers.size() - 1; current != 0; --current) {
      Layer_t & previous = fLayers[current - 1];
      auto & gradientsBackward   = previous.GetActivationGradients();
      auto & activationsBackward = previous.GetOutput();
      fLayers[current].Backward(gradientsBackward, activationsBackward,
                                fR, fWeightDecay);
   }

   // The first layer has no predecessor: it gets the net input X and the
   // empty fDummy matrix in place of activation gradients.
   fLayers.front().Backward(fDummy, X, fR, fWeightDecay);
}


//______________________________________________________________________________

template <typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(const Matrix_t &Y, const Matrix_t &weights,
                                                bool includeRegularization) const -> Scalar_t
{
   // Loss fJ of the output that is currently stored in the last layer.
   auto total = evaluate<Architecture_t>(fJ, Y, fLayers.back().GetOutput(), weights);

   // Without a request for it, or without a regularization, that is all.
   if (!includeRegularization || fR == ERegularization::kNone) {
      return total;
   }

   // Otherwise add the weight-decay scaled regularization of each layer's
   // weights, front to back.
   for (size_t index = 0; index < fLayers.size(); ++index) {
      total += fWeightDecay * regularization<Architecture_t>(fLayers[index].GetWeights(), fR);
   }
   return total;
}


//______________________________________________________________________________

template <typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights,
                                                bool applyDropout, bool includeRegularization) -> Scalar_t
{
   // First propagate the batch X through the net ...
   this->Forward(X, applyDropout);

   // ... then evaluate the loss on the activations it left in the output layer.
   const Scalar_t loss = this->Loss(Y, weights, includeRegularization);
   return loss;
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Yhat,
                                                      Matrix_t &X,
                                                      EOutputFunction f)
{
   // Propagate X through the net with dropout disabled.
   const bool applyDropout = false;
   this->Forward(X, applyDropout);

   // Apply the output function f to the activations of the last layer.
   Layer_t & outputLayer = fLayers.back();
   evaluate<Architecture_t>(Yhat, f, outputLayer.GetOutput());
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Y_hat,
                                                      EOutputFunction f) const
{
   // No forward pass here: f is applied to whatever activations are
   // currently stored in the last layer.
   const Layer_t & outputLayer = fLayers.back();
   evaluate<Architecture_t>(Y_hat, f, outputLayer.GetOutput());
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::GetNFlops()
   -> Scalar_t
{
   // Accumulates a per-layer operation count for one forward and one
   // backward pass. The order of the additions is kept fixed so that the
   // floating point result does not change.
   Scalar_t total = 0;

   const Scalar_t batch = static_cast<Scalar_t>(fBatchSize);
   Scalar_t fanIn = static_cast<Scalar_t>(fInputWidth); // width feeding the current layer

   bool isFirstLayer = true;
   for (Layer_t & layer : fLayers) {
      const Scalar_t fanOut = static_cast<Scalar_t>(layer.GetWidth());

      // Forward propagation.
      total += batch * fanOut * (2.0 * fanIn - 1); // Matrix mult.
      total += batch * fanOut;                     // Add bias values.
      total += 2 * batch * fanOut;                 // Apply activation function and
                                                   // compute derivative.
      // Backward propagation.
      total += batch * fanOut;                       // Hadamard
      total += fanIn * fanOut * (2.0 * batch - 1.0); // Weight gradients
      total += fanOut * (batch - 1);                 // Bias gradients
      if (!isFirstLayer) {
         // Only layers behind the first one propagate gradients further back.
         total += fanIn * batch * (2.0 * fanOut  - 1.0); // Previous layer gradients.
      }

      fanIn = fanOut;
      isFirstLayer = false;
   }
   return total;
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::SetDropoutProbabilities(
    const std::vector<Double_t> & probabilities)
{
   const size_t depth = fLayers.size();
   // Number of layers for which a probability was supplied.
   const size_t nGiven = probabilities.size() < depth ? probabilities.size() : depth;

   size_t index = 0;
   // Layers with a matching entry take over the given probability ...
   for (; index < nGiven; ++index) {
      fLayers[index].SetDropoutProbability(probabilities[index]);
   }
   // ... all remaining layers are set to 1.0.
   for (; index < depth; ++index) {
      fLayers[index].SetDropoutProbability(1.0);
   }
}


//______________________________________________________________________________

template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::Print()
{
   // Header line: the loss function (enum value printed as a char) and the
   // number of layers.
   std::cout << "DEEP NEURAL NETWORK:"
             << " Loss function = " << static_cast<char>(fJ)
             << ", Depth = " << fLayers.size() << std::endl;

   // One section per layer; layers are numbered starting at 1.
   for (size_t index = 0; index < fLayers.size(); ++index) {
      std::cout << "DNN Layer " << (index + 1) << ":" << std::endl;
      fLayers[index].Print();
   }
}


} // namespace DNN

} // namespace TMVA


#endif

kNone
const Handle_t kNone
Definition GuiTypes.h:89

Layer.h

f
#define f(i)
Definition RSha256.hxx:104

R
#define R(a, b, c, d, e, f, g, h, i)
Definition RSha256.hxx:110

initialize
void initialize()

evaluate
double evaluate() const override

X
#define X(type, name)

input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Definition TGWin32VirtualXProxy.cxx:142

width
Option_t Option_t width
Definition TGWin32VirtualXProxy.cxx:56

TMVA::DNN::TNet::SetWeightDecay
void SetWeightDecay(Scalar_t weightDecay)
Definition Net.h:152

TMVA::DNN::TNet::Print
void Print()
Definition Net.h:392

TMVA::DNN::TNet::Initialize
void Initialize(EInitialization m)
Initialize the weights in the net with the initialization method.
Definition Net.h:254

TMVA::DNN::TNet::Loss
Scalar_t Loss(const Matrix_t &Y, const Matrix_t &weights, bool includeRegularization=true) const
Evaluate the loss function of the net using the activations that are currently stored in the output l...
Definition Net.h:305

TMVA::DNN::TNet::SetRegularization
void SetRegularization(ERegularization R)
Definition Net.h:150

TMVA::DNN::TNet::fInputWidth
size_t fInputWidth
Number of features in a single input event.
Definition Net.h:58

TMVA::DNN::TNet::LayersEnd
LayerIterator_t LayersEnd()
Iterator to the last layer of the net.
Definition Net.h:98

TMVA::DNN::TNet::GetOutputWidth
size_t GetOutputWidth() const
Definition Net.h:144

TMVA::DNN::TNet::GetLayer
const Layer_t & GetLayer(size_t i) const
Definition Net.h:140

TMVA::DNN::TNet::Matrix_t
typename Architecture_t::Matrix_t Matrix_t
Definition Net.h:52

TMVA::DNN::TNet::fBatchSize
size_t fBatchSize
Batch size for training and evaluation of the Network.
Definition Net.h:57

TMVA::DNN::TNet::fJ
ELossFunction fJ
The loss function of the network.
Definition Net.h:63

TMVA::DNN::TNet::Forward
void Forward(Matrix_t &X, bool applyDropout=false)
Forward a given input through the neural net.
Definition Net.h:273

TMVA::DNN::TNet::GetRegularization
ERegularization GetRegularization() const
Definition Net.h:145

TMVA::DNN::TNet::Prediction
void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f)
Compute the neural network prediction obtained from forwarding the batch X through the neural network...
Definition Net.h:329

TMVA::DNN::TNet::SetBatchSize
void SetBatchSize(size_t batchSize)
Definition Net.h:148

TMVA::DNN::TNet::TNet
TNet()
Definition Net.h:160

TMVA::DNN::TNet::InitializeGradients
void InitializeGradients()
Initialize the gradients in the net to zero.
Definition Net.h:263

TMVA::DNN::TNet::fWeightDecay
Scalar_t fWeightDecay
The weight decay factor.
Definition Net.h:65

TMVA::DNN::TNet::fR
ERegularization fR
The regularization used for the network.
Definition Net.h:64

TMVA::DNN::TNet::LayersBegin
LayerIterator_t LayersBegin()
Iterator to the first layer of the net.
Definition Net.h:95

TMVA::DNN::TNet::CreateClone
TNet< Architecture_t, TSharedLayer< Architecture_t > > CreateClone(size_t batchSize)
Create a clone that uses the same weight and biases matrices but potentially a difference batch size.
Definition Net.h:212

TMVA::DNN::TNet::GetOutput
Matrix_t & GetOutput()
Definition Net.h:142

TMVA::DNN::TNet::Backward
void Backward(const Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights)
Compute the weight gradients in the net from the given training samples X and training labels Y.
Definition Net.h:285

TMVA::DNN::TNet::GetLossFunction
ELossFunction GetLossFunction() const
Definition Net.h:141

TMVA::DNN::TNet::fDummy
Matrix_t fDummy
Empty matrix for last step in back propagation.
Definition Net.h:62

TMVA::DNN::TNet::GetNFlops
Scalar_t GetNFlops()
Definition Net.h:347

TMVA::DNN::TNet::GetBatchSize
size_t GetBatchSize() const
Definition Net.h:138

TMVA::DNN::TNet::GetDepth
size_t GetDepth() const
Definition Net.h:137

TMVA::DNN::TNet::SetDropoutProbabilities
void SetDropoutProbabilities(const std::vector< Double_t > &probabilities)
Definition Net.h:378

TMVA::DNN::TNet::SetLossFunction
void SetLossFunction(ELossFunction J)
Definition Net.h:151

TMVA::DNN::TNet::Clear
void Clear()
Remove all layers from the network.
Definition Net.h:239

TMVA::DNN::TNet::AddLayer
void AddLayer(SharedLayer &layer)
Add a layer which shares its weights with another TNet instance.

TMVA::DNN::TNet::AddLayer
void AddLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Add a layer of the given size to the neural net.
Definition Net.h:225

TMVA::DNN::TNet::SetInputWidth
void SetInputWidth(size_t inputWidth)
Definition Net.h:149

TMVA::DNN::TNet::GetInputWidth
size_t GetInputWidth() const
Definition Net.h:143

TMVA::DNN::TNet::LayerIterator_t
typename std::vector< Layer_t >::iterator LayerIterator_t
Definition Net.h:54

TMVA::DNN::TNet::Scalar_t
typename Architecture_t::Scalar_t Scalar_t
Definition Net.h:53

TMVA::DNN::TNet::GetWeightDecay
Scalar_t GetWeightDecay() const
Definition Net.h:146

TMVA::DNN::TNet::fLayers
std::vector< Layer_t > fLayers
Layers in the network.
Definition Net.h:60

TMVA::DNN::TNet::GetLayer
Layer_t & GetLayer(size_t i)
Definition Net.h:139

TMatrixT
TMatrixT.
Definition TMatrixT.h:40

for
for(Int_t i=0;i< n;i++)
Definition legend1.C:18

TMVA::DNN
Definition Adadelta.h:36

TMVA::DNN::EInitialization
EInitialization
Definition Functions.h:72

TMVA::DNN::EInitialization::kZero
@ kZero
Definition Functions.h:76

TMVA::DNN::EOutputFunction
EOutputFunction
Enum that represents output functions.
Definition Functions.h:46

TMVA::DNN::weightDecay
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition NeuralNet.icc:498

TMVA::DNN::regularization
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition Functions.h:238

TMVA::DNN::ERegularization
ERegularization
Enum representing the regularization type applied for a given layer.
Definition Functions.h:65

TMVA::DNN::ERegularization::kNone
@ kNone
Definition Functions.h:66

TMVA::DNN::EActivationFunction
EActivationFunction
Enum that represents layer activation functions.
Definition Functions.h:32

TMVA::DNN::ELossFunction
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition Functions.h:57

TMVA::DNN::ELossFunction::kMeanSquaredError
@ kMeanSquaredError
Definition Functions.h:59

TMVA::DNN::evaluateGradients
void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFunction f, const typename Architecture_t::Matrix_t &Y, const typename Architecture_t::Matrix_t &output, const typename Architecture_t::Matrix_t &weights)
Compute the gradient of the given output function f for given activations output of the output layer ...
Definition Functions.h:215

TMVA
create variable transformations
Definition GeneticMinimizer.h:22

m
TMarker m
Definition textangle.C:8

l
TLine l
Definition textangle.C:4