#ifndef TMVA_DNN_GRU_LAYER
#define TMVA_DNN_GRU_LAYER
55template<
typename Architecture_t>
61 using Matrix_t =
typename Architecture_t::Matrix_t;
62 using Scalar_t =
typename Architecture_t::Scalar_t;
63 using Tensor_t =
typename Architecture_t::Tensor_t;
140 TBasicGRULayer(
size_t batchSize,
size_t stateSize,
size_t inputSize,
141 size_t timeSteps,
bool rememberState =
false,
bool returnSequence =
false,
142 bool resetGateAfter =
false,
167 const Tensor_t &activations_backward)
override;
175 const Matrix_t & precStateActivations,
191 void Print()
const override;
307template <
typename Architecture_t>
312 :
VGeneralLayer<Architecture_t>(batchSize, 1, timeSteps, inputSize, 1, (returnSequence) ? timeSteps : 1, stateSize,
313 6, {stateSize, stateSize, stateSize, stateSize, stateSize, stateSize},
314 {inputSize, inputSize, inputSize, stateSize, stateSize, stateSize}, 3,
315 {stateSize, stateSize, stateSize}, {1, 1, 1}, batchSize,
316 (returnSequence) ? timeSteps : 1, stateSize, fA),
317 fStateSize(stateSize), fTimeSteps(timeSteps), fRememberState(rememberState), fReturnSequence(returnSequence), fResetGateAfter(resetGateAfter),
318 fF1(
f1), fF2(f2), fResetValue(batchSize, stateSize), fUpdateValue(batchSize, stateSize),
319 fCandidateValue(batchSize, stateSize), fState(batchSize, stateSize), fWeightsResetGate(this->GetWeightsAt(0)),
320 fWeightsResetGateState(this->GetWeightsAt(3)), fResetGateBias(this->GetBiasesAt(0)),
321 fWeightsUpdateGate(this->GetWeightsAt(1)), fWeightsUpdateGateState(this->GetWeightsAt(4)),
322 fUpdateGateBias(this->GetBiasesAt(1)), fWeightsCandidate(this->GetWeightsAt(2)),
323 fWeightsCandidateState(this->GetWeightsAt(5)), fCandidateBias(this->GetBiasesAt(2)),
324 fWeightsResetGradients(this->GetWeightGradientsAt(0)), fWeightsResetStateGradients(this->GetWeightGradientsAt(3)),
325 fResetBiasGradients(this->GetBiasGradientsAt(0)), fWeightsUpdateGradients(this->GetWeightGradientsAt(1)),
326 fWeightsUpdateStateGradients(this->GetWeightGradientsAt(4)), fUpdateBiasGradients(this->GetBiasGradientsAt(1)),
327 fWeightsCandidateGradients(this->GetWeightGradientsAt(2)),
328 fWeightsCandidateStateGradients(this->GetWeightGradientsAt(5)),
329 fCandidateBiasGradients(this->GetBiasGradientsAt(2))
331 for (
size_t i = 0; i < timeSteps; ++i) {
332 fDerivativesReset.emplace_back(batchSize, stateSize);
333 fDerivativesUpdate.emplace_back(batchSize, stateSize);
334 fDerivativesCandidate.emplace_back(batchSize, stateSize);
335 reset_gate_value.emplace_back(batchSize, stateSize);
336 update_gate_value.emplace_back(batchSize, stateSize);
337 candidate_gate_value.emplace_back(batchSize, stateSize);
339 Architecture_t::InitializeGRUTensors(
this);
343template <
typename Architecture_t>
404 Architecture_t::InitializeGRUTensors(
this);
408template <
typename Architecture_t>
413 Architecture_t::InitializeGRUDescriptors(
fDescriptors,
this);
417 if (Architecture_t::IsCudnn())
422template <
typename Architecture_t>
440template <
typename Architecture_t>
458template <
typename Architecture_t>
478 Architecture_t::Hadamard(tmpState,
fState);
493template <
typename Architecture_t>
498 if (Architecture_t::IsCudnn()) {
501 assert(input.GetStrides()[1] == this->GetInputSize());
505 Architecture_t::Rearrange(
x, input);
511 auto &cx = this->
fCell;
514 auto &cy = this->
fCell;
519 Architecture_t::RNNForward(
x, hx, cx, weights,
y, hy, cy, rnnDesc, rnnWork, isTraining);
522 Architecture_t::Rearrange(this->
GetOutput(),
y);
525 Tensor_t tmp = (
y.At(
y.GetShape()[0] - 1)).Reshape({
y.GetShape()[1], 1,
y.GetShape()[2]});
526 Architecture_t::Copy(this->
GetOutput(), tmp);
541 Architecture_t::Rearrange(arrInput, input);
569 Matrix_t arrOutputMt = arrOutput[t];
570 Architecture_t::Copy(arrOutputMt,
fState);
574 Architecture_t::Rearrange(this->
GetOutput(), arrOutput);
580 tmp = tmp.Reshape({tmp.GetShape()[0], tmp.GetShape()[1], 1});
581 assert(tmp.GetSize() == this->GetOutput().GetSize());
582 assert(tmp.GetShape()[0] == this->GetOutput().GetShape()[2]);
583 Architecture_t::Rearrange(this->
GetOutput(), tmp);
590template <
typename Architecture_t>
594 Architecture_t::Hadamard(
fState, updateGateValues);
598 for (
size_t j = 0; j < (size_t) tmp.GetNcols(); j++) {
599 for (
size_t i = 0; i < (size_t) tmp.GetNrows(); i++) {
600 tmp(i,j) = 1 - tmp(i,j);
605 Architecture_t::Hadamard(candidateValues, tmp);
606 Architecture_t::ScaleAdd(
fState, candidateValues);
610template <
typename Architecture_t>
612 const Tensor_t &activations_backward)
616 if (Architecture_t::IsCudnn()) {
624 assert(activations_backward.GetStrides()[1] == this->GetInputSize());
627 Architecture_t::Rearrange(
x, activations_backward);
632 Architecture_t::InitializeZero(dy);
635 Tensor_t tmp2 = dy.At(dy.GetShape()[0] - 1).Reshape({dy.GetShape()[1], 1, dy.GetShape()[2]});
640 Architecture_t::Rearrange(
y, this->
GetOutput());
652 Architecture_t::InitializeZero(weightGradients);
666 Architecture_t::RNNBackward(
x, hx, cx,
y, dy, dhy, dcy, weights, dx, dhx, dcx, weightGradients, rnnDesc, rnnWork);
670 if (gradients_backward.GetSize() != 0)
671 Architecture_t::Rearrange(gradients_backward, dx);
684 if (gradients_backward.GetSize() == 0 || gradients_backward[0].GetNrows() == 0 || gradients_backward[0].GetNcols() == 0) {
695 Architecture_t::Rearrange(arr_activations_backward, activations_backward);
708 Architecture_t::Rearrange(arr_output, this->
GetOutput());
713 Architecture_t::InitializeZero(arr_actgradients);
716 assert(tmp_grad.GetSize() == this->GetActivationGradients().GetSize());
717 assert(tmp_grad.GetShape()[0] ==
718 this->GetActivationGradients().GetShape()[2]);
744 Architecture_t::ScaleAdd(state_gradients_backward, arr_actgradients[t-1]);
746 const Matrix_t &prevStateActivations = arr_output[t-2];
747 Matrix_t dx = arr_gradients_backward[t-1];
749 CellBackward(state_gradients_backward, prevStateActivations,
752 arr_activations_backward[t-1], dx ,
756 const Matrix_t &prevStateActivations = initState;
757 Matrix_t dx = arr_gradients_backward[t-1];
758 CellBackward(state_gradients_backward, prevStateActivations,
761 arr_activations_backward[t-1], dx ,
768 Architecture_t::Rearrange(gradients_backward, arr_gradients_backward );
775template <
typename Architecture_t>
777 const Matrix_t & precStateActivations,
786 return Architecture_t::GRULayerBackward(state_gradients_backward,
791 precStateActivations,
792 reset_gate, update_gate, candidate_gate,
800template <
typename Architecture_t>
808template<
typename Architecture_t>
812 std::cout <<
" GRU Layer: \t ";
815 std::cout <<
", NTime = " << this->
GetTimeSteps() <<
" )";
816 std::cout <<
"\tOutput = ( " << this->
GetOutput().GetFirstSize() <<
" , " << this->
GetOutput()[0].GetNrows() <<
" , " << this->
GetOutput()[0].GetNcols() <<
" )\n";
820template <
typename Architecture_t>
847template <
typename Architecture_t>
const Matrix_t & GetWeightsCandidate() const
Matrix_t & GetWeightsCandidateStateGradients()
typename Architecture_t::RecurrentDescriptor_t LayerDescriptor_t
void Forward(Tensor_t &input, bool isTraining=true) override
Computes the next hidden state with the given input matrix (a GRU has no separate cell state).
Matrix_t & GetWeightsResetGate()
Matrix_t & fResetBiasGradients
Gradients w.r.t the reset gate - bias weights.
std::vector< Matrix_t > & GetUpdateGateTensor()
typename Architecture_t::Tensor_t Tensor_t
std::vector< Matrix_t > reset_gate_value
Reset gate value for every time step.
Matrix_t & CellBackward(Matrix_t &state_gradients_backward, const Matrix_t &precStateActivations, const Matrix_t &reset_gate, const Matrix_t &update_gate, const Matrix_t &candidate_gate, const Matrix_t &input, Matrix_t &input_gradient, Matrix_t &dr, Matrix_t &du, Matrix_t &dc)
Backward for a single time unit at the corresponding call to Forward(...).
size_t fStateSize
Hidden state size for GRU.
const Matrix_t & GetWeightsResetGradients() const
const Matrix_t & GetUpdateBiasGradients() const
bool fReturnSequence
Return in output full sequence or just last element.
const Matrix_t & GetWeightsResetStateGradients() const
std::vector< Matrix_t > fDerivativesReset
First fDerivatives of the activations reset gate.
const Tensor_t & GetWeightsTensor() const
std::vector< Matrix_t > & GetResetGateTensor()
Matrix_t & GetWeightsUpdateGateState()
const std::vector< Matrix_t > & GetCandidateGateTensor() const
const Matrix_t & GetUpdateDerivativesAt(size_t i) const
Matrix_t & GetWeightsUpdateStateGradients()
void Print() const override
Prints the info about the layer.
size_t GetInputSize() const
Getters.
Matrix_t fState
Hidden state of GRU.
Matrix_t & GetWeightsResetGradients()
Tensor_t & GetWeightGradientsTensor()
const Matrix_t & GetCandidateBias() const
std::vector< Matrix_t > update_gate_value
Update gate value for every time step.
Tensor_t & GetWeightsTensor()
Tensor_t fX
cached input tensor as T x B x I
Matrix_t & GetCandidateGateTensorAt(size_t i)
Matrix_t & GetResetBiasGradients()
Matrix_t & GetCandidateValue()
void AddWeightsXMLTo(void *parent) override
Writes the information and the weights about the layer in an XML node.
Matrix_t & GetWeightsResetGateState()
DNN::EActivationFunction fF1
Activation function: sigmoid.
const Matrix_t & GetWeightsUpdateGate() const
const std::vector< Matrix_t > & GetDerivativesReset() const
const Matrix_t & GetUpdateGateBias() const
Matrix_t & fWeightsResetGradients
Gradients w.r.t the reset gate - input weights.
std::vector< Matrix_t > & GetDerivativesUpdate()
Matrix_t & fCandidateBiasGradients
Gradients w.r.t the candidate gate - bias weights.
Matrix_t & fCandidateBias
Candidate Gate bias.
Matrix_t & GetUpdateGateTensorAt(size_t i)
DNN::EActivationFunction fF2
Activation function: tanh.
const Matrix_t & GetWeightsUpdateGradients() const
Matrix_t & GetWeightsCandidateGradients()
Matrix_t & fWeightsUpdateStateGradients
Gradients w.r.t the update gate - hidden state weights.
Matrix_t & GetWeightsCandidate()
Matrix_t & fWeightsUpdateGradients
Gradients w.r.t the update gate - input weights.
Matrix_t & GetUpdateGateValue()
size_t fTimeSteps
Timesteps for GRU.
std::vector< Matrix_t > fDerivativesCandidate
First fDerivatives of the activations candidate gate.
const Tensor_t & GetWeightGradientsTensor() const
typename Architecture_t::FilterDescriptor_t WeightsDescriptor_t
Tensor_t fWeightGradientsTensor
Tensor for all weight gradients.
Matrix_t & fUpdateBiasGradients
Gradients w.r.t the update gate - bias weights.
Matrix_t & GetWeightsResetStateGradients()
std::vector< Matrix_t > & GetCandidateGateTensor()
Matrix_t & fWeightsResetGate
Reset Gate weights for input, fWeights[0].
const Matrix_t & GetResetDerivativesAt(size_t i) const
Matrix_t & GetWeightsUpdateGate()
typename Architecture_t::Matrix_t Matrix_t
const Matrix_t & GetCandidateGateTensorAt(size_t i) const
Matrix_t & GetWeightsCandidateState()
const Matrix_t & GetCandidateBiasGradients() const
Matrix_t & GetResetGateTensorAt(size_t i)
Matrix_t & fResetGateBias
Reset Gate bias.
const std::vector< Matrix_t > & GetResetGateTensor() const
Matrix_t fCell
Empty matrix for GRU.
std::vector< Matrix_t > candidate_gate_value
Candidate gate value for every time step.
typename Architecture_t::Scalar_t Scalar_t
const Matrix_t & GetWeigthsUpdateStateGradients() const
const Matrix_t & GetCandidateValue() const
Matrix_t & GetCandidateBiasGradients()
Matrix_t & fWeightsCandidateStateGradients
Gradients w.r.t the candidate gate - hidden state weights.
const std::vector< Matrix_t > & GetDerivativesUpdate() const
const Matrix_t & GetCell() const
void Initialize() override
Initialize the weights according to the given initialization method.
void UpdateGate(const Matrix_t &input, Matrix_t &df)
Computes the update gate values (NN with Sigmoid).
const Matrix_t & GetCandidateDerivativesAt(size_t i) const
Matrix_t fResetValue
Computed reset gate values.
DNN::EActivationFunction GetActivationFunctionF2() const
Matrix_t & GetResetGateBias()
typename Architecture_t::RNNWorkspace_t RNNWorkspace_t
Matrix_t fUpdateValue
Computed update gate values.
const Matrix_t & GetResetBiasGradients() const
bool fResetGateAfter
GRU variant to Apply the reset gate multiplication afterwards (used by cuDNN).
const Matrix_t & GetWeightsCandidateGradients() const
DNN::EActivationFunction GetActivationFunctionF1() const
bool DoesReturnSequence() const
Matrix_t & GetUpdateBiasGradients()
const Matrix_t & GetUpdateGateTensorAt(size_t i) const
Matrix_t & fWeightsResetGateState
Reset Gate weights for prev state, fWeights[3].
Matrix_t & fWeightsUpdateGateState
Update Gate weights for prev state, fWeights[4].
const std::vector< Matrix_t > & GetDerivativesCandidate() const
Tensor_t fWeightsTensor
Tensor for all weights.
typename Architecture_t::RNNDescriptors_t RNNDescriptors_t
const Matrix_t & GetResetGateBias() const
Matrix_t & GetResetDerivativesAt(size_t i)
const Matrix_t & GetUpdateGateValue() const
const Matrix_t & GetResetGateTensorAt(size_t i) const
TDescriptors * fDescriptors
Keeps all the RNN descriptors.
void CellForward(Matrix_t &updateGateValues, Matrix_t &candidateValues)
Forward for a single cell (time unit).
Matrix_t & GetWeightsUpdateGradients()
Matrix_t & fWeightsResetStateGradients
Gradients w.r.t the reset gate - hidden state weights.
Matrix_t & fWeightsCandidateState
Candidate Gate weights for prev state, fWeights[5].
size_t GetStateSize() const
std::vector< Matrix_t > & GetDerivativesReset()
Matrix_t & fUpdateGateBias
Update Gate bias.
void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) override
Backpropagates the error.
const Matrix_t & GetWeightsCandidateStateGradients() const
void ResetGate(const Matrix_t &input, Matrix_t &di)
Computes the reset gate values (NN with Sigmoid).
const Matrix_t & GetWeightsResetGate() const
Tensor_t fDx
cached gradient on the input (output of backward) as T x B x I
Matrix_t & GetCandidateBias()
typename Architecture_t::TensorDescriptor_t TensorDescriptor_t
bool fRememberState
Remember state in next pass.
Matrix_t & fWeightsCandidate
Candidate Gate weights for input, fWeights[2].
Matrix_t & fWeightsCandidateGradients
Gradients w.r.t the candidate gate - input weights.
const Matrix_t & GetWeightsCandidateState() const
void ReadWeightsFromXML(void *parent) override
Read the information and the weights about the layer from XML node.
const std::vector< Matrix_t > & GetUpdateGateTensor() const
const Matrix_t & GetResetGateValue() const
void Update(const Scalar_t learningRate)
Tensor_t fY
cached output tensor as T x B x S
Matrix_t fCandidateValue
Computed candidate values.
const Matrix_t & GetState() const
Matrix_t & GetUpdateGateBias()
void InitState(DNN::EInitialization m=DNN::EInitialization::kZero)
Initialize the hidden state with the given initialization method.
Tensor_t fDy
cached activation gradient (input of backward) as T x B x S
Matrix_t & GetCandidateDerivativesAt(size_t i)
std::vector< Matrix_t > fDerivativesUpdate
First fDerivatives of the activations update gate.
size_t GetTimeSteps() const
const Matrix_t & GetWeightsUpdateGateState() const
std::vector< Matrix_t > & GetDerivativesCandidate()
bool DoesRememberState() const
const Matrix_t & GetWeightsResetGateState() const
void CandidateValue(const Matrix_t &input, Matrix_t &dc)
Decides the new candidate values (NN with Tanh).
TBasicGRULayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false, DNN::EActivationFunction f1=DNN::EActivationFunction::kSigmoid, DNN::EActivationFunction f2=DNN::EActivationFunction::kTanh, bool training=true, DNN::EInitialization fA=DNN::EInitialization::kZero)
Constructor.
Matrix_t & GetUpdateDerivativesAt(size_t i)
Matrix_t & fWeightsUpdateGate
Update Gate weights for input, fWeights[1].
typename Architecture_t::DropoutDescriptor_t HelperDescriptor_t
Matrix_t & GetResetGateValue()
const Matrix_t & GetWeightsAt(size_t i) const
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
const Tensor_t & GetOutput() const
void WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix)
const Tensor_t & GetActivationGradients() const
const Matrix_t & GetBiasesAt(size_t i) const
const Matrix_t & GetBiasGradientsAt(size_t i) const
size_t GetBatchSize() const
Getters.
void ReadMatrixXML(void *node, const char *name, Matrix_t &matrix)
const Matrix_t & GetWeightGradientsAt(size_t i) const
VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols, size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init)
Constructor.
size_t GetInputWidth() const
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
XMLAttrPointer_t NewAttr(XMLNodePointer_t xmlnode, XMLNsPointer_t, const char *name, const char *value)
creates new attribute for xmlnode, namespaces are not supported for attributes
void evaluateDerivativeMatrix(typename Architecture_t::Matrix_t &B, EActivationFunction f, const typename Architecture_t::Matrix_t &A)
void evaluateMatrix(typename Architecture_t::Matrix_t &A, EActivationFunction f)
EActivationFunction
Enum that represents layer activation functions.
void initialize(typename Architecture_t::Matrix_t &A, EInitialization m)
create variable transformations