// doc/hackathon/LSTMLayer_8h_source.html

// @(#)root/tmva/tmva/dnn/lstm:$Id$

// Author: Surya S Dwivedi 27/05/19


/**********************************************************************************

 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *

 * Package: TMVA                                                                  *

 * Class : TBasicLSTMLayer                                                        *

 *                                                                                *

 * Description:                                                                   *

 *       LSTM recurrent neural network layer                                      *

 *                                                                                *

 * Authors (alphabetical):                                                        *

 *       Surya S Dwivedi  <surya2191997@gmail.com> - IIT Kharagpur, India         *

 *                                                                                *

 * Copyright (c) 2005-2019:                                                       *

 * All rights reserved.                                                           *

 *       CERN, Switzerland                                                        *

 *                                                                                *

 * For the licensing terms see $ROOTSYS/LICENSE.                                  *

 * For the list of contributors see $ROOTSYS/README/CREDITS.                      *

 **********************************************************************************/


//#pragma once


//////////////////////////////////////////////////////////////////////

// This class implements the LSTM layer. LSTM is a variant of vanilla

// RNN which is capable of learning long range dependencies.

//////////////////////////////////////////////////////////////////////


#ifndef TMVA_DNN_LSTM_LAYER

#define TMVA_DNN_LSTM_LAYER


#include <cmath>

#include <iostream>

#include <vector>


#include "TMatrix.h"

#include "TMVA/DNN/Functions.h"


namespace TMVA

{

namespace DNN

{

namespace RNN

{


//______________________________________________________________________________

//

// Basic LSTM Layer

//______________________________________________________________________________


/** \class TBasicLSTMLayer

      Generic, architecture-templated implementation of a basic LSTM layer.

      The layer owns eight weight matrices and four bias vectors through its
      VGeneralLayer base. The constructors bind them as follows:

        gate       | input weights     | state weights     | bias
        -----------|-------------------|-------------------|----------------
        input      | GetWeightsAt(0)   | GetWeightsAt(4)   | GetBiasesAt(0)
        forget     | GetWeightsAt(1)   | GetWeightsAt(5)   | GetBiasesAt(1)
        candidate  | GetWeightsAt(2)   | GetWeightsAt(6)   | GetBiasesAt(2)
        output     | GetWeightsAt(3)   | GetWeightsAt(7)   | GetBiasesAt(3)

      The gradient references use the same indices on GetWeightGradientsAt /
      GetBiasGradientsAt.
*/
template<typename Architecture_t>
class TBasicLSTMLayer : public VGeneralLayer<Architecture_t>
{

public:

   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

   using LayerDescriptor_t = typename Architecture_t::RecurrentDescriptor_t;
   using WeightsDescriptor_t = typename Architecture_t::FilterDescriptor_t;
   using TensorDescriptor_t = typename Architecture_t::TensorDescriptor_t;
   using HelperDescriptor_t = typename Architecture_t::DropoutDescriptor_t;

   using RNNWorkspace_t = typename Architecture_t::RNNWorkspace_t;
   using RNNDescriptors_t = typename Architecture_t::RNNDescriptors_t;

private:

   size_t fStateSize;                           ///< Hidden state size for LSTM
   size_t fCellSize;                            ///< Cell state size of LSTM
   size_t fTimeSteps;                           ///< Timesteps for LSTM

   bool fRememberState;                         ///< Remember state in next pass
   bool fReturnSequence = false;                ///< Return in output full sequence or just last element

   DNN::EActivationFunction fF1;                ///< Activation function: sigmoid
   DNN::EActivationFunction fF2;                ///< Activation function: tanh

   Matrix_t fInputValue;                        ///< Computed input gate values
   Matrix_t fCandidateValue;                    ///< Computed candidate values
   Matrix_t fForgetValue;                       ///< Computed forget gate values
   Matrix_t fOutputValue;                       ///< Computed output gate values
   Matrix_t fState;                             ///< Hidden state of LSTM
   Matrix_t fCell;                              ///< Cell state of LSTM

   // NOTE: the indices below mirror the bindings made in the constructors
   // (input weights occupy slots 0-3, state weights slots 4-7).
   Matrix_t &fWeightsInputGate;                 ///< Input Gate weights for input, fWeights[0]
   Matrix_t &fWeightsInputGateState;            ///< Input Gate weights for prev state, fWeights[4]
   Matrix_t &fInputGateBias;                    ///< Input Gate bias, fBiases[0]

   Matrix_t &fWeightsForgetGate;                ///< Forget Gate weights for input, fWeights[1]
   Matrix_t &fWeightsForgetGateState;           ///< Forget Gate weights for prev state, fWeights[5]
   Matrix_t &fForgetGateBias;                   ///< Forget Gate bias, fBiases[1]

   Matrix_t &fWeightsCandidate;                 ///< Candidate Gate weights for input, fWeights[2]
   Matrix_t &fWeightsCandidateState;            ///< Candidate Gate weights for prev state, fWeights[6]
   Matrix_t &fCandidateBias;                    ///< Candidate Gate bias, fBiases[2]

   Matrix_t &fWeightsOutputGate;                ///< Output Gate weights for input, fWeights[3]
   Matrix_t &fWeightsOutputGateState;           ///< Output Gate weights for prev state, fWeights[7]
   Matrix_t &fOutputGateBias;                   ///< Output Gate bias, fBiases[3]

   std::vector<Matrix_t> input_gate_value;      ///< input gate value for every time step
   std::vector<Matrix_t> forget_gate_value;     ///< forget gate value for every time step
   std::vector<Matrix_t> candidate_gate_value;  ///< candidate gate value for every time step
   std::vector<Matrix_t> output_gate_value;     ///< output gate value for every time step
   std::vector<Matrix_t> cell_value;            ///< cell value for every time step
   std::vector<Matrix_t> fDerivativesInput;     ///< First derivatives of the input gate activations
   std::vector<Matrix_t> fDerivativesForget;    ///< First derivatives of the forget gate activations
   std::vector<Matrix_t> fDerivativesCandidate; ///< First derivatives of the candidate gate activations
   std::vector<Matrix_t> fDerivativesOutput;    ///< First derivatives of the output gate activations

   Matrix_t &fWeightsInputGradients;            ///< Gradients w.r.t the input gate - input weights
   Matrix_t &fWeightsInputStateGradients;       ///< Gradients w.r.t the input gate - hidden state weights
   Matrix_t &fInputBiasGradients;               ///< Gradients w.r.t the input gate - bias weights
   Matrix_t &fWeightsForgetGradients;           ///< Gradients w.r.t the forget gate - input weights
   Matrix_t &fWeightsForgetStateGradients;      ///< Gradients w.r.t the forget gate - hidden state weights
   Matrix_t &fForgetBiasGradients;              ///< Gradients w.r.t the forget gate - bias weights
   Matrix_t &fWeightsCandidateGradients;        ///< Gradients w.r.t the candidate gate - input weights
   Matrix_t &fWeightsCandidateStateGradients;   ///< Gradients w.r.t the candidate gate - hidden state weights
   Matrix_t &fCandidateBiasGradients;           ///< Gradients w.r.t the candidate gate - bias weights
   Matrix_t &fWeightsOutputGradients;           ///< Gradients w.r.t the output gate - input weights
   Matrix_t &fWeightsOutputStateGradients;      ///< Gradients w.r.t the output gate - hidden state weights
   Matrix_t &fOutputBiasGradients;              ///< Gradients w.r.t the output gate - bias weights

   // Tensor representing all weights (used by cuDNN)
   Tensor_t fWeightsTensor;                     ///< Tensor for all weights
   Tensor_t fWeightGradientsTensor;             ///< Tensor for all weight gradients

   // tensors used internally for the forward and backward pass
   Tensor_t fX;  ///< cached input tensor as T x B x I
   Tensor_t fY;  ///< cached output tensor as T x B x S
   Tensor_t fDx; ///< cached gradient on the input (output of backward) as T x B x I
   Tensor_t fDy; ///< cached activation gradient (input of backward) as T x B x S

   TDescriptors *fDescriptors = nullptr; ///< Keeps all the RNN descriptors
   TWorkspace *fWorkspace = nullptr;     ///< Workspace needed for GPU computation (cuDNN)

public:

   /*! Constructor */
   TBasicLSTMLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState = false,
                   bool returnSequence = false,
                   DNN::EActivationFunction f1 = DNN::EActivationFunction::kSigmoid,
                   DNN::EActivationFunction f2 = DNN::EActivationFunction::kTanh, bool training = true,
                   DNN::EInitialization fA = DNN::EInitialization::kZero);

   /*! Copy Constructor */
   TBasicLSTMLayer(const TBasicLSTMLayer &);

   /*! Initialize the weights according to the given initialization
    **  method. */
   void Initialize() override;

   /*! Initialize the hidden state and cell state method. */
   void InitState(DNN::EInitialization m = DNN::EInitialization::kZero);

   /*! Computes the next hidden state
    *  and next cell state with given input matrix. */
   void Forward(Tensor_t &input, bool isTraining = true) override;

   /*! Forward for a single cell (time unit) */
   void CellForward(Matrix_t &inputGateValues, const Matrix_t &forgetGateValues,
                  const Matrix_t &candidateValues, const Matrix_t &outputGateValues);

   /*! Backpropagates the error. Must only be called directly at the corresponding
    *  call to Forward(...). */
   void Backward(Tensor_t &gradients_backward,
                 const Tensor_t &activations_backward) override;

   /*! Updates weights and biases, given the learning rate */
   void Update(const Scalar_t learningRate);

   /*! Backward for a single time unit
    *  at the corresponding call to Forward(...). */
   Matrix_t & CellBackward(Matrix_t & state_gradients_backward,
                           Matrix_t & cell_gradients_backward,
                           const Matrix_t & precStateActivations, const Matrix_t & precCellActivations,
                           const Matrix_t & input_gate, const Matrix_t & forget_gate,
                           const Matrix_t & candidate_gate, const Matrix_t & output_gate,
                           const Matrix_t & input, Matrix_t & input_gradient,
                           Matrix_t &di, Matrix_t &df, Matrix_t &dc, Matrix_t &dout, size_t t);

   /*! Decides the values we'll update (NN with Sigmoid) */
   void InputGate(const Matrix_t &input, Matrix_t &di);

   /*! Forgets the past values (NN with Sigmoid) */
   void ForgetGate(const Matrix_t &input, Matrix_t &df);

   /*! Decides the new candidate values (NN with Tanh) */
   void CandidateValue(const Matrix_t &input, Matrix_t &dc);

   /*! Computes output values (NN with Sigmoid) */
   void OutputGate(const Matrix_t &input, Matrix_t &dout);

   /*! Prints the info about the layer */
   void Print() const override;

   /*! Writes the information and the weights about the layer in an XML node. */
   void AddWeightsXMLTo(void *parent) override;

   /*! Read the information and the weights about the layer from XML node. */
   void ReadWeightsFromXML(void *parent) override;

   /*! Getters */
   size_t GetInputSize()               const { return this->GetInputWidth(); }
   size_t GetTimeSteps()               const { return fTimeSteps; }
   size_t GetStateSize()               const { return fStateSize; }
   size_t GetCellSize()                const { return fCellSize; }

   inline bool DoesRememberState()       const { return fRememberState; }
   inline bool DoesReturnSequence()      const { return fReturnSequence; }

   inline DNN::EActivationFunction     GetActivationFunctionF1()        const { return fF1; }
   inline DNN::EActivationFunction     GetActivationFunctionF2()        const { return fF2; }

   /*! Most recently computed gate values */
   const Matrix_t                    & GetInputGateValue()                const { return fInputValue; }
   Matrix_t                          & GetInputGateValue()                      { return fInputValue; }
   const Matrix_t                    & GetCandidateValue()                const { return fCandidateValue; }
   Matrix_t                          & GetCandidateValue()                      { return fCandidateValue; }
   const Matrix_t                    & GetForgetGateValue()               const { return fForgetValue; }
   Matrix_t                          & GetForgetGateValue()                     { return fForgetValue; }
   const Matrix_t                    & GetOutputGateValue()               const { return fOutputValue; }
   Matrix_t                          & GetOutputGateValue()                     { return fOutputValue; }

   /*! Hidden state and cell state */
   const Matrix_t                    & GetState()                   const { return fState; }
   Matrix_t                          & GetState()                         { return fState; }
   const Matrix_t                    & GetCell()                    const { return fCell; }
   Matrix_t                          & GetCell()                          { return fCell; }

   /*! Gate weights (input and previous-state parts) */
   const Matrix_t                    & GetWeightsInputGate()              const { return fWeightsInputGate; }
   Matrix_t                          & GetWeightsInputGate()                    { return fWeightsInputGate; }
   const Matrix_t                    & GetWeightsCandidate()              const { return fWeightsCandidate; }
   Matrix_t                          & GetWeightsCandidate()                    { return fWeightsCandidate; }
   const Matrix_t                    & GetWeightsForgetGate()             const { return fWeightsForgetGate; }
   Matrix_t                          & GetWeightsForgetGate()                   { return fWeightsForgetGate; }
   const Matrix_t                    & GetWeightsOutputGate()             const { return fWeightsOutputGate; }
   Matrix_t                          & GetWeightsOutputGate()                   { return fWeightsOutputGate; }
   const Matrix_t                    & GetWeightsInputGateState()         const { return fWeightsInputGateState; }
   Matrix_t                          & GetWeightsInputGateState()               { return fWeightsInputGateState; }
   const Matrix_t                    & GetWeightsForgetGateState()        const { return fWeightsForgetGateState; }
   Matrix_t                          & GetWeightsForgetGateState()              { return fWeightsForgetGateState; }
   const Matrix_t                    & GetWeightsCandidateState()         const { return fWeightsCandidateState; }
   Matrix_t                          & GetWeightsCandidateState()               { return fWeightsCandidateState; }
   const Matrix_t                    & GetWeightsOutputGateState()        const { return fWeightsOutputGateState; }
   Matrix_t                          & GetWeightsOutputGateState()              { return fWeightsOutputGateState; }

   /*! Per-time-step activation derivatives (no bounds checking on the index) */
   const std::vector<Matrix_t>       & GetDerivativesInput()              const { return fDerivativesInput; }
   std::vector<Matrix_t>             & GetDerivativesInput()                    { return fDerivativesInput; }
   const Matrix_t                    & GetInputDerivativesAt(size_t i)    const { return fDerivativesInput[i]; }
   Matrix_t                          & GetInputDerivativesAt(size_t i)           { return fDerivativesInput[i]; }
   const std::vector<Matrix_t>       & GetDerivativesForget()              const { return fDerivativesForget; }
   std::vector<Matrix_t>             & GetDerivativesForget()                    { return fDerivativesForget; }
   const Matrix_t                    & GetForgetDerivativesAt(size_t i)    const { return fDerivativesForget[i]; }
   Matrix_t                          & GetForgetDerivativesAt(size_t i)          { return fDerivativesForget[i]; }
   const std::vector<Matrix_t>       & GetDerivativesCandidate()           const { return fDerivativesCandidate; }
   std::vector<Matrix_t>             & GetDerivativesCandidate()                 { return fDerivativesCandidate; }
   const Matrix_t                    & GetCandidateDerivativesAt(size_t i) const { return fDerivativesCandidate[i]; }
   Matrix_t                          & GetCandidateDerivativesAt(size_t i)       { return fDerivativesCandidate[i]; }
   const std::vector<Matrix_t>       & GetDerivativesOutput()              const { return fDerivativesOutput; }
   std::vector<Matrix_t>             & GetDerivativesOutput()                    { return fDerivativesOutput; }
   const Matrix_t                    & GetOutputDerivativesAt(size_t i)    const { return fDerivativesOutput[i]; }
   Matrix_t                          & GetOutputDerivativesAt(size_t i)          { return fDerivativesOutput[i]; }

   /*! Per-time-step gate and cell values (no bounds checking on the index) */
   const std::vector<Matrix_t>       & GetInputGateTensor()              const { return input_gate_value; }
   std::vector<Matrix_t>             & GetInputGateTensor()                    { return input_gate_value; }
   const Matrix_t                    & GetInputGateTensorAt(size_t i)    const { return input_gate_value[i]; }
   Matrix_t                          & GetInputGateTensorAt(size_t i)           { return input_gate_value[i]; }
   const std::vector<Matrix_t>       & GetForgetGateTensor()              const { return forget_gate_value; }
   std::vector<Matrix_t>             & GetForgetGateTensor()                    { return forget_gate_value; }
   const Matrix_t                    & GetForgetGateTensorAt(size_t i)    const { return forget_gate_value[i]; }
   Matrix_t                          & GetForgetGateTensorAt(size_t i)          { return forget_gate_value[i]; }
   const std::vector<Matrix_t>       & GetCandidateGateTensor()           const { return candidate_gate_value; }
   std::vector<Matrix_t>             & GetCandidateGateTensor()                 { return candidate_gate_value; }
   const Matrix_t                    & GetCandidateGateTensorAt(size_t i) const { return candidate_gate_value[i]; }
   Matrix_t                          & GetCandidateGateTensorAt(size_t i)       { return candidate_gate_value[i]; }
   const std::vector<Matrix_t>       & GetOutputGateTensor()              const { return output_gate_value; }
   std::vector<Matrix_t>             & GetOutputGateTensor()                    { return output_gate_value; }
   const Matrix_t                    & GetOutputGateTensorAt(size_t i)    const { return output_gate_value[i]; }
   Matrix_t                          & GetOutputGateTensorAt(size_t i)          { return output_gate_value[i]; }
   const std::vector<Matrix_t>       & GetCellTensor()                    const { return cell_value; }
   std::vector<Matrix_t>             & GetCellTensor()                          { return cell_value; }
   const Matrix_t                    & GetCellTensorAt(size_t i)          const { return cell_value[i]; }
   Matrix_t                          & GetCellTensorAt(size_t i)                { return cell_value[i]; }

   /*! Gate biases */
   const Matrix_t                   & GetInputGateBias()         const { return fInputGateBias; }
   Matrix_t                         & GetInputGateBias()               { return fInputGateBias; }
   const Matrix_t                   & GetForgetGateBias()        const { return fForgetGateBias; }
   Matrix_t                         & GetForgetGateBias()              { return fForgetGateBias; }
   const Matrix_t                   & GetCandidateBias()         const { return fCandidateBias; }
   Matrix_t                         & GetCandidateBias()               { return fCandidateBias; }
   const Matrix_t                   & GetOutputGateBias()        const { return fOutputGateBias; }
   Matrix_t                         & GetOutputGateBias()              { return fOutputGateBias; }

   /*! Weight and bias gradients */
   const Matrix_t                   & GetWeightsInputGradients()        const { return fWeightsInputGradients; }
   Matrix_t                         & GetWeightsInputGradients()              { return fWeightsInputGradients; }
   const Matrix_t                   & GetWeightsInputStateGradients()   const { return fWeightsInputStateGradients; }
   Matrix_t                         & GetWeightsInputStateGradients()         { return fWeightsInputStateGradients; }
   const Matrix_t                   & GetInputBiasGradients()           const { return fInputBiasGradients; }
   Matrix_t                         & GetInputBiasGradients()                 { return fInputBiasGradients; }
   const Matrix_t                   & GetWeightsForgetGradients()      const { return fWeightsForgetGradients; }
   Matrix_t                         & GetWeightsForgetGradients()            { return fWeightsForgetGradients; }
   const Matrix_t                   & GetWeightsForgetStateGradients() const { return fWeightsForgetStateGradients; }
   Matrix_t                         & GetWeightsForgetStateGradients()       { return fWeightsForgetStateGradients; }
   /*! Legacy misspelled name ("Weigths") of the const overload of
    *  GetWeightsForgetStateGradients(); kept so existing callers still compile.
    *  Prefer the correctly spelled getter in new code. */
   const Matrix_t                   & GetWeigthsForgetStateGradients() const { return fWeightsForgetStateGradients; }
   const Matrix_t                   & GetForgetBiasGradients()         const { return fForgetBiasGradients; }
   Matrix_t                         & GetForgetBiasGradients()               { return fForgetBiasGradients; }
   const Matrix_t                   & GetWeightsCandidateGradients()      const { return fWeightsCandidateGradients; }
   Matrix_t                         & GetWeightsCandidateGradients()            { return fWeightsCandidateGradients; }
   const Matrix_t                   & GetWeightsCandidateStateGradients() const { return fWeightsCandidateStateGradients; }
   Matrix_t                         & GetWeightsCandidateStateGradients()       { return fWeightsCandidateStateGradients; }
   const Matrix_t                   & GetCandidateBiasGradients()         const { return fCandidateBiasGradients; }
   Matrix_t                         & GetCandidateBiasGradients()               { return fCandidateBiasGradients; }
   const Matrix_t                   & GetWeightsOutputGradients()        const { return fWeightsOutputGradients; }
   Matrix_t                         & GetWeightsOutputGradients()              { return fWeightsOutputGradients; }
   const Matrix_t                   & GetWeightsOutputStateGradients()   const { return fWeightsOutputStateGradients; }
   Matrix_t                         & GetWeightsOutputStateGradients()         { return fWeightsOutputStateGradients; }
   const Matrix_t                   & GetOutputBiasGradients()           const { return fOutputBiasGradients; }
   Matrix_t                         & GetOutputBiasGradients()                 { return fOutputBiasGradients; }

   /*! Tensors holding all weights / all weight gradients (used by cuDNN) */
   Tensor_t &GetWeightsTensor() { return fWeightsTensor; }
   const Tensor_t &GetWeightsTensor() const { return fWeightsTensor; }
   Tensor_t &GetWeightGradientsTensor() { return fWeightGradientsTensor; }
   const Tensor_t &GetWeightGradientsTensor() const { return fWeightGradientsTensor; }

   /*! Cached forward/backward tensors */
   Tensor_t &GetX() { return fX; }
   Tensor_t &GetY() { return fY; }
   Tensor_t &GetDX() { return fDx; }
   Tensor_t &GetDY() { return fDy; }
};


//______________________________________________________________________________

//

// Basic LSTM-Layer Implementation

//______________________________________________________________________________


/*! Main constructor.
 *
 *  Forwards the layer geometry to VGeneralLayer (8 weight matrices, 4 bias
 *  vectors), binds the per-gate weight/bias/gradient references to the
 *  storage owned by the base class, and allocates one (batchSize x stateSize)
 *  matrix per time step for every cached gate value and activation derivative.
 *
 *  \param batchSize       number of events per batch
 *  \param stateSize       size of the hidden state (also used as the cell size)
 *  \param inputSize       number of input features per time step
 *  \param timeSteps       number of time steps in a sequence
 *  \param rememberState   stored in fRememberState
 *  \param returnSequence  if true the output depth is timeSteps, otherwise 1
 *  \param f1              activation stored in fF1 (sigmoid by default)
 *  \param f2              activation stored in fF2 (tanh by default)
 *  \param fA              weight initialization scheme passed to the base class
 *
 *  The unnamed bool parameter ("training") is accepted for interface
 *  compatibility and is not used here.
 */
template <typename Architecture_t>
TBasicLSTMLayer<Architecture_t>::TBasicLSTMLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps,
                                                 bool rememberState, bool returnSequence, DNN::EActivationFunction f1,
                                                 DNN::EActivationFunction f2, bool /* training */,
                                                 DNN::EInitialization fA)
   : VGeneralLayer<Architecture_t>(
        batchSize, 1, timeSteps, inputSize, 1, (returnSequence) ? timeSteps : 1, stateSize, 8,
        {stateSize, stateSize, stateSize, stateSize, stateSize, stateSize, stateSize, stateSize},
        {inputSize, inputSize, inputSize, inputSize, stateSize, stateSize, stateSize, stateSize}, 4,
        {stateSize, stateSize, stateSize, stateSize}, {1, 1, 1, 1}, batchSize, (returnSequence) ? timeSteps : 1,
        stateSize, fA),
     fStateSize(stateSize), fCellSize(stateSize), fTimeSteps(timeSteps), fRememberState(rememberState),
     fReturnSequence(returnSequence), fF1(f1), fF2(f2), fInputValue(batchSize, stateSize),
     fCandidateValue(batchSize, stateSize), fForgetValue(batchSize, stateSize), fOutputValue(batchSize, stateSize),
     fState(batchSize, stateSize), fCell(batchSize, stateSize), fWeightsInputGate(this->GetWeightsAt(0)),
     fWeightsInputGateState(this->GetWeightsAt(4)), fInputGateBias(this->GetBiasesAt(0)),
     fWeightsForgetGate(this->GetWeightsAt(1)), fWeightsForgetGateState(this->GetWeightsAt(5)),
     fForgetGateBias(this->GetBiasesAt(1)), fWeightsCandidate(this->GetWeightsAt(2)),
     fWeightsCandidateState(this->GetWeightsAt(6)), fCandidateBias(this->GetBiasesAt(2)),
     fWeightsOutputGate(this->GetWeightsAt(3)), fWeightsOutputGateState(this->GetWeightsAt(7)),
     fOutputGateBias(this->GetBiasesAt(3)), fWeightsInputGradients(this->GetWeightGradientsAt(0)),
     fWeightsInputStateGradients(this->GetWeightGradientsAt(4)), fInputBiasGradients(this->GetBiasGradientsAt(0)),
     fWeightsForgetGradients(this->GetWeightGradientsAt(1)),
     fWeightsForgetStateGradients(this->GetWeightGradientsAt(5)), fForgetBiasGradients(this->GetBiasGradientsAt(1)),
     fWeightsCandidateGradients(this->GetWeightGradientsAt(2)),
     fWeightsCandidateStateGradients(this->GetWeightGradientsAt(6)),
     fCandidateBiasGradients(this->GetBiasGradientsAt(2)), fWeightsOutputGradients(this->GetWeightGradientsAt(3)),
     fWeightsOutputStateGradients(this->GetWeightGradientsAt(7)), fOutputBiasGradients(this->GetBiasGradientsAt(3))
{
   // The final size of every per-time-step buffer is known up front, so
   // reserve once instead of letting each vector reallocate (and relocate
   // its matrices) while it grows.
   for (auto *perStepBuffer :
        {&fDerivativesInput, &fDerivativesForget, &fDerivativesCandidate, &fDerivativesOutput, &input_gate_value,
         &forget_gate_value, &candidate_gate_value, &output_gate_value, &cell_value}) {
      perStepBuffer->reserve(timeSteps);
   }

   for (size_t i = 0; i < timeSteps; ++i) {
      fDerivativesInput.emplace_back(batchSize, stateSize);
      fDerivativesForget.emplace_back(batchSize, stateSize);
      fDerivativesCandidate.emplace_back(batchSize, stateSize);
      fDerivativesOutput.emplace_back(batchSize, stateSize);
      input_gate_value.emplace_back(batchSize, stateSize);
      forget_gate_value.emplace_back(batchSize, stateSize);
      candidate_gate_value.emplace_back(batchSize, stateSize);
      output_gate_value.emplace_back(batchSize, stateSize);
      cell_value.emplace_back(batchSize, stateSize);
   }

   // Backend-specific tensor setup; must run after all members are constructed.
   Architecture_t::InitializeLSTMTensors(this);
}


 //______________________________________________________________________________

/*! Copy constructor.
 *
 *  Copy-constructs the VGeneralLayer base, re-binds every weight / bias /
 *  gradient reference to the storage of *this* object (never to `layer`), and
 *  copies the cached state: hidden state, cell state, latest gate values and
 *  all per-time-step gate values and activation derivatives.
 *
 *  fDescriptors and fWorkspace are not copied; they keep their in-class
 *  default (nullptr) until Initialize() is called on the new object.
 */
template <typename Architecture_t>
TBasicLSTMLayer<Architecture_t>::TBasicLSTMLayer(const TBasicLSTMLayer &layer)
   : VGeneralLayer<Architecture_t>(layer),
      fStateSize(layer.fStateSize),
      fCellSize(layer.fCellSize),
      fTimeSteps(layer.fTimeSteps),
      fRememberState(layer.fRememberState),
      fReturnSequence(layer.fReturnSequence),
      fF1(layer.GetActivationFunctionF1()),
      fF2(layer.GetActivationFunctionF2()),
      fInputValue(layer.GetBatchSize(), layer.GetStateSize()),
      fCandidateValue(layer.GetBatchSize(), layer.GetStateSize()),
      fForgetValue(layer.GetBatchSize(), layer.GetStateSize()),
      fOutputValue(layer.GetBatchSize(), layer.GetStateSize()),
      fState(layer.GetBatchSize(), layer.GetStateSize()),
      fCell(layer.GetBatchSize(), layer.GetCellSize()),
      fWeightsInputGate(this->GetWeightsAt(0)),
      fWeightsInputGateState(this->GetWeightsAt(4)),
      fInputGateBias(this->GetBiasesAt(0)),
      fWeightsForgetGate(this->GetWeightsAt(1)),
      fWeightsForgetGateState(this->GetWeightsAt(5)),
      fForgetGateBias(this->GetBiasesAt(1)),
      fWeightsCandidate(this->GetWeightsAt(2)),
      fWeightsCandidateState(this->GetWeightsAt(6)),
      fCandidateBias(this->GetBiasesAt(2)),
      fWeightsOutputGate(this->GetWeightsAt(3)),
      fWeightsOutputGateState(this->GetWeightsAt(7)),
      fOutputGateBias(this->GetBiasesAt(3)),
      fWeightsInputGradients(this->GetWeightGradientsAt(0)),
      fWeightsInputStateGradients(this->GetWeightGradientsAt(4)),
      fInputBiasGradients(this->GetBiasGradientsAt(0)),
      fWeightsForgetGradients(this->GetWeightGradientsAt(1)),
      fWeightsForgetStateGradients(this->GetWeightGradientsAt(5)),
      fForgetBiasGradients(this->GetBiasGradientsAt(1)),
      fWeightsCandidateGradients(this->GetWeightGradientsAt(2)),
      fWeightsCandidateStateGradients(this->GetWeightGradientsAt(6)),
      fCandidateBiasGradients(this->GetBiasGradientsAt(2)),
      fWeightsOutputGradients(this->GetWeightGradientsAt(3)),
      fWeightsOutputStateGradients(this->GetWeightGradientsAt(7)),
      fOutputBiasGradients(this->GetBiasGradientsAt(3))
{
   const size_t batchSize = layer.GetBatchSize();
   const size_t stateSize = layer.GetStateSize();

   // The final size of every per-time-step buffer is known up front, so
   // reserve once instead of reallocating while the vectors grow.
   for (auto *perStepBuffer :
        {&fDerivativesInput, &fDerivativesForget, &fDerivativesCandidate, &fDerivativesOutput, &input_gate_value,
         &forget_gate_value, &candidate_gate_value, &output_gate_value, &cell_value}) {
      perStepBuffer->reserve(fTimeSteps);
   }

   // Appends a fresh (batchSize x stateSize) matrix to `dst` and fills it with
   // the contents of `src` through the backend's Copy routine.
   auto appendCopy = [batchSize, stateSize](std::vector<Matrix_t> &dst, const Matrix_t &src) {
      dst.emplace_back(batchSize, stateSize);
      Architecture_t::Copy(dst.back(), src);
   };

   for (size_t t = 0; t < fTimeSteps; ++t) {
      appendCopy(fDerivativesInput, layer.GetInputDerivativesAt(t));
      appendCopy(fDerivativesForget, layer.GetForgetDerivativesAt(t));
      appendCopy(fDerivativesCandidate, layer.GetCandidateDerivativesAt(t));
      appendCopy(fDerivativesOutput, layer.GetOutputDerivativesAt(t));

      appendCopy(input_gate_value, layer.GetInputGateTensorAt(t));
      appendCopy(forget_gate_value, layer.GetForgetGateTensorAt(t));
      appendCopy(candidate_gate_value, layer.GetCandidateGateTensorAt(t));
      appendCopy(output_gate_value, layer.GetOutputGateTensorAt(t));
      appendCopy(cell_value, layer.GetCellTensorAt(t));
   }

   // Copy the hidden state and the cell state.
   Architecture_t::Copy(fState, layer.GetState());
   Architecture_t::Copy(fCell, layer.GetCell());

   // Copy each gate values.
   Architecture_t::Copy(fInputValue, layer.GetInputGateValue());
   Architecture_t::Copy(fCandidateValue, layer.GetCandidateValue());
   Architecture_t::Copy(fForgetValue, layer.GetForgetGateValue());
   Architecture_t::Copy(fOutputValue, layer.GetOutputGateValue());

   // Note: nothing in this body copies gradient matrices explicitly; the
   // gradient references above simply alias the storage of the
   // copy-constructed base class.

   // Backend-specific tensor setup; must run after all members are constructed.
   Architecture_t::InitializeLSTMTensors(this);
}


//______________________________________________________________________________

/*! Initializes the layer: first the generic base-class initialization, then
 *  the backend hooks that set up the LSTM descriptors and the workspace.
 *  The workspace hook receives the descriptors, so the descriptor hook runs
 *  first. */
template <typename Architecture_t>
void TBasicLSTMLayer<Architecture_t>::Initialize()
{
   using Base_t = VGeneralLayer<Architecture_t>;

   // Qualified call: run the base-class implementation, not this override.
   Base_t::Initialize();

   Architecture_t::InitializeLSTMDescriptors(this->fDescriptors, this);
   Architecture_t::InitializeLSTMWorkspace(this->fWorkspace, this->fDescriptors, this);
}


//______________________________________________________________________________

/*! Evaluates the input gate for one time step:
 *
 *     fInputValue = f1(W_input . input + W_state . state + bias)
 *
 *  where f1 is the first activation function of the layer (sigmoid by default).
 *
 *  \param input  input matrix of the current time step
 *  \param di     receives the derivative of f1 evaluated at the pre-activation
 */
template <typename Architecture_t>
auto inline TBasicLSTMLayer<Architecture_t>::InputGate(const Matrix_t &input, Matrix_t &di)
-> void
{
   const DNN::EActivationFunction gateActivation = fF1;

   // Contribution of the previous hidden state, kept in a scratch matrix
   // shaped like the gate output.
   Matrix_t stateTerm(fInputValue.GetNrows(), fInputValue.GetNcols());
   Architecture_t::MultiplyTranspose(stateTerm, fState, fWeightsInputGateState);

   // Contribution of the current input, written directly into the gate buffer.
   Architecture_t::MultiplyTranspose(fInputValue, input, fWeightsInputGate);

   // Accumulate the state term and the bias to form the pre-activation.
   Architecture_t::ScaleAdd(fInputValue, stateTerm);
   Architecture_t::AddRowWise(fInputValue, fInputGateBias);

   // The derivative must be taken before the activation overwrites the
   // pre-activation values in place.
   DNN::evaluateDerivativeMatrix<Architecture_t>(di, gateActivation, fInputValue);
   DNN::evaluateMatrix<Architecture_t>(fInputValue, gateActivation);
}


 //______________________________________________________________________________

template <typename Architecture_t>
auto inline TBasicLSTMLayer<Architecture_t>::ForgetGate(const Matrix_t &input, Matrix_t &df)
-> void
{
   /*! Forget gate for one time step:
    *     fForgetValue = act(W_input . input + W_state . fState + bias)
    *  with act = GetActivationFunctionF1() (sigmoid per the class documentation).
    *
    *  \param input  input of the current time step
    *  \param df     receives the result of evaluateDerivativeMatrix applied to
    *                the pre-activation gate values */
   Matrix_t &gate = fForgetValue;

   // Contribution of the input goes directly into the gate buffer.
   Architecture_t::MultiplyTranspose(gate, input, fWeightsForgetGate);

   // Contribution of the previous hidden state needs a scratch matrix of the same shape.
   Matrix_t recurrentTerm(gate.GetNrows(), gate.GetNcols());
   Architecture_t::MultiplyTranspose(recurrentTerm, fState, fWeightsForgetGateState);

   Architecture_t::ScaleAdd(gate, recurrentTerm);
   Architecture_t::AddRowWise(gate, fForgetGateBias);

   // The derivative has to be evaluated before the gate buffer is overwritten
   // with the activated values.
   const DNN::EActivationFunction gateActivation = this->GetActivationFunctionF1();
   DNN::evaluateDerivativeMatrix<Architecture_t>(df, gateActivation, gate);
   DNN::evaluateMatrix<Architecture_t>(gate, gateActivation);
}


 //______________________________________________________________________________

template <typename Architecture_t>
auto inline TBasicLSTMLayer<Architecture_t>::CandidateValue(const Matrix_t &input, Matrix_t &dc)
-> void
{
   /*! Candidate values for one time step:
    *     fCandidateValue = act(W_input . input + W_state . fState + bias)
    *  with act = GetActivationFunctionF2() (tanh per the original notes).
    *  CellForward() later multiplies these element-wise with the input gate values.
    *
    *  \param input  input of the current time step
    *  \param dc     receives the result of evaluateDerivativeMatrix applied to
    *                the pre-activation candidate values */
   Matrix_t &candidate = fCandidateValue;

   // Contribution of the input goes directly into the candidate buffer.
   Architecture_t::MultiplyTranspose(candidate, input, fWeightsCandidate);

   // Contribution of the previous hidden state needs a scratch matrix of the same shape.
   Matrix_t recurrentTerm(candidate.GetNrows(), candidate.GetNcols());
   Architecture_t::MultiplyTranspose(recurrentTerm, fState, fWeightsCandidateState);

   Architecture_t::ScaleAdd(candidate, recurrentTerm);
   Architecture_t::AddRowWise(candidate, fCandidateBias);

   // The derivative has to be evaluated before the buffer is overwritten
   // with the activated values.
   const DNN::EActivationFunction candidateActivation = this->GetActivationFunctionF2();
   DNN::evaluateDerivativeMatrix<Architecture_t>(dc, candidateActivation, candidate);
   DNN::evaluateMatrix<Architecture_t>(candidate, candidateActivation);
}


 //______________________________________________________________________________

template <typename Architecture_t>
auto inline TBasicLSTMLayer<Architecture_t>::OutputGate(const Matrix_t &input, Matrix_t &dout)
-> void
{
   /*! Output gate for one time step:
    *     fOutputValue = act(W_input . input + W_state . fState + bias)
    *  with act = GetActivationFunctionF1() (sigmoid per the class documentation).
    *  CellForward() uses these values to build the next hidden state.
    *
    *  \param input  input of the current time step
    *  \param dout   receives the result of evaluateDerivativeMatrix applied to
    *                the pre-activation gate values */
   Matrix_t &gate = fOutputValue;

   // Contribution of the input goes directly into the gate buffer.
   Architecture_t::MultiplyTranspose(gate, input, fWeightsOutputGate);

   // Contribution of the previous hidden state needs a scratch matrix of the same shape.
   Matrix_t recurrentTerm(gate.GetNrows(), gate.GetNcols());
   Architecture_t::MultiplyTranspose(recurrentTerm, fState, fWeightsOutputGateState);

   Architecture_t::ScaleAdd(gate, recurrentTerm);
   Architecture_t::AddRowWise(gate, fOutputGateBias);

   // The derivative has to be evaluated before the gate buffer is overwritten
   // with the activated values.
   const DNN::EActivationFunction gateActivation = this->GetActivationFunctionF1();
   DNN::evaluateDerivativeMatrix<Architecture_t>(dout, gateActivation, gate);
   DNN::evaluateMatrix<Architecture_t>(gate, gateActivation);
}


 //______________________________________________________________________________

/*! Forward pass over the whole input sequence.
 *
 *  \param input       input tensor; the in-code notes describe its layout as B x T x D
 *  \param isTraining  only forwarded to Architecture_t::RNNForward on the cuDNN
 *                     path; the CPU path below does not read it
 *
 *  Two code paths:
 *   - Architecture_t::IsCudnn(): the sequence is processed by one RNNForward
 *     call working on the cached tensors fX / fY, then the function returns.
 *   - otherwise: explicit loop over the time steps, calling the four gate
 *     functions and CellForward() for each step.
 *  In both cases the result is written to this->GetOutput(): the full sequence
 *  when fReturnSequence is set, otherwise only the last time step. */
template <typename Architecture_t>


auto inline TBasicLSTMLayer<Architecture_t>::Forward(Tensor_t &input, bool  isTraining )

-> void

{


   // cuDNN path: the whole sequence is handled by a single RNNForward call.

   if (Architecture_t::IsCudnn()) {


      // input size is stride[1] of input tensor that is B x T x inputSize
      // (only checked in builds where assert is active)

      assert(input.GetStrides()[1] == this->GetInputSize());


      // x and y refer to the members fX and fY; Backward() works on the same members.

      Tensor_t &x = this->fX;

      Tensor_t &y = this->fY;

      Architecture_t::Rearrange(x, input);


      //const auto &weights = this->GetWeightsAt(0);

      const auto &weights = this->GetWeightsTensor();

      // Tensor_t cx({1}); // not used for normal RNN

      // Tensor_t cy({1}); // not used for normal RNN


      // hx is fState - tensor are of right shape

      auto &hx = this->fState;

      //auto &cx = this->fCell;

      auto &cx = this->fCell; // the cell state passed in is fCell itself (not a separate empty tensor)

      // hy / cy refer to the same objects as hx / cx, i.e. RNNForward receives
      // fState and fCell both as initial and as final state arguments.

      auto &hy = this->fState;

      auto &cy = this->fCell;


      auto & rnnDesc = static_cast<RNNDescriptors_t &>(*fDescriptors);

      auto & rnnWork = static_cast<RNNWorkspace_t &>(*fWorkspace);


      Architecture_t::RNNForward(x, hx, cx, weights, y, hy, cy, rnnDesc, rnnWork, isTraining);


      if (fReturnSequence) {

         Architecture_t::Rearrange(this->GetOutput(), y); // swap B and T from y to Output

      } else {

         // tmp is a reference to y (full cudnn output): the last slice along the
         // first dimension, reshaped to {shape[1], 1, shape[2]} before being copied out.

         Tensor_t tmp = (y.At(y.GetShape()[0] - 1)).Reshape({y.GetShape()[1], 1, y.GetShape()[2]});

         Architecture_t::Copy(this->GetOutput(), tmp);

      }


      return;

   }


   // Standard CPU implementation


   // D : input size

   // H : state size

   // T : time size

   // B : batch size


   // Local copy of the input with time as the first dimension (T, B, input width).

   Tensor_t arrInput( fTimeSteps, this->GetBatchSize(), this->GetInputWidth());

   //Tensor_t &arrInput = this->GetX();


   Architecture_t::Rearrange(arrInput, input); // B x T x D


   // Hidden state of every time step (T, B, H).

   Tensor_t arrOutput ( fTimeSteps, this->GetBatchSize(), fStateSize);


   // Unless the layer is configured to remember its state, start from a zero
   // hidden state and zero cell state (see InitState()).

   if (!this->fRememberState) {

      InitState(DNN::EInitialization::kZero);

   }


   /*! Pass each gate values to CellForward() to calculate

    *  next hidden state and next cell state. */

   for (size_t t = 0; t < fTimeSteps; ++t) {

      /* Feed forward network: value of each gate being computed at each timestep t. */

      // Each gate function writes its activated values to the matching f*Value
      // member and the activation derivative to fDerivatives*[t].

      Matrix_t arrInputMt = arrInput[t];

      InputGate(arrInputMt, fDerivativesInput[t]);

      ForgetGate(arrInputMt, fDerivativesForget[t]);

      CandidateValue(arrInputMt, fDerivativesCandidate[t]);

      OutputGate(arrInputMt, fDerivativesOutput[t]);


      // Store the gate values of this time step. This must happen before
      // CellForward(), which modifies its first argument (fInputValue) in place.

      Architecture_t::Copy(this->GetInputGateTensorAt(t), fInputValue);

      Architecture_t::Copy(this->GetForgetGateTensorAt(t), fForgetValue);

      Architecture_t::Copy(this->GetCandidateGateTensorAt(t), fCandidateValue);

      Architecture_t::Copy(this->GetOutputGateTensorAt(t), fOutputValue);


      // Updates fCell and fState for this time step.

      CellForward(fInputValue, fForgetValue, fCandidateValue, fOutputValue);

      // NOTE(review): presumably arrOutput[t] yields a matrix sharing storage with
      // arrOutput, so this copy fills slice t of arrOutput - confirm for each backend.

      Matrix_t arrOutputMt = arrOutput[t];

      Architecture_t::Copy(arrOutputMt, fState);

      Architecture_t::Copy(this->GetCellTensorAt(t), fCell);

   }


   // check if full output needs to be returned

   if (fReturnSequence)

      Architecture_t::Rearrange(this->GetOutput(), arrOutput); // B x T x D

   else {

      // get T[end[]]

      Tensor_t tmp = arrOutput.At(fTimeSteps - 1); // take last time step

      // shape of tmp is  for CPU (columnwise) B x D ,   need to reshape to  make a B x D x 1

      //  and transpose it to 1 x D x B  (this is how output is expected in columnmajor format)

      tmp = tmp.Reshape( {tmp.GetShape()[0], tmp.GetShape()[1], 1});

      assert(tmp.GetSize() == this->GetOutput().GetSize());

      assert( tmp.GetShape()[0] == this->GetOutput().GetShape()[2]);  // B is last dim in output and first in tmp

      Architecture_t::Rearrange(this->GetOutput(), tmp);

      // keep the per-time-step hidden states: Backward() reads fY as its
      // arr_output when fReturnSequence is false

      fY = arrOutput;

   }

}


 //______________________________________________________________________________

template <typename Architecture_t>
auto inline TBasicLSTMLayer<Architecture_t>::CellForward(Matrix_t &inputGateValues, const Matrix_t &forgetGateValues,
                                                         const Matrix_t &candidateValues, const Matrix_t &outputGateValues)
-> void
{
   /*! State update for a single time step, following the original notes:
    *     fCell  <- fCell (x) forgetGateValues + inputGateValues (x) candidateValues
    *     fState <- act(fCell) (x) outputGateValues
    *  where (x) is Architecture_t::Hadamard and act = GetActivationFunctionF2().
    *
    *  inputGateValues is passed by non-const reference and is used as the
    *  destination of the Hadamard call with candidateValues, so callers must not
    *  rely on its content afterwards. The other three arguments are read-only. */
   Matrix_t &cellState = fCell;
   Matrix_t &hiddenState = fState;

   // New cell state.
   Architecture_t::Hadamard(cellState, forgetGateValues);
   Architecture_t::Hadamard(inputGateValues, candidateValues);
   Architecture_t::ScaleAdd(cellState, inputGateValues);

   // The activation is applied to a copy so that fCell keeps the raw cell state.
   Matrix_t activatedCell(cellState.GetNrows(), cellState.GetNcols());
   Architecture_t::Copy(activatedCell, cellState);
   DNN::evaluateMatrix<Architecture_t>(activatedCell, this->GetActivationFunctionF2());

   // New hidden state (handed on to the next time step).
   Architecture_t::Copy(hiddenState, activatedCell);
   Architecture_t::Hadamard(hiddenState, outputGateValues);
}


 //____________________________________________________________________________

/*! Backward pass over the whole sequence.
 *
 *  \param gradients_backward    destination for the gradients w.r.t. the layer
 *                               input; left untouched when it is empty
 *  \param activations_backward  the layer input (activations of the previous
 *                               layer); the in-code notes give B x T x D
 *
 *  Two code paths, mirroring Forward():
 *   - Architecture_t::IsCudnn(): a single RNNBackward call on the cached
 *     tensors fX, fY, fDx, fDy, then the function returns.
 *   - otherwise: the weight/bias gradient members are zeroed and the time steps
 *     are walked from last to first, calling CellBackward() for each. */
template <typename Architecture_t>


auto inline TBasicLSTMLayer<Architecture_t>::Backward(Tensor_t &gradients_backward,           // B x T x D

                                                      const Tensor_t &activations_backward)   // B x T x D

-> void

{


   // BACKWARD for CUDNN

   if (Architecture_t::IsCudnn()) {


      Tensor_t &x = this->fX;

      Tensor_t &y = this->fY;

      Tensor_t &dx = this->fDx;

      Tensor_t &dy = this->fDy;


      // input size is stride[1] of input tensor that is B x T x inputSize
      // (only checked in builds where assert is active)

      assert(activations_backward.GetStrides()[1] == this->GetInputSize());


      Architecture_t::Rearrange(x, activations_backward);


      if (!fReturnSequence) {

         // Only the last time step received a gradient: clear dy and write the
         // activation gradients into its last slice. y is not touched here, so
         // RNNBackward sees whatever fY currently holds.


         // Architecture_t::InitializeZero(dy);

         Architecture_t::InitializeZero(dy);


         // Tensor_t tmp1 = y.At(y.GetShape()[0] - 1).Reshape({y.GetShape()[1], 1, y.GetShape()[2]});

         // dy is a tensor of shape (rowmajor for Cudnn): T x B x S

         // and this->ActivationGradients is  B x (T=1) x S

         // NOTE(review): tmp2 is presumably a view sharing storage with dy, so the
         // Copy below fills the last time slice of dy - confirm.

         Tensor_t tmp2 = dy.At(dy.GetShape()[0] - 1).Reshape({dy.GetShape()[1], 1, dy.GetShape()[2]});


         // Architecture_t::Copy(tmp1, this->GetOutput());

         Architecture_t::Copy(tmp2, this->GetActivationGradients());

      } else {

         // Full sequence: bring output and activation gradients into the layout of y / dy.

         Architecture_t::Rearrange(y, this->GetOutput());

         Architecture_t::Rearrange(dy, this->GetActivationGradients());

      }


      // Architecture_t::PrintTensor(this->GetOutput(), "output before bwd");


      // for cudnn Matrix_t and Tensor_t are same type

      const auto &weights = this->GetWeightsTensor();

      auto &weightGradients = this->GetWeightGradientsTensor();

      // note that cudnnRNNBackwardWeights accumulate the weight gradients.

      // We need then to initialize the tensor to zero every time

      Architecture_t::InitializeZero(weightGradients);


      // hx is fState

      auto &hx = this->GetState();

      auto &cx = this->GetCell();

      //auto &cx = this->GetCell();

      // dhy, dcy, dhx and dcx are further references to hx / cx: the state and
      // cell tensors are passed to RNNBackward as states and as state gradients.

      auto &dhy = hx;

      auto &dcy = cx;

      auto &dhx = hx;

      auto &dcx = cx;


      auto & rnnDesc = static_cast<RNNDescriptors_t &>(*fDescriptors);

      auto & rnnWork = static_cast<RNNWorkspace_t &>(*fWorkspace);


      Architecture_t::RNNBackward(x, hx, cx, y, dy, dhy, dcy, weights, dx, dhx, dcx, weightGradients, rnnDesc, rnnWork);


      // Architecture_t::PrintTensor(this->GetOutput(), "output after bwd");


      // Hand the input gradients back only if the caller supplied a non-empty tensor.

      if (gradients_backward.GetSize() != 0)

         Architecture_t::Rearrange(gradients_backward, dx);


      return;

   }

   // CPU implementation


   // gradients_backward is activationGradients of layer before it, which is input layer.

   // Currently, gradients_backward is for input(x) and not for state.

   // For the state it can be:

   // Running gradient w.r.t. the hidden state, updated at every time step below.

   Matrix_t state_gradients_backward(this->GetBatchSize(), fStateSize); // B x H

   DNN::initialize<Architecture_t>(state_gradients_backward, DNN::EInitialization::kZero); // B x H


   // Running gradient w.r.t. the cell state.

   Matrix_t cell_gradients_backward(this->GetBatchSize(), fStateSize); // B x H

   DNN::initialize<Architecture_t>(cell_gradients_backward, DNN::EInitialization::kZero); // B x H


   // dummy == true means the caller gave no usable gradients_backward tensor;
   // in that case nothing is written back to it at the end.

   bool dummy = false;

   if (gradients_backward.GetSize() == 0 || gradients_backward[0].GetNrows() == 0 || gradients_backward[0].GetNcols() == 0) {

      dummy = true;

   }


   // Time-major buffer for the input gradients (T, B, input size). It is not
   // initialised here; CellBackward() is handed one slice per time step.

   Tensor_t arr_gradients_backward ( fTimeSteps, this->GetBatchSize(), this->GetInputSize());


   //Architecture_t::Rearrange(arr_gradients_backward, gradients_backward); // B x T x D

   // activations_backward is input.

   Tensor_t arr_activations_backward ( fTimeSteps, this->GetBatchSize(), this->GetInputSize());


   Architecture_t::Rearrange(arr_activations_backward, activations_backward); // B x T x D


   /*! For backpropagation, we need to calculate loss. For loss, output must be known.

    *  We obtain outputs during forward propagation and place the results in arr_output tensor. */

   Tensor_t arr_output (  fTimeSteps, this->GetBatchSize(), fStateSize);


   // Zero matrix used as "previous" hidden state and cell state at the first time step.
   // NOTE(review): this is used regardless of fRememberState - confirm that is intended.

   Matrix_t initState(this->GetBatchSize(), fCellSize); // B x H

   DNN::initialize<Architecture_t>(initState, DNN::EInitialization::kZero); // B x H


   // This will take partial derivative of state[t] w.r.t state[t-1]


   // Incoming activation gradients with time as the first dimension (T, B, H).

   Tensor_t arr_actgradients(fTimeSteps, this->GetBatchSize(), fStateSize);


   if (fReturnSequence) {

      Architecture_t::Rearrange(arr_output, this->GetOutput());

      Architecture_t::Rearrange(arr_actgradients, this->GetActivationGradients());

   } else {

      // here for CPU need to transpose the input activation gradients into the right format

      // fY holds the per-time-step hidden states saved by Forward() in this mode.

      arr_output = fY;

      // Only the last time step has an incoming gradient; all others stay zero.

      Architecture_t::InitializeZero(arr_actgradients);

      // need to reshape to pad a time dimension = 1 (note here is columnmajor tensors)

      // NOTE(review): tmp_grad is presumably a view sharing storage with
      // arr_actgradients, so the Rearrange below fills its last time slice - confirm.

      Tensor_t tmp_grad = arr_actgradients.At(fTimeSteps - 1).Reshape( {this->GetBatchSize(), fStateSize, 1});

      assert(tmp_grad.GetSize() == this->GetActivationGradients().GetSize());

      assert(tmp_grad.GetShape()[0] == this->GetActivationGradients().GetShape()[2]);  // B in tmp is [0] and [2] in input act. gradients


      Architecture_t::Rearrange(tmp_grad, this->GetActivationGradients());

   }


   /*! There are 8 weight-gradient matrices and 4 bias-gradient vectors in total.

    *  They are reset to zero before the time loop so that they do not start from stale values. */


   // Input Gate.

   fWeightsInputGradients.Zero();

   fWeightsInputStateGradients.Zero();

   fInputBiasGradients.Zero();


   // Forget Gate.

   fWeightsForgetGradients.Zero();

   fWeightsForgetStateGradients.Zero();

   fForgetBiasGradients.Zero();


   // Candidate Gate.

   fWeightsCandidateGradients.Zero();

   fWeightsCandidateStateGradients.Zero();

   fCandidateBiasGradients.Zero();


   // Output Gate.

   fWeightsOutputGradients.Zero();

   fWeightsOutputStateGradients.Zero();

   fOutputBiasGradients.Zero();


   // Walk the time steps from last to first. t counts from fTimeSteps down to 1
   // (t is unsigned, so the loop cannot test t >= 0); the step index is t-1.

   for (size_t t = fTimeSteps; t > 0; t--) {

      // Store the sum of gradients obtained at each timestep during backward pass.

      Architecture_t::ScaleAdd(state_gradients_backward, arr_actgradients[t-1]);

      if (t > 1) {

         // Previous hidden / cell state come from the values stored for step t-2.

         const Matrix_t &prevStateActivations = arr_output[t-2];

         const Matrix_t &prevCellActivations = this->GetCellTensorAt(t-2);

         // fDerivatives*[t-1] were filled by the gate functions during Forward().
         // NOTE(review): dx is presumably a view into arr_gradients_backward so that
         // the input gradient of this step ends up in that tensor - confirm.

         Matrix_t dx = arr_gradients_backward[t-1];

         CellBackward(state_gradients_backward, cell_gradients_backward,

                      prevStateActivations, prevCellActivations,

                      this->GetInputGateTensorAt(t-1), this->GetForgetGateTensorAt(t-1),

                      this->GetCandidateGateTensorAt(t-1), this->GetOutputGateTensorAt(t-1),

                      arr_activations_backward[t-1], dx,

                      fDerivativesInput[t-1], fDerivativesForget[t-1],

                      fDerivativesCandidate[t-1], fDerivativesOutput[t-1], t-1);

      } else {

         // First time step: there is no stored predecessor, use the zero matrix
         // for both the previous hidden state and the previous cell state.

         const Matrix_t &prevStateActivations = initState;

         const Matrix_t &prevCellActivations = initState;

         Matrix_t dx = arr_gradients_backward[t-1];

         CellBackward(state_gradients_backward, cell_gradients_backward,

                      prevStateActivations, prevCellActivations,

                      this->GetInputGateTensorAt(t-1), this->GetForgetGateTensorAt(t-1),

                      this->GetCandidateGateTensorAt(t-1), this->GetOutputGateTensorAt(t-1),

                      arr_activations_backward[t-1], dx,

                      fDerivativesInput[t-1], fDerivativesForget[t-1],

                      fDerivativesCandidate[t-1], fDerivativesOutput[t-1], t-1);

        }

   }


   // Write the input gradients back in the caller's layout, if a tensor was supplied.

   if (!dummy) {

      Architecture_t::Rearrange(gradients_backward, arr_gradients_backward );

   }


}


 //______________________________________________________________________________

template <typename Architecture_t>
auto inline TBasicLSTMLayer<Architecture_t>::CellBackward(Matrix_t & state_gradients_backward,
                                                          Matrix_t & cell_gradients_backward,
                                                          const Matrix_t & precStateActivations, const Matrix_t & precCellActivations,
                                                          const Matrix_t & input_gate, const Matrix_t & forget_gate,
                                                          const Matrix_t & candidate_gate, const Matrix_t & output_gate,
                                                          const Matrix_t & input, Matrix_t & input_gradient,
                                                          Matrix_t &di, Matrix_t &df, Matrix_t &dc, Matrix_t &dout,
                                                          size_t t)
-> Matrix_t &
{
   /*! Backward step for the single time step t.
    *
    *  Builds the two quantities that depend on the stored cell state of step t
    *  (GetCellTensorAt(t)), using act = GetActivationFunctionF2():
    *     cell_tanh     = act(cell_t)
    *     cell_gradient = result of evaluateDerivativeMatrix on cell_t
    *  and then forwards everything, together with the weight, bias and gradient
    *  members of the layer, to Architecture_t::LSTMLayerBackward.
    *
    *  \return whatever Architecture_t::LSTMLayerBackward returns */
   const DNN::EActivationFunction cellActivation = this->GetActivationFunctionF2();
   auto &cellAtT = this->GetCellTensorAt(t);
   const auto nRows = cellAtT.GetNrows();
   const auto nCols = cellAtT.GetNcols();

   // Activated cell state of this time step (computed on a copy).
   Matrix_t cell_tanh(nRows, nCols);
   Architecture_t::Copy(cell_tanh, cellAtT);
   DNN::evaluateMatrix<Architecture_t>(cell_tanh, cellActivation);

   // Activation derivative evaluated on the same stored cell state.
   Matrix_t cell_gradient(nRows, nCols);
   DNN::evaluateDerivativeMatrix<Architecture_t>(cell_gradient, cellActivation, cellAtT);

   return Architecture_t::LSTMLayerBackward(
      state_gradients_backward, cell_gradients_backward,
      // gradients w.r.t. the input weights of the four gates
      fWeightsInputGradients, fWeightsForgetGradients, fWeightsCandidateGradients, fWeightsOutputGradients,
      // gradients w.r.t. the hidden-state weights of the four gates
      fWeightsInputStateGradients, fWeightsForgetStateGradients, fWeightsCandidateStateGradients,
      fWeightsOutputStateGradients,
      // gradients w.r.t. the biases of the four gates
      fInputBiasGradients, fForgetBiasGradients, fCandidateBiasGradients, fOutputBiasGradients,
      // activation derivatives stored during the forward pass
      di, df, dc, dout,
      // previous hidden / cell state and the gate values of this time step
      precStateActivations, precCellActivations,
      input_gate, forget_gate, candidate_gate, output_gate,
      // the weights themselves
      fWeightsInputGate, fWeightsForgetGate, fWeightsCandidate, fWeightsOutputGate,
      fWeightsInputGateState, fWeightsForgetGateState, fWeightsCandidateState, fWeightsOutputGateState,
      // layer input of this time step and the destination for its gradient
      input, input_gradient,
      cell_gradient, cell_tanh);
}


 //______________________________________________________________________________

template <typename Architecture_t>
auto TBasicLSTMLayer<Architecture_t>::InitState(DNN::EInitialization /* m */)
-> void
{
   /*! Resets both the hidden state and the cell state.
    *  The initialization argument is ignored (its name is commented out in the
    *  signature): both matrices are always initialised with kZero. */
   const DNN::EInitialization zeroInit = DNN::EInitialization::kZero;

   auto &hiddenState = this->GetState();
   auto &cellState = this->GetCell();

   DNN::initialize<Architecture_t>(hiddenState, zeroInit);
   DNN::initialize<Architecture_t>(cellState, zeroInit);
}


 //______________________________________________________________________________

template<typename Architecture_t>
auto TBasicLSTMLayer<Architecture_t>::Print() const
-> void
{
   /*! Writes a one-line summary of the layer to std::cout: input size, state
    *  size, number of time steps and the dimensions of the output tensor
    *  (first size, then rows and columns of its first slice). */
   const auto &output = this->GetOutput();

   std::cout << " LSTM Layer: \t "
             << " (NInput = " << this->GetInputSize()          // input size
             << ", NState = " << this->GetStateSize()          // hidden state size
             << ", NTime  = " << this->GetTimeSteps() << " )"  // time size
             << "\tOutput = ( " << output.GetFirstSize() << " , " << output[0].GetNrows() << " , "
             << output[0].GetNcols() << " )\n";
}


 //______________________________________________________________________________

template <typename Architecture_t>
auto inline TBasicLSTMLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
-> void
{
   /*! Serialises the layer as a new "LSTMLayer" child node of parent.
    *  The node carries the layer configuration as attributes, followed by the
    *  weight and bias matrices, written gate by gate in the order
    *  input, forget, candidate, output. For each gate: input weights
    *  (index 2*gate), state weights (index 2*gate + 1), then biases (index gate). */
   auto &&xml = gTools().xmlengine();
   auto layerxml = xml.NewChild(parent, nullptr, "LSTMLayer");

   // Layer configuration: outputSize, cellSize, inputSize, timeSteps, flags.
   xml.NewAttr(layerxml, nullptr, "StateSize", gTools().StringFromInt(this->GetStateSize()));
   xml.NewAttr(layerxml, nullptr, "CellSize", gTools().StringFromInt(this->GetCellSize()));
   xml.NewAttr(layerxml, nullptr, "InputSize", gTools().StringFromInt(this->GetInputSize()));
   xml.NewAttr(layerxml, nullptr, "TimeSteps", gTools().StringFromInt(this->GetTimeSteps()));
   xml.NewAttr(layerxml, nullptr, "RememberState", gTools().StringFromInt(this->DoesRememberState()));
   xml.NewAttr(layerxml, nullptr, "ReturnSequence", gTools().StringFromInt(this->DoesReturnSequence()));

   // XML tag of every matrix. "OuputWeights" (sic) is spelled exactly as
   // ReadWeightsFromXML() looks it up; the two must stay in sync.
   const char *const weightTags[] = {"InputWeights",     "InputStateWeights",
                                     "ForgetWeights",    "ForgetStateWeights",
                                     "CandidateWeights", "CandidateStateWeights",
                                     "OuputWeights",     "OutputStateWeights"};
   const char *const biasTags[] = {"InputBiases", "ForgetBiases", "CandidateBiases", "OutputBiases"};

   for (size_t gate = 0; gate < 4; ++gate) {
      const size_t inputIdx = 2 * gate;
      const size_t stateIdx = inputIdx + 1;
      this->WriteMatrixToXML(layerxml, weightTags[inputIdx], this->GetWeightsAt(inputIdx));
      this->WriteMatrixToXML(layerxml, weightTags[stateIdx], this->GetWeightsAt(stateIdx));
      this->WriteMatrixToXML(layerxml, biasTags[gate], this->GetBiasesAt(gate));
   }
}


 //______________________________________________________________________________

template <typename Architecture_t>
auto inline TBasicLSTMLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
-> void
{
   /*! Reads the weight and bias matrices of the layer from the XML node parent.
    *  Matrices are read gate by gate in the order input, forget, candidate,
    *  output. For each gate: input weights (index 2*gate), state weights
    *  (index 2*gate + 1), then biases (index gate). The layer attributes
    *  (sizes, flags) are not read here. */

   // XML tag of every matrix. "OuputWeights" (sic) is spelled exactly as
   // AddWeightsXMLTo() writes it; the two must stay in sync.
   const char *const weightTags[] = {"InputWeights",     "InputStateWeights",
                                     "ForgetWeights",    "ForgetStateWeights",
                                     "CandidateWeights", "CandidateStateWeights",
                                     "OuputWeights",     "OutputStateWeights"};
   const char *const biasTags[] = {"InputBiases", "ForgetBiases", "CandidateBiases", "OutputBiases"};

   for (size_t gate = 0; gate < 4; ++gate) {
      const size_t inputIdx = 2 * gate;
      const size_t stateIdx = inputIdx + 1;
      this->ReadMatrixXML(parent, weightTags[inputIdx], this->GetWeightsAt(inputIdx));
      this->ReadMatrixXML(parent, weightTags[stateIdx], this->GetWeightsAt(stateIdx));
      this->ReadMatrixXML(parent, biasTags[gate], this->GetBiasesAt(gate));
   }
}


} // namespace RNN

} // namespace DNN

} // namespace TMVA


#endif // TMVA_DNN_LSTM_LAYER

TMatrix.h

TMVA::DNN::RNN::TBasicLSTMLayer::InputGate
void InputGate(const Matrix_t &input, Matrix_t &di)
Decides the values we'll update (NN with Sigmoid).
Definition LSTMLayer.h:479

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetGateTensorAt
const Matrix_t & GetForgetGateTensorAt(size_t i) const
Definition LSTMLayer.h:276

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsOutputGateState
Matrix_t & GetWeightsOutputGateState()
Definition LSTMLayer.h:251

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputGateTensor
const std::vector< Matrix_t > & GetOutputGateTensor() const
Definition LSTMLayer.h:282

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsTensor
Tensor_t fWeightsTensor
Tensor for all weights.
Definition LSTMLayer.h:132

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputGateTensor
const std::vector< Matrix_t > & GetInputGateTensor() const
Definition LSTMLayer.h:270

TMVA::DNN::RNN::TBasicLSTMLayer::GetDerivativesOutput
std::vector< Matrix_t > & GetDerivativesOutput()
Definition LSTMLayer.h:266

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeigthsForgetStateGradients
const Matrix_t & GetWeigthsForgetStateGradients() const
Definition LSTMLayer.h:307

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsForgetGate
Matrix_t & GetWeightsForgetGate()
Definition LSTMLayer.h:241

TMVA::DNN::RNN::TBasicLSTMLayer::Matrix_t
typename Architecture_t::Matrix_t Matrix_t
Definition LSTMLayer.h:61

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateGateTensorAt
Matrix_t & GetCandidateGateTensorAt(size_t i)
Definition LSTMLayer.h:281

TMVA::DNN::RNN::TBasicLSTMLayer::InitState
void InitState(DNN::EInitialization m=DNN::EInitialization::kZero)
Initialize the hidden state and cell state method.
Definition LSTMLayer.h:902

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsCandidateGradients
Matrix_t & fWeightsCandidateGradients
Gradients w.r.t the candidate gate - input weights.
Definition LSTMLayer.h:124

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputGateBias
const Matrix_t & GetOutputGateBias() const
Definition LSTMLayer.h:297

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsCandidateStateGradients
Matrix_t & GetWeightsCandidateStateGradients()
Definition LSTMLayer.h:314

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsInputGate
Matrix_t & GetWeightsInputGate()
Definition LSTMLayer.h:237

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsInputGateState
Matrix_t & GetWeightsInputGateState()
Definition LSTMLayer.h:245

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateGateTensor
const std::vector< Matrix_t > & GetCandidateGateTensor() const
Definition LSTMLayer.h:278

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputGateTensorAt
const Matrix_t & GetInputGateTensorAt(size_t i) const
Definition LSTMLayer.h:272

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetGateTensor
std::vector< Matrix_t > & GetForgetGateTensor()
Definition LSTMLayer.h:275

TMVA::DNN::RNN::TBasicLSTMLayer::cell_value
std::vector< Matrix_t > cell_value
cell value for every time step
Definition LSTMLayer.h:112

TMVA::DNN::RNN::TBasicLSTMLayer::Backward
void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) override
Backpropagates the error.
Definition LSTMLayer.h:681

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsOutputGradients
Matrix_t & fWeightsOutputGradients
Gradients w.r.t the output gate - input weights.
Definition LSTMLayer.h:127

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputGateBias
Matrix_t & GetOutputGateBias()
Definition LSTMLayer.h:298

TMVA::DNN::RNN::TBasicLSTMLayer::fOutputBiasGradients
Matrix_t & fOutputBiasGradients
Gradients w.r.t the output gate - bias weights.
Definition LSTMLayer.h:129

TMVA::DNN::RNN::TBasicLSTMLayer::Initialize
void Initialize() override
Initialize the weights according to the given initialization method.
Definition LSTMLayer.h:469

TMVA::DNN::RNN::TBasicLSTMLayer::fF1
DNN::EActivationFunction fF1
Activation function: sigmoid.
Definition LSTMLayer.h:82

TMVA::DNN::RNN::TBasicLSTMLayer::fDy
Tensor_t fDy
cached activation gradient (input of backward) as T x B x S
Definition LSTMLayer.h:139

TMVA::DNN::RNN::TBasicLSTMLayer::GetDX
Tensor_t & GetDX()
Definition LSTMLayer.h:331

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsOutputGate
Matrix_t & fWeightsOutputGate
Output Gate weights for input, fWeights[6].
Definition LSTMLayer.h:104

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetGateBias
Matrix_t & GetForgetGateBias()
Definition LSTMLayer.h:294

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsCandidateStateGradients
Matrix_t & fWeightsCandidateStateGradients
Gradients w.r.t the candidate gate - hidden state weights.
Definition LSTMLayer.h:125

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputGateBias
const Matrix_t & GetInputGateBias() const
Definition LSTMLayer.h:291

TMVA::DNN::RNN::TBasicLSTMLayer::Scalar_t
typename Architecture_t::Scalar_t Scalar_t
Definition LSTMLayer.h:62

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputSize
size_t GetInputSize() const
Getters.
Definition LSTMLayer.h:211

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetGateTensorAt
Matrix_t & GetForgetGateTensorAt(size_t i)
Definition LSTMLayer.h:277

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputGateTensorAt
const Matrix_t & GetOutputGateTensorAt(size_t i) const
Definition LSTMLayer.h:284

TMVA::DNN::RNN::TBasicLSTMLayer::GetCellTensorAt
const Matrix_t & GetCellTensorAt(size_t i) const
Definition LSTMLayer.h:288

TMVA::DNN::RNN::TBasicLSTMLayer::fX
Tensor_t fX
cached input tensor as T x B x I
Definition LSTMLayer.h:136

TMVA::DNN::RNN::TBasicLSTMLayer::GetActivationFunctionF2
DNN::EActivationFunction GetActivationFunctionF2() const
Definition LSTMLayer.h:220

TMVA::DNN::RNN::TBasicLSTMLayer::GetCellTensorAt
Matrix_t & GetCellTensorAt(size_t i)
Definition LSTMLayer.h:289

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsInputStateGradients
Matrix_t & fWeightsInputStateGradients
Gradients w.r.t the input gate - hidden state weights.
Definition LSTMLayer.h:119

TMVA::DNN::RNN::TBasicLSTMLayer::CellForward
void CellForward(Matrix_t &inputGateValues, const Matrix_t &forgetGateValues, const Matrix_t &candidateValues, const Matrix_t &outputGateValues)
Forward for a single cell (time unit).
Definition LSTMLayer.h:655

TMVA::DNN::RNN::TBasicLSTMLayer::CellBackward
Matrix_t & CellBackward(Matrix_t &state_gradients_backward, Matrix_t &cell_gradients_backward, const Matrix_t &precStateActivations, const Matrix_t &precCellActivations, const Matrix_t &input_gate, const Matrix_t &forget_gate, const Matrix_t &candidate_gate, const Matrix_t &output_gate, const Matrix_t &input, Matrix_t &input_gradient, Matrix_t &di, Matrix_t &df, Matrix_t &dc, Matrix_t &dout, size_t t)
Backward for a single time unit, corresponding to the matching call to Forward(...).
Definition LSTMLayer.h:863

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsInputStateGradients
const Matrix_t & GetWeightsInputStateGradients() const
Definition LSTMLayer.h:301

TMVA::DNN::RNN::TBasicLSTMLayer::fDerivativesOutput
std::vector< Matrix_t > fDerivativesOutput
First derivatives of the output gate activations.
Definition LSTMLayer.h:116

TMVA::DNN::RNN::TBasicLSTMLayer::GetStateSize
size_t GetStateSize() const
Definition LSTMLayer.h:213

TMVA::DNN::RNN::TBasicLSTMLayer::ReadWeightsFromXML
void ReadWeightsFromXML(void *parent) override
Read the information and the weights about the layer from XML node.
Definition LSTMLayer.h:953

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsForgetGateState
Matrix_t & fWeightsForgetGateState
Forget Gate weights for prev state, fWeights[3].
Definition LSTMLayer.h:97

TMVA::DNN::RNN::TBasicLSTMLayer::fOutputGateBias
Matrix_t & fOutputGateBias
Output Gate bias.
Definition LSTMLayer.h:106

TMVA::DNN::RNN::TBasicLSTMLayer::fDerivativesCandidate
std::vector< Matrix_t > fDerivativesCandidate
First derivatives of the candidate gate activations.
Definition LSTMLayer.h:115

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputDerivativesAt
const Matrix_t & GetInputDerivativesAt(size_t i) const
Definition LSTMLayer.h:255

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsForgetGate
Matrix_t & fWeightsForgetGate
Forget Gate weights for input, fWeights[2].
Definition LSTMLayer.h:96

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsInputGradients
Matrix_t & fWeightsInputGradients
Gradients w.r.t the input gate - input weights.
Definition LSTMLayer.h:118

TMVA::DNN::RNN::TBasicLSTMLayer::Tensor_t
typename Architecture_t::Tensor_t Tensor_t
Definition LSTMLayer.h:63

TMVA::DNN::RNN::TBasicLSTMLayer::GetDerivativesInput
const std::vector< Matrix_t > & GetDerivativesInput() const
Definition LSTMLayer.h:253

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsCandidate
Matrix_t & GetWeightsCandidate()
Definition LSTMLayer.h:239

TMVA::DNN::RNN::TBasicLSTMLayer::fForgetGateBias
Matrix_t & fForgetGateBias
Forget Gate bias.
Definition LSTMLayer.h:98

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsInputGradients
Matrix_t & GetWeightsInputGradients()
Definition LSTMLayer.h:300

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateBiasGradients
Matrix_t & GetCandidateBiasGradients()
Definition LSTMLayer.h:316

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsOutputGradients
Matrix_t & GetWeightsOutputGradients()
Definition LSTMLayer.h:318

TMVA::DNN::RNN::TBasicLSTMLayer::fCandidateBias
Matrix_t & fCandidateBias
Candidate Gate bias.
Definition LSTMLayer.h:102

TMVA::DNN::RNN::TBasicLSTMLayer::GetY
Tensor_t & GetY()
Definition LSTMLayer.h:330

TMVA::DNN::RNN::TBasicLSTMLayer::fCandidateValue
Matrix_t fCandidateValue
Computed candidate values.
Definition LSTMLayer.h:86

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightGradientsTensor
Tensor_t & GetWeightGradientsTensor()
Definition LSTMLayer.h:326

TMVA::DNN::RNN::TBasicLSTMLayer::DoesRememberState
bool DoesRememberState() const
Definition LSTMLayer.h:216

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsOutputGradients
const Matrix_t & GetWeightsOutputGradients() const
Definition LSTMLayer.h:317

TMVA::DNN::RNN::TBasicLSTMLayer::LayerDescriptor_t
typename Architecture_t::RecurrentDescriptor_t LayerDescriptor_t
Definition LSTMLayer.h:65

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsInputGradients
const Matrix_t & GetWeightsInputGradients() const
Definition LSTMLayer.h:299

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsCandidateState
Matrix_t & GetWeightsCandidateState()
Definition LSTMLayer.h:249

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputBiasGradients
Matrix_t & GetInputBiasGradients()
Definition LSTMLayer.h:304

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputBiasGradients
const Matrix_t & GetInputBiasGradients() const
Definition LSTMLayer.h:303

TMVA::DNN::RNN::TBasicLSTMLayer::GetTimeSteps
size_t GetTimeSteps() const
Definition LSTMLayer.h:212

TMVA::DNN::RNN::TBasicLSTMLayer::fF2
DNN::EActivationFunction fF2
Activation function: tanh.
Definition LSTMLayer.h:83

TMVA::DNN::RNN::TBasicLSTMLayer::AddWeightsXMLTo
void AddWeightsXMLTo(void *parent) override
Writes the information and the weights about the layer in an XML node.
Definition LSTMLayer.h:923

TMVA::DNN::RNN::TBasicLSTMLayer::fInputBiasGradients
Matrix_t & fInputBiasGradients
Gradients w.r.t the input gate - bias weights.
Definition LSTMLayer.h:120

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsOutputStateGradients
Matrix_t & GetWeightsOutputStateGradients()
Definition LSTMLayer.h:320

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsCandidateState
Matrix_t & fWeightsCandidateState
Candidate Gate weights for prev state, fWeights[5].
Definition LSTMLayer.h:101

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetGateValue
Matrix_t & GetForgetGateValue()
Definition LSTMLayer.h:227

TMVA::DNN::RNN::TBasicLSTMLayer::fDerivativesForget
std::vector< Matrix_t > fDerivativesForget
First derivatives of the forget gate activations.
Definition LSTMLayer.h:114

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightGradientsTensor
const Tensor_t & GetWeightGradientsTensor() const
Definition LSTMLayer.h:327

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetDerivativesAt
Matrix_t & GetForgetDerivativesAt(size_t i)
Definition LSTMLayer.h:260

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsInputGateState
const Matrix_t & GetWeightsInputGateState() const
Definition LSTMLayer.h:244

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsInputStateGradients
Matrix_t & GetWeightsInputStateGradients()
Definition LSTMLayer.h:302

TMVA::DNN::RNN::TBasicLSTMLayer::HelperDescriptor_t
typename Architecture_t::DropoutDescriptor_t HelperDescriptor_t
Definition LSTMLayer.h:68

TMVA::DNN::RNN::TBasicLSTMLayer::fForgetBiasGradients
Matrix_t & fForgetBiasGradients
Gradients w.r.t the forget gate - bias weights.
Definition LSTMLayer.h:123

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateBias
const Matrix_t & GetCandidateBias() const
Definition LSTMLayer.h:295

TMVA::DNN::RNN::TBasicLSTMLayer::output_gate_value
std::vector< Matrix_t > output_gate_value
output gate value for every time step
Definition LSTMLayer.h:111

TMVA::DNN::RNN::TBasicLSTMLayer::GetDerivativesCandidate
const std::vector< Matrix_t > & GetDerivativesCandidate() const
Definition LSTMLayer.h:261

TMVA::DNN::RNN::TBasicLSTMLayer::fStateSize
size_t fStateSize
Hidden state size for LSTM.
Definition LSTMLayer.h:75

TMVA::DNN::RNN::TBasicLSTMLayer::CandidateValue
void CandidateValue(const Matrix_t &input, Matrix_t &dc)
Decides the new candidate values (NN with Tanh).
Definition LSTMLayer.h:515

TMVA::DNN::RNN::TBasicLSTMLayer::fDerivativesInput
std::vector< Matrix_t > fDerivativesInput
First derivatives of the input gate activations.
Definition LSTMLayer.h:113

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsForgetGateState
const Matrix_t & GetWeightsForgetGateState() const
Definition LSTMLayer.h:246

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsForgetGateState
Matrix_t & GetWeightsForgetGateState()
Definition LSTMLayer.h:247

TMVA::DNN::RNN::TBasicLSTMLayer::GetX
Tensor_t & GetX()
Definition LSTMLayer.h:329

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsInputGate
const Matrix_t & GetWeightsInputGate() const
Definition LSTMLayer.h:236

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputGateValue
const Matrix_t & GetInputGateValue() const
Definition LSTMLayer.h:222

TMVA::DNN::RNN::TBasicLSTMLayer::Update
void Update(const Scalar_t learningRate)

TMVA::DNN::RNN::TBasicLSTMLayer::DoesReturnSequence
bool DoesReturnSequence() const
Definition LSTMLayer.h:217

TMVA::DNN::RNN::TBasicLSTMLayer::fDx
Tensor_t fDx
cached gradient on the input (output of backward) as T x B x I
Definition LSTMLayer.h:138

TMVA::DNN::RNN::TBasicLSTMLayer::RNNWorkspace_t
typename Architecture_t::RNNWorkspace_t RNNWorkspace_t
Definition LSTMLayer.h:70

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputGateValue
Matrix_t & GetOutputGateValue()
Definition LSTMLayer.h:229

TMVA::DNN::RNN::TBasicLSTMLayer::TBasicLSTMLayer
TBasicLSTMLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, DNN::EActivationFunction f1=DNN::EActivationFunction::kSigmoid, DNN::EActivationFunction f2=DNN::EActivationFunction::kTanh, bool training=true, DNN::EInitialization fA=DNN::EInitialization::kZero)
Constructor.
Definition LSTMLayer.h:341

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsForgetStateGradients
Matrix_t & GetWeightsForgetStateGradients()
Definition LSTMLayer.h:308

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputBiasGradients
const Matrix_t & GetOutputBiasGradients() const
Definition LSTMLayer.h:321

TMVA::DNN::RNN::TBasicLSTMLayer::GetDY
Tensor_t & GetDY()
Definition LSTMLayer.h:332

TMVA::DNN::RNN::TBasicLSTMLayer::TensorDescriptor_t
typename Architecture_t::TensorDescriptor_t TensorDescriptor_t
Definition LSTMLayer.h:67

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsOutputStateGradients
const Matrix_t & GetWeightsOutputStateGradients() const
Definition LSTMLayer.h:319

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsOutputStateGradients
Matrix_t & fWeightsOutputStateGradients
Gradients w.r.t the output gate - hidden state weights.
Definition LSTMLayer.h:128

TMVA::DNN::RNN::TBasicLSTMLayer::fReturnSequence
bool fReturnSequence
Return in output full sequence or just last element.
Definition LSTMLayer.h:80

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsForgetGradients
Matrix_t & GetWeightsForgetGradients()
Definition LSTMLayer.h:306

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsCandidateGradients
Matrix_t & GetWeightsCandidateGradients()
Definition LSTMLayer.h:312

TMVA::DNN::RNN::TBasicLSTMLayer::Forward
void Forward(Tensor_t &input, bool isTraining=true) override
Computes the next hidden state and next cell state with given input matrix.
Definition LSTMLayer.h:553

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsForgetGradients
const Matrix_t & GetWeightsForgetGradients() const
Definition LSTMLayer.h:305

TMVA::DNN::RNN::TBasicLSTMLayer::fCell
Matrix_t fCell
Cell state of LSTM.
Definition LSTMLayer.h:90

TMVA::DNN::RNN::TBasicLSTMLayer::GetDerivativesCandidate
std::vector< Matrix_t > & GetDerivativesCandidate()
Definition LSTMLayer.h:262

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetBiasGradients
const Matrix_t & GetForgetBiasGradients() const
Definition LSTMLayer.h:309

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputGateTensor
std::vector< Matrix_t > & GetOutputGateTensor()
Definition LSTMLayer.h:283

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateValue
Matrix_t & GetCandidateValue()
Definition LSTMLayer.h:225

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetDerivativesAt
const Matrix_t & GetForgetDerivativesAt(size_t i) const
Definition LSTMLayer.h:259

TMVA::DNN::RNN::TBasicLSTMLayer::fState
Matrix_t fState
Hidden state of LSTM.
Definition LSTMLayer.h:89

TMVA::DNN::RNN::TBasicLSTMLayer::OutputGate
void OutputGate(const Matrix_t &input, Matrix_t &dout)
Computes output values (NN with Sigmoid).
Definition LSTMLayer.h:533

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetGateValue
const Matrix_t & GetForgetGateValue() const
Definition LSTMLayer.h:226

TMVA::DNN::RNN::TBasicLSTMLayer::candidate_gate_value
std::vector< Matrix_t > candidate_gate_value
candidate gate value for every time step
Definition LSTMLayer.h:110

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputGateValue
Matrix_t & GetInputGateValue()
Definition LSTMLayer.h:223

TMVA::DNN::RNN::TBasicLSTMLayer::GetState
const Matrix_t & GetState() const
Definition LSTMLayer.h:231

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsCandidateState
const Matrix_t & GetWeightsCandidateState() const
Definition LSTMLayer.h:248

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateBias
Matrix_t & GetCandidateBias()
Definition LSTMLayer.h:296

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetGateTensor
const std::vector< Matrix_t > & GetForgetGateTensor() const
Definition LSTMLayer.h:274

TMVA::DNN::RNN::TBasicLSTMLayer::GetDerivativesOutput
const std::vector< Matrix_t > & GetDerivativesOutput() const
Definition LSTMLayer.h:265

TMVA::DNN::RNN::TBasicLSTMLayer::GetCellTensor
const std::vector< Matrix_t > & GetCellTensor() const
Definition LSTMLayer.h:286

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsTensor
const Tensor_t & GetWeightsTensor() const
Definition LSTMLayer.h:325

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsInputGate
Matrix_t & fWeightsInputGate
Input Gate weights for input, fWeights[0].
Definition LSTMLayer.h:92

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateGateTensor
std::vector< Matrix_t > & GetCandidateGateTensor()
Definition LSTMLayer.h:279

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputDerivativesAt
const Matrix_t & GetOutputDerivativesAt(size_t i) const
Definition LSTMLayer.h:267

TMVA::DNN::RNN::TBasicLSTMLayer::GetCell
const Matrix_t & GetCell() const
Definition LSTMLayer.h:233

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsForgetStateGradients
Matrix_t & fWeightsForgetStateGradients
Gradients w.r.t the forget gate - hidden state weights.
Definition LSTMLayer.h:122

TMVA::DNN::RNN::TBasicLSTMLayer::fWorkspace
TWorkspace * fWorkspace
Definition LSTMLayer.h:142

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateGateTensorAt
const Matrix_t & GetCandidateGateTensorAt(size_t i) const
Definition LSTMLayer.h:280

TMVA::DNN::RNN::TBasicLSTMLayer::fOutputValue
Matrix_t fOutputValue
Computed output gate values.
Definition LSTMLayer.h:88

TMVA::DNN::RNN::TBasicLSTMLayer::fCellSize
size_t fCellSize
Cell state size of LSTM.
Definition LSTMLayer.h:76

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputDerivativesAt
Matrix_t & GetOutputDerivativesAt(size_t i)
Definition LSTMLayer.h:268

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputGateTensorAt
Matrix_t & GetInputGateTensorAt(size_t i)
Definition LSTMLayer.h:273

TMVA::DNN::RNN::TBasicLSTMLayer::GetDerivativesInput
std::vector< Matrix_t > & GetDerivativesInput()
Definition LSTMLayer.h:254

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsOutputGateState
Matrix_t & fWeightsOutputGateState
Output Gate weights for prev state, fWeights[7].
Definition LSTMLayer.h:105

TMVA::DNN::RNN::TBasicLSTMLayer::GetDerivativesForget
const std::vector< Matrix_t > & GetDerivativesForget() const
Definition LSTMLayer.h:257

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetBiasGradients
Matrix_t & GetForgetBiasGradients()
Definition LSTMLayer.h:310

TMVA::DNN::RNN::TBasicLSTMLayer::GetForgetGateBias
const Matrix_t & GetForgetGateBias() const
Definition LSTMLayer.h:293

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateDerivativesAt
const Matrix_t & GetCandidateDerivativesAt(size_t i) const
Definition LSTMLayer.h:263

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputGateBias
Matrix_t & GetInputGateBias()
Definition LSTMLayer.h:292

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputGateTensorAt
Matrix_t & GetOutputGateTensorAt(size_t i)
Definition LSTMLayer.h:285

TMVA::DNN::RNN::TBasicLSTMLayer::fTimeSteps
size_t fTimeSteps
Timesteps for LSTM.
Definition LSTMLayer.h:77

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateBiasGradients
const Matrix_t & GetCandidateBiasGradients() const
Definition LSTMLayer.h:315

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateValue
const Matrix_t & GetCandidateValue() const
Definition LSTMLayer.h:224

TMVA::DNN::RNN::TBasicLSTMLayer::WeightsDescriptor_t
typename Architecture_t::FilterDescriptor_t WeightsDescriptor_t
Definition LSTMLayer.h:66

TMVA::DNN::RNN::TBasicLSTMLayer::fInputGateBias
Matrix_t & fInputGateBias
Input Gate bias.
Definition LSTMLayer.h:94

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsForgetGate
const Matrix_t & GetWeightsForgetGate() const
Definition LSTMLayer.h:240

TMVA::DNN::RNN::TBasicLSTMLayer::input_gate_value
std::vector< Matrix_t > input_gate_value
input gate value for every time step
Definition LSTMLayer.h:108

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsCandidateStateGradients
const Matrix_t & GetWeightsCandidateStateGradients() const
Definition LSTMLayer.h:313

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsTensor
Tensor_t & GetWeightsTensor()
Definition LSTMLayer.h:324

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsForgetGradients
Matrix_t & fWeightsForgetGradients
Gradients w.r.t the forget gate - input weights.
Definition LSTMLayer.h:121

TMVA::DNN::RNN::TBasicLSTMLayer::GetDerivativesForget
std::vector< Matrix_t > & GetDerivativesForget()
Definition LSTMLayer.h:258

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsOutputGate
const Matrix_t & GetWeightsOutputGate() const
Definition LSTMLayer.h:242

TMVA::DNN::RNN::TBasicLSTMLayer::ForgetGate
void ForgetGate(const Matrix_t &input, Matrix_t &df)
Forgets the past values (NN with Sigmoid).
Definition LSTMLayer.h:497

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputGateTensor
std::vector< Matrix_t > & GetInputGateTensor()
Definition LSTMLayer.h:271

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputBiasGradients
Matrix_t & GetOutputBiasGradients()
Definition LSTMLayer.h:322

TMVA::DNN::RNN::TBasicLSTMLayer::Print
void Print() const override
Prints the info about the layer.
Definition LSTMLayer.h:911

TMVA::DNN::RNN::TBasicLSTMLayer::GetOutputGateValue
const Matrix_t & GetOutputGateValue() const
Definition LSTMLayer.h:228

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsOutputGateState
const Matrix_t & GetWeightsOutputGateState() const
Definition LSTMLayer.h:250

TMVA::DNN::RNN::TBasicLSTMLayer::GetCandidateDerivativesAt
Matrix_t & GetCandidateDerivativesAt(size_t i)
Definition LSTMLayer.h:264

TMVA::DNN::RNN::TBasicLSTMLayer::GetState
Matrix_t & GetState()
Definition LSTMLayer.h:232

TMVA::DNN::RNN::TBasicLSTMLayer::fInputValue
Matrix_t fInputValue
Computed input gate values.
Definition LSTMLayer.h:85

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsOutputGate
Matrix_t & GetWeightsOutputGate()
Definition LSTMLayer.h:243

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsCandidate
const Matrix_t & GetWeightsCandidate() const
Definition LSTMLayer.h:238

TMVA::DNN::RNN::TBasicLSTMLayer::GetWeightsCandidateGradients
const Matrix_t & GetWeightsCandidateGradients() const
Definition LSTMLayer.h:311

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightGradientsTensor
Tensor_t fWeightGradientsTensor
Tensor for all weight gradients.
Definition LSTMLayer.h:133

TMVA::DNN::RNN::TBasicLSTMLayer::GetCell
Matrix_t & GetCell()
Definition LSTMLayer.h:234

TMVA::DNN::RNN::TBasicLSTMLayer::GetInputDerivativesAt
Matrix_t & GetInputDerivativesAt(size_t i)
Definition LSTMLayer.h:256

TMVA::DNN::RNN::TBasicLSTMLayer::RNNDescriptors_t
typename Architecture_t::RNNDescriptors_t RNNDescriptors_t
Definition LSTMLayer.h:71

TMVA::DNN::RNN::TBasicLSTMLayer::GetActivationFunctionF1
DNN::EActivationFunction GetActivationFunctionF1() const
Definition LSTMLayer.h:219

TMVA::DNN::RNN::TBasicLSTMLayer::fY
Tensor_t fY
cached output tensor as T x B x S
Definition LSTMLayer.h:137

TMVA::DNN::RNN::TBasicLSTMLayer::forget_gate_value
std::vector< Matrix_t > forget_gate_value
forget gate value for every time step
Definition LSTMLayer.h:109

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsCandidate
Matrix_t & fWeightsCandidate
Candidate Gate weights for input, fWeights[4].
Definition LSTMLayer.h:100

TMVA::DNN::RNN::TBasicLSTMLayer::fRememberState
bool fRememberState
Remember state in next pass.
Definition LSTMLayer.h:79

TMVA::DNN::RNN::TBasicLSTMLayer::fWeightsInputGateState
Matrix_t & fWeightsInputGateState
Input Gate weights for prev state, fWeights[1].
Definition LSTMLayer.h:93

TMVA::DNN::RNN::TBasicLSTMLayer::fDescriptors
TDescriptors * fDescriptors
Keeps all the RNN descriptors.
Definition LSTMLayer.h:141

TMVA::DNN::RNN::TBasicLSTMLayer::GetCellTensor
std::vector< Matrix_t > & GetCellTensor()
Definition LSTMLayer.h:287

TMVA::DNN::RNN::TBasicLSTMLayer::GetCellSize
size_t GetCellSize() const
Definition LSTMLayer.h:214

TMVA::DNN::RNN::TBasicLSTMLayer::fCandidateBiasGradients
Matrix_t & fCandidateBiasGradients
Gradients w.r.t the candidate gate - bias weights.
Definition LSTMLayer.h:126

TMVA::DNN::RNN::TBasicLSTMLayer::fForgetValue
Matrix_t fForgetValue
Computed forget gate values.
Definition LSTMLayer.h:87

TMVA::DNN::VGeneralLayer::GetWeightsAt
const Matrix_t & GetWeightsAt(size_t i) const
Definition GeneralLayer.h:175

TMVA::DNN::VGeneralLayer::Initialize
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition GeneralLayer.h:395

TMVA::DNN::VGeneralLayer::GetOutput
const Tensor_t & GetOutput() const
Definition GeneralLayer.h:196

TMVA::DNN::VGeneralLayer::WriteMatrixToXML
void WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix)
Definition GeneralLayer.h:521

TMVA::DNN::VGeneralLayer::GetActivationGradients
const Tensor_t & GetActivationGradients() const
Definition GeneralLayer.h:199

TMVA::DNN::VGeneralLayer::GetBiasesAt
const Matrix_t & GetBiasesAt(size_t i) const
Definition GeneralLayer.h:181

TMVA::DNN::VGeneralLayer::GetBiasGradientsAt
const Matrix_t & GetBiasGradientsAt(size_t i) const
Definition GeneralLayer.h:193

TMVA::DNN::VGeneralLayer::GetBatchSize
size_t GetBatchSize() const
Getters.
Definition GeneralLayer.h:163

TMVA::DNN::VGeneralLayer::ReadMatrixXML
void ReadMatrixXML(void *node, const char *name, Matrix_t &matrix)
Definition GeneralLayer.h:544

TMVA::DNN::VGeneralLayer::GetWeightGradientsAt
const Matrix_t & GetWeightGradientsAt(size_t i) const
Definition GeneralLayer.h:187

TMVA::DNN::VGeneralLayer::VGeneralLayer
VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols, size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init)
Constructor.
Definition GeneralLayer.h:239

TMVA::DNN::VGeneralLayer::GetInputWidth
size_t GetInputWidth() const
Definition GeneralLayer.h:166

TMVA::Tools::xmlengine
TXMLEngine & xmlengine()
Definition Tools.h:262

TXMLEngine::NewChild
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition TXMLEngine.cxx:725

TXMLEngine::NewAttr
XMLAttrPointer_t NewAttr(XMLNodePointer_t xmlnode, XMLNsPointer_t, const char *name, const char *value)
creates new attribute for xmlnode, namespaces are not supported for attributes
Definition TXMLEngine.cxx:596

y
Double_t y[n]
Definition legend1.C:17

x
Double_t x[n]
Definition legend1.C:17

f1
TF1 * f1
Definition legend1.C:11

TMVA::DNN::RNN
Definition ContextHandles.h:93

TMVA::DNN
Definition Adadelta.h:36

TMVA::DNN::EInitialization
EInitialization
Definition Functions.h:72

TMVA::DNN::EInitialization::kZero
@ kZero
Definition Functions.h:76

TMVA::DNN::evaluateDerivativeMatrix
void evaluateDerivativeMatrix(typename Architecture_t::Matrix_t &B, EActivationFunction f, const typename Architecture_t::Matrix_t &A)
Definition Functions.h:160

TMVA::DNN::evaluateMatrix
void evaluateMatrix(typename Architecture_t::Matrix_t &A, EActivationFunction f)
Definition Functions.h:152

TMVA::DNN::EActivationFunction
EActivationFunction
Enum that represents layer activation functions.
Definition Functions.h:32

TMVA::DNN::EActivationFunction::kTanh
@ kTanh
Definition Functions.h:36

TMVA::DNN::EActivationFunction::kSigmoid
@ kSigmoid
Definition Functions.h:35

TMVA::DNN::initialize
void initialize(typename Architecture_t::Matrix_t &A, EInitialization m)
Definition Functions.h:282

TMVA
create variable transformations
Definition GeneticMinimizer.h:22

TMVA::gTools
Tools & gTools()

TMVA::DNN::TDescriptors
Definition ContextHandles.h:29

TMVA::DNN::TWorkspace
Definition ContextHandles.h:32

m
TMarker m
Definition textangle.C:8

Functions.h