29 #ifndef TMVA_DNN_RNN_LAYER 30 #define TMVA_DNN_RNN_LAYER 54 template<
typename Architecture_t>
60 using Matrix_t =
typename Architecture_t::Matrix_t;
61 using Scalar_t =
typename Architecture_t::Scalar_t;
85 TBasicRNNLayer(
size_t batchSize,
size_t stateSize,
size_t inputSize,
86 size_t timeSteps,
bool rememberState =
false,
111 const Tensor_t &activations_backward,
112 std::vector<Matrix_t> &inp1,
113 std::vector<Matrix_t> &inp2);
121 const Matrix_t & precStateActivations,
164 template <
typename Architecture_t>
169 :
VGeneralLayer<Architecture_t>(batchSize, 1, timeSteps, inputSize, 1, timeSteps, stateSize, 2,
170 {stateSize, stateSize}, {inputSize, stateSize}, 1, {stateSize}, {1}, batchSize,
171 timeSteps, stateSize, fA),
176 fState(batchSize, stateSize),
184 for (
size_t i = 0; i < timeSteps; ++i) {
191 template <
typename Architecture_t>
219 template <
typename Architecture_t>
226 template<
typename Architecture_t>
230 std::cout <<
" RECURRENT Layer: \t ";
233 std::cout <<
", NTime = " << this->
GetTimeSteps() <<
" )";
234 std::cout <<
"\tOutput = ( " << this->
GetOutput().size() <<
" , " << this->
GetOutput()[0].GetNrows() <<
" , " << this->
GetOutput()[0].GetNcols() <<
" )\n";
//______________________________________________________________________________
/// Debug helper: prints the contents of matrix `A` to std::cout.
/// Output format: `name` on its own line, then one line per matrix row
/// with elements separated by single spaces, followed by a "********"
/// separator line. Signature matches the declared member-index entry:
/// auto debugMatrix(const Matrix_t &A, const std::string name = "matrix") -> void
template <typename Architecture_t>
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string name = "matrix")
-> void
{
   std::cout << name << "\n";
   for (size_t i = 0; i < A.GetNrows(); ++i) {
      for (size_t j = 0; j < A.GetNcols(); ++j) {
         std::cout << A(i, j) << " ";
      }
      std::cout << "\n"; // terminate the current matrix row
   }
   std::cout << "********\n";
}
253 template <
typename Architecture_t>
264 Architecture_t::Rearrange(arrInput, input);
266 for (
size_t t = 0; t <
fTimeSteps;++t) arrOutput.emplace_back(this->GetBatchSize(),
fStateSize);
273 Architecture_t::Rearrange(this->
GetOutput(), arrOutput);
277 template <
typename Architecture_t>
286 Architecture_t::ScaleAdd(
fState, tmpState);
288 DNN::evaluateDerivative<Architecture_t>(dF, fAF,
fState);
289 DNN::evaluate<Architecture_t>(
fState, fAF);
293 template <
typename Architecture_t>
295 const Tensor_t &activations_backward,
296 std::vector<Matrix_t> & , std::vector<Matrix_t> &
306 if (gradients_backward.size() == 0 || gradients_backward[0].GetNrows() == 0 || gradients_backward[0].GetNcols() == 0) {
310 for (
size_t t = 0; t <
fTimeSteps; ++t) arr_gradients_backward.emplace_back(this->GetBatchSize(), this->
GetInputSize());
317 for (
size_t t = 0; t <
fTimeSteps; ++t) arr_activations_backward.emplace_back(this->GetBatchSize(), this->
GetInputSize());
318 Architecture_t::Rearrange(arr_activations_backward, activations_backward);
327 for (
size_t t = 0; t <
fTimeSteps; ++t) arr_output.emplace_back(this->GetBatchSize(),
fStateSize);
328 Architecture_t::Rearrange(arr_output, this->
GetOutput());
331 for (
size_t t = 0; t <
fTimeSteps; ++t) arr_actgradients.emplace_back(this->GetBatchSize(),
fStateSize);
339 for (
size_t t = fTimeSteps; t > 0; t--) {
341 Architecture_t::ScaleAdd(state_gradients_backward, arr_actgradients[t - 1]);
343 const Matrix_t & precStateActivations = arr_output[t - 2];
344 CellBackward(state_gradients_backward, precStateActivations, arr_activations_backward[t - 1],
347 const Matrix_t & precStateActivations = initState;
348 CellBackward(state_gradients_backward, precStateActivations, arr_activations_backward[t - 1],
353 Architecture_t::Rearrange(gradients_backward, arr_gradients_backward );
359 template <
typename Architecture_t>
361 const Matrix_t & precStateActivations,
371 template <
typename Architecture_t>
391 template <
typename Architecture_t>
size_t GetInputWidth() const
Generic General Layer class.
Matrix_t & fBiases
Biases.
void ReadMatrixXML(void *node, const char *name, Matrix_t &matrix)
const Matrix_t & GetBiasesAt(size_t i) const
Matrix_t & GetWeightsState()
Matrix_t & fBiasGradients
Gradients w.r.t. the bias values.
std::vector< Matrix_t > & GetDerivatives()
bool IsRememberState() const
Matrix_t & GetBiasesState()
const Matrix_t & GetState() const
Matrix_t & fWeightsState
Prev state weights, fWeights[1].
std::vector< Matrix_t > fDerivatives
First derivatives of the activation function, one matrix per time step (stored in fDerivatives).
void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward, std::vector< Matrix_t > &inp1, std::vector< Matrix_t > &inp2)
Backpropagates the error.
size_t GetBatchSize() const
Getters.
bool fRememberState
Remember state in next pass.
Matrix_t & GetWeightsInput()
const std::vector< Matrix_t > & GetActivationGradients() const
void Forward(Tensor_t &input, bool isTraining=true)
Compute and return the next state with given input matrix.
Matrix_t & GetBiasStateGradients()
const Matrix_t & GetWeightsState() const
void Update(const Scalar_t learningRate)
void InitState(DNN::EInitialization m=DNN::EInitialization::kZero)
Initialize the weights according to the given initialization method.
const Matrix_t & GetBiasGradientsAt(size_t i) const
const Matrix_t & GetBiasStateGradients() const
const Matrix_t & GetWeightsInput() const
Matrix_t & fWeightStateGradients
Gradients w.r.t. the recurring weights.
void Print() const
Prints the info about the layer.
size_t GetTimeSteps() const
Getters.
Matrix_t & fWeightInputGradients
Gradients w.r.t. the input weights.
const Matrix_t & GetWeightsAt(size_t i) const
XMLAttrPointer_t NewAttr(XMLNodePointer_t xmlnode, XMLNsPointer_t, const char *name, const char *value)
creates new attribute for xmlnode, namespaces are not supported for attributes
void Copy(void *source, void *dest)
TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, DNN::EActivationFunction f=DNN::EActivationFunction::kTanh, bool training=true, DNN::EInitialization fA=DNN::EInitialization::kZero)
Constructor.
size_t GetInputSize() const
void CellForward(const Matrix_t &input, Matrix_t &dF)
Forward for a single cell (time unit)
static RooMathCoreReg dummy
const Matrix_t & GetBiasesState() const
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string name="matrix") -> void
const Matrix_t & GetWeightInputGradients() const
size_t GetStateSize() const
Matrix_t & GetWeightStateGradients()
Abstract ClassifierFactory template that handles arbitrary types.
const Matrix_t & GetWeightStateGradients() const
const Matrix_t & GetWeightGradientsAt(size_t i) const
Matrix_t & GetDerivativesAt(size_t i)
Matrix_t & CellBackward(Matrix_t &state_gradients_backward, const Matrix_t &precStateActivations, const Matrix_t &input, Matrix_t &input_gradient, Matrix_t &dF)
Backward for a single time unit, matching the corresponding call to Forward(...).
const std::vector< Matrix_t > & GetDerivatives() const
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=0)
create new child element for parent node
Matrix_t & GetWeightInputGradients()
std::vector< Matrix_t > Tensor_t
void WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix)
DNN::EActivationFunction GetActivationFunction() const
typename Architecture_t::Scalar_t Scalar_t
EActivationFunction
Enum that represents layer activation functions.
const std::vector< Matrix_t > & GetOutput() const
DNN::EActivationFunction fF
Activation function of the hidden state.
Matrix_t & fWeightsInput
Input weights, fWeights[0].
size_t fStateSize
Hidden state size of RNN.
size_t fTimeSteps
Timesteps for RNN.
const Matrix_t & GetDerivativesAt(size_t i) const
virtual void ReadWeightsFromXML(void *parent)
Read the information and the weights about the layer from XML node.
Matrix_t fState
Hidden State.
virtual void AddWeightsXMLTo(void *parent)
Writes the information and the weights about the layer in an XML node.
typename Architecture_t::Matrix_t Matrix_t