#ifndef TMVA_DNN_RNN_LAYER
#define TMVA_DNN_RNN_LAYER

#include <iostream>
#include <string>
#include <vector>

#include "TMVA/Tools.h"
#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/GeneralLayer.h"
namespace TMVA {
namespace DNN {
namespace RNN {

//______________________________________________________________________________
//
// Basic RNN Layer
//______________________________________________________________________________

/** \class TBasicRNNLayer
    Generic implementation of a simple recurrent layer.
*/
template <typename Architecture_t>
class TBasicRNNLayer : public VGeneralLayer<Architecture_t>
{
public:
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Tensor_t = std::vector<Matrix_t>;

private:
   size_t fTimeSteps;                  ///< Timesteps for RNN
   size_t fStateSize;                  ///< Hidden state size of RNN
   bool fRememberState;                ///< Remember state in next pass

   DNN::EActivationFunction fF;        ///< Activation function of the hidden state

   Matrix_t fState;                    ///< Hidden state
   Matrix_t &fWeightsInput;            ///< Input weights, fWeights[0]
   Matrix_t &fWeightsState;            ///< Prev state weights, fWeights[1]
   Matrix_t &fBiases;                  ///< Biases

   std::vector<Matrix_t> fDerivatives; ///< First derivatives of the activations
   Matrix_t &fWeightInputGradients;    ///< Gradients w.r.t. the input weights
   Matrix_t &fWeightStateGradients;    ///< Gradients w.r.t. the recurring weights
   Matrix_t &fBiasGradients;           ///< Gradients w.r.t. the bias values

public:
   /** Constructor */
   TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps,
                  bool rememberState = false,
                  DNN::EActivationFunction f = DNN::EActivationFunction::kTanh,
                  bool training = true, DNN::EInitialization fA = DNN::EInitialization::kZero);

   /** Copy constructor */
   TBasicRNNLayer(const TBasicRNNLayer &);

   /*! Initialize the hidden state. */
   void InitState(DNN::EInitialization m = DNN::EInitialization::kZero);

   /*! Compute and return the next state with the given input matrix. */
   void Forward(Tensor_t &input, bool isTraining = true);

   /*! Forward for a single cell (time unit). */
   void CellForward(const Matrix_t &input, Matrix_t &dF);

   /*! Backpropagates the error. Must only be called directly at the corresponding
    *  call to Forward(...). */
   void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward,
                 std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2);

   /*! Backward for a single time unit at the corresponding call to Forward(...). */
   inline Matrix_t &CellBackward(Matrix_t &state_gradients_backward, const Matrix_t &precStateActivations,
                                 const Matrix_t &input, Matrix_t &input_gradient, Matrix_t &dF);

   /*! Updates weights and biases, given the learning rate. */
   void Update(const Scalar_t learningRate);

   /*! Prints the info about the layer. */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights about the layer from XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /** Getters */
   size_t GetTimeSteps() const { return fTimeSteps; }
   size_t GetStateSize() const { return fStateSize; }
   size_t GetInputSize() const { return this->GetInputWidth(); }
   bool IsRememberState() const { return fRememberState; }
   DNN::EActivationFunction GetActivationFunction() const { return fF; }
   Matrix_t &GetState() { return fState; }
   const Matrix_t &GetState() const { return fState; }
   // ... the remaining const/non-const accessors (GetWeightsInput, GetWeightsState,
   // GetBiasesState, GetDerivatives, GetDerivativesAt and the corresponding
   // gradient getters) are elided in this extract.
};
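// The cell implemented below follows the vanilla (Elman) recurrence; with
// B = batch size, D = input size and H = state size it computes, per time
// step t,
//
//    h_t = f( x_t * W_input^T + h_{t-1} * W_state^T + b ),
//
// where x_t is the B x D input slice, h_t the B x H hidden state, f the
// activation function fF, W_input the H x D input weights and W_state the
// H x H recurrent weights (see CellForward).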
//______________________________________________________________________________
template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps,
                                               bool rememberState, DNN::EActivationFunction f, bool /*training*/,
                                               DNN::EInitialization fA)
   : VGeneralLayer<Architecture_t>(batchSize, 1, timeSteps, inputSize, 1, timeSteps, stateSize, 2,
                                   {stateSize, stateSize}, {inputSize, stateSize}, 1, {stateSize}, {1}, batchSize,
                                   timeSteps, stateSize, fA),
     fTimeSteps(timeSteps), fStateSize(stateSize), fRememberState(rememberState), fF(f),
     fState(batchSize, stateSize), fWeightsInput(this->GetWeightsAt(0)), fWeightsState(this->GetWeightsAt(1)),
     fBiases(this->GetBiasesAt(0)), fWeightInputGradients(this->GetWeightGradientsAt(0)),
     fWeightStateGradients(this->GetWeightGradientsAt(1)), fBiasGradients(this->GetBiasGradientsAt(0))
{
   for (size_t i = 0; i < timeSteps; ++i) {
      fDerivatives.emplace_back(batchSize, stateSize);
   }
}
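// A minimal construction sketch (illustrative only; TReference is the
// reference architecture shipped with TMVA/DNN, any Architecture_t exposing
// the same interface works, and the sizes are arbitrary):
//
//    using Arch = TMVA::DNN::TReference<double>;
//    TMVA::DNN::RNN::TBasicRNNLayer<Arch> rnn(/*batchSize=*/16, /*stateSize=*/8,
//                                             /*inputSize=*/5, /*timeSteps=*/3);
//    rnn.Initialize();   // weight initialization, inherited from the base layer
//    rnn.Print();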
//______________________________________________________________________________
template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(const TBasicRNNLayer &layer)
   : VGeneralLayer<Architecture_t>(layer), fTimeSteps(layer.fTimeSteps), fStateSize(layer.fStateSize),
     fRememberState(layer.fRememberState), fF(layer.GetActivationFunction()),
     fState(layer.GetBatchSize(), layer.GetStateSize()), fWeightsInput(this->GetWeightsAt(0)),
     fWeightsState(this->GetWeightsAt(1)), fBiases(this->GetBiasesAt(0)),
     fDerivatives(), fWeightInputGradients(this->GetWeightGradientsAt(0)),
     fWeightStateGradients(this->GetWeightGradientsAt(1)), fBiasGradients(this->GetBiasGradientsAt(0))
{
   // the body (copying the derivative matrices and the hidden state from
   // `layer`) is elided in this extract
   // ...
}
//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::InitState(DNN::EInitialization /*m*/) -> void
{
   DNN::initialize<Architecture_t>(this->GetState(), DNN::EInitialization::kZero);
}
//______________________________________________________________________________
template<typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::Print() const -> void
{
   std::cout << " RECURRENT Layer: \t ";
   std::cout << " (NInput = " << this->GetInputSize();
   std::cout << ", NState = " << this->GetStateSize();
   std::cout << ", NTime = " << this->GetTimeSteps() << " )";
   std::cout << "\tOutput = ( " << this->GetOutput().size() << " , " << this->GetOutput()[0].GetNrows() << " , "
             << this->GetOutput()[0].GetNcols() << " )\n";
}
//______________________________________________________________________________
template <typename Architecture_t>
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string name = "matrix") -> void
{
   std::cout << name << "\n";
   for (size_t i = 0; i < A.GetNrows(); ++i) {
      for (size_t j = 0; j < A.GetNcols(); ++j) {
         std::cout << A(i, j) << " ";
      }
      std::cout << "\n";
   }
   std::cout << "********\n";
}
//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::Forward(Tensor_t &input, bool /*isTraining*/) -> void
{
   // D : input size, H : state size, T : time steps, B : batch size
   Tensor_t arrInput;
   for (size_t t = 0; t < fTimeSteps; ++t) arrInput.emplace_back(this->GetBatchSize(), this->GetInputWidth()); // T x B x D
   Architecture_t::Rearrange(arrInput, input); // B x T x D

   Tensor_t arrOutput;
   for (size_t t = 0; t < fTimeSteps; ++t) arrOutput.emplace_back(this->GetBatchSize(), fStateSize); // T x B x H

   if (!this->fRememberState) InitState(DNN::EInitialization::kZero);
   for (size_t t = 0; t < fTimeSteps; ++t) {
      CellForward(arrInput[t], fDerivatives[t]);
      Architecture_t::Copy(arrOutput[t], fState); // store the state of each time step
   }
   Architecture_t::Rearrange(this->GetOutput(), arrOutput); // B x T x H
}
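// Layout note: the layer exchanges batch-first tensors with the rest of the
// net (B matrices of shape T x D on input, T x H on output) but iterates
// time-first internally (T matrices of shape B x D, resp. B x H); Rearrange
// converts between the two layouts in both directions.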
//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::CellForward(const Matrix_t &input, Matrix_t &dF) -> void
{
   // State = act(W_input . input + W_state . state + bias)
   const DNN::EActivationFunction fAF = this->GetActivationFunction();
   Matrix_t tmpState(fState.GetNrows(), fState.GetNcols());
   Architecture_t::MultiplyTranspose(tmpState, fState, fWeightsState);
   Architecture_t::MultiplyTranspose(fState, input, fWeightsInput);
   Architecture_t::ScaleAdd(fState, tmpState);
   Architecture_t::AddRowWise(fState, fBiases);
   DNN::evaluateDerivative<Architecture_t>(dF, fAF, fState);
   DNN::evaluate<Architecture_t>(fState, fAF);
}
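// Step by step, CellForward builds the pre-activation in fState:
//    tmpState = h_{t-1} * W_state^T          (MultiplyTranspose)
//    fState   = x_t * W_input^T              (MultiplyTranspose)
//    fState  += tmpState                     (ScaleAdd)
//    fState  += b, broadcast over the batch  (AddRowWise)
// then stores f'(pre-activation) in dF for the backward pass before applying
// f in place.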
//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::Backward(Tensor_t &gradients_backward,         // B x T x D
                                                     const Tensor_t &activations_backward, // B x T x D
                                                     std::vector<Matrix_t> & /*inp1*/,
                                                     std::vector<Matrix_t> & /*inp2*/) -> void
{
   // gradients_backward are the activation gradients of the previous (input)
   // layer; they may be empty when this is the first layer of the net
   bool dummy = false;
   if (gradients_backward.size() == 0 || gradients_backward[0].GetNrows() == 0 || gradients_backward[0].GetNcols() == 0) {
      dummy = true;
   }

   Tensor_t arr_gradients_backward;
   for (size_t t = 0; t < fTimeSteps; ++t) arr_gradients_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D

   Tensor_t arr_activations_backward;
   for (size_t t = 0; t < fTimeSteps; ++t) arr_activations_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D
   Architecture_t::Rearrange(arr_activations_backward, activations_backward); // B x T x D

   Matrix_t state_gradients_backward(this->GetBatchSize(), fStateSize); // B x H
   DNN::initialize<Architecture_t>(state_gradients_backward, DNN::EInitialization::kZero);

   Matrix_t initState(this->GetBatchSize(), fStateSize); // B x H
   DNN::initialize<Architecture_t>(initState, DNN::EInitialization::kZero);

   Tensor_t arr_output;
   for (size_t t = 0; t < fTimeSteps; ++t) arr_output.emplace_back(this->GetBatchSize(), fStateSize);
   Architecture_t::Rearrange(arr_output, this->GetOutput());

   Tensor_t arr_actgradients;
   for (size_t t = 0; t < fTimeSteps; ++t) arr_actgradients.emplace_back(this->GetBatchSize(), fStateSize);
   Architecture_t::Rearrange(arr_actgradients, this->GetActivationGradients());

   // reinitialize the weight and bias gradients to zero before accumulating
   fWeightInputGradients.Zero();
   fWeightStateGradients.Zero();
   fBiasGradients.Zero();

   for (size_t t = fTimeSteps; t > 0; t--) {
      Architecture_t::ScaleAdd(state_gradients_backward, arr_actgradients[t - 1]);
      if (t > 1) {
         const Matrix_t &precStateActivations = arr_output[t - 2];
         CellBackward(state_gradients_backward, precStateActivations, arr_activations_backward[t - 1],
                      arr_gradients_backward[t - 1], fDerivatives[t - 1]);
      } else {
         const Matrix_t &precStateActivations = initState;
         CellBackward(state_gradients_backward, precStateActivations, arr_activations_backward[t - 1],
                      arr_gradients_backward[t - 1], fDerivatives[t - 1]);
      }
   }
   if (!dummy) { // not the first layer of the net
      Architecture_t::Rearrange(gradients_backward, arr_gradients_backward); // B x T x D
   }
}
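// The time loop above is plain backpropagation through time: walking from
// t = T down to t = 1, the state gradient flowing in from step t+1 is
// accumulated with the activation gradient of step t (ScaleAdd), then
// CellBackward adds this step's contribution to the zeroed weight and bias
// gradients; at t = 1 the zero-initialized initState stands in for the
// non-existent previous state.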
//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::CellBackward(Matrix_t &state_gradients_backward,
                                                         const Matrix_t &precStateActivations,
                                                         const Matrix_t &input, Matrix_t &input_gradient, Matrix_t &dF)
   -> Matrix_t &
{
   return Architecture_t::RecurrentLayerBackward(state_gradients_backward, fWeightInputGradients, fWeightStateGradients,
                                                 fBiasGradients, dF, precStateActivations, fWeightsInput,
                                                 fWeightsState, input, input_gradient);
}
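// RecurrentLayerBackward is provided by the backend; a sketch of the standard
// BPTT cell equations it is expected to implement (with delta the element-wise
// product of dF and the incoming state gradient):
//    fWeightInputGradients += delta^T * input
//    fWeightStateGradients += delta^T * precStateActivations
//    fBiasGradients        += column sums of delta
//    input_gradient         = delta * fWeightsInput
//    returned state gradient = delta * fWeightsState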
//______________________________________________________________________________
template <typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::AddWeightsXMLTo(void *parent) -> void
{
   auto layerxml = gTools().xmlengine().NewChild(parent, 0, "RNNLayer");

   // the attributes of the layer (state size, input size, time steps,
   // remember-state flag) are written here; elided in this extract
   // ...

   // write weights and bias matrices
   this->WriteMatrixToXML(layerxml, "InputWeights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "StateWeights", this->GetWeightsAt(1));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
//______________________________________________________________________________
template <typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::ReadWeightsFromXML(void *parent) -> void
{
   this->ReadMatrixXML(parent, "InputWeights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "StateWeights", this->GetWeightsAt(1));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace RNN
} // namespace DNN
} // namespace TMVA

#endif // TMVA_DNN_RNN_LAYER