// @(#)root/tmva/tmva/dnn/rnn:$Id$
// Author: Saurav Shekhar 19/07/17

/***********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                   *
 * Class  : TBasicRNNLayer                                                         *
 *                                                                                 *
 * Description:                                                                    *
 *      Basic recurrent neural network layer                                       *
 *                                                                                 *
 * Authors (alphabetical):                                                         *
 *      Saurav Shekhar      <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland  *
 *                                                                                 *
 * Copyright (c) 2005-2015:                                                        *
 *      All rights reserved.                                                       *
 *      CERN, Switzerland                                                          *
 *                                                                                 *
 * For the licensing terms see $ROOTSYS/LICENSE.                                   *
 * For the list of contributors see $ROOTSYS/README/CREDITS.                       *
 ***********************************************************************************/

//#pragma once

//////////////////////////////////////////////////////////////////////
// Basic RNN layer                                                  //
//////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_RNN_LAYER
#define TMVA_DNN_RNN_LAYER

#include <cmath>
#include <iostream>
#include <vector>
#include <string>

#include "TMatrix.h"
#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/GeneralLayer.h" // VGeneralLayer base class
#include "TMVA/Tools.h"            // gTools(), used by the XML I/O methods

namespace TMVA
{
namespace DNN
{

namespace RNN {

//______________________________________________________________________________
//
// Basic RNN Layer
//______________________________________________________________________________

/** \class TBasicRNNLayer
    Generic implementation of a vanilla recurrent layer.

    At each time step the hidden state is updated as
    state = act(W_input . input + W_state . state + bias),
    where act is the activation function of the layer (see CellForward()).
*/
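
// A minimal usage sketch (illustrative only): building the layer for the CPU
// backend and running a forward pass. It assumes the TCpu architecture from
// TMVA/DNN/Architectures/Cpu.h; the input tensor follows the B x T x D
// convention expected by Forward().
//
//    using Arch = TMVA::DNN::TCpu<Double_t>;
//    // batchSize = 4, stateSize = 10, inputSize = 5, timeSteps = 8
//    TMVA::DNN::RNN::TBasicRNNLayer<Arch> rnn(4, 10, 5, 8);
//    rnn.Initialize();
//    Arch::Tensor_t input(4, 8, 5); // B x T x D
//    rnn.Forward(input, false);     // inference mode
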
template<typename Architecture_t>
class TBasicRNNLayer : public VGeneralLayer<Architecture_t>
{

public:

   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

   using LayerDescriptor_t   = typename Architecture_t::RecurrentDescriptor_t;
   using WeightsDescriptor_t = typename Architecture_t::FilterDescriptor_t;
   using TensorDescriptor_t  = typename Architecture_t::TensorDescriptor_t;
   using HelperDescriptor_t  = typename Architecture_t::DropoutDescriptor_t;

   using RNNWorkspace_t   = typename Architecture_t::RNNWorkspace_t;
   using RNNDescriptors_t = typename Architecture_t::RNNDescriptors_t;

private:

   size_t fTimeSteps;            ///< Timesteps for RNN
   size_t fStateSize;            ///< Hidden state size of RNN
   bool fRememberState;          ///< Remember state in next pass
   bool fReturnSequence = false; ///< Return the full output sequence, or only the last element in time

   DNN::EActivationFunction fF;  ///< Activation function of the hidden state

   Matrix_t fState;              ///< Hidden state
   Matrix_t &fWeightsInput;      ///< Input weights, fWeights[0]
   Matrix_t &fWeightsState;      ///< Previous state weights, fWeights[1]
   Matrix_t &fBiases;            ///< Biases

   Tensor_t fDerivatives;           ///< First derivatives of the activations
   Matrix_t &fWeightInputGradients; ///< Gradients w.r.t. the input weights
   Matrix_t &fWeightStateGradients; ///< Gradients w.r.t. the recurrent weights
   Matrix_t &fBiasGradients;        ///< Gradients w.r.t. the bias values

   Tensor_t fWeightsTensor;         ///< Tensor for all weights (used by the cuDNN backend)
   Tensor_t fWeightGradientsTensor; ///< Tensor for all weight gradients

   typename Architecture_t::ActivationDescriptor_t fActivationDesc;

   TDescriptors *fDescriptors = nullptr; ///< Keeps all the RNN descriptors
   TWorkspace *fWorkspace = nullptr;     ///< Workspace needed for GPU computation (cuDNN)

   Matrix_t fCell; ///< Empty matrix: a plain RNN has no cell state (kept for the common cuDNN interface)

   // tensors used internally for the forward and backward pass
   Tensor_t fX;  ///< cached input tensor as T x B x I
   Tensor_t fY;  ///< cached output tensor as T x B x S
   Tensor_t fDx; ///< cached gradient on the input (output of backward) as T x B x I
   Tensor_t fDy; ///< cached activation gradient (input of backward) as T x B x S


public:

   /** Constructor */
   TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize,
                  size_t timeSteps, bool rememberState = false, bool returnSequence = false,
                  DNN::EActivationFunction f = DNN::EActivationFunction::kTanh,
                  bool training = true, DNN::EInitialization fA = DNN::EInitialization::kZero);

   /** Copy Constructor */
   TBasicRNNLayer(const TBasicRNNLayer &);

   /*! Destructor. */
   virtual ~TBasicRNNLayer();

   /*! Initialize the weights according to the given initialization
    **  method. */
   virtual void Initialize();

   /*! Initialize the hidden state. */
   void InitState(DNN::EInitialization m = DNN::EInitialization::kZero);

   /*! Compute and return the next state with the given input
    *  matrix. */
   void Forward(Tensor_t &input, bool isTraining = true);

   /*! Forward for a single cell (time unit). */
   void CellForward(const Matrix_t &input, Matrix_t &dF);

   /*! Backpropagates the error. Must only be called directly after the corresponding
    *  call to Forward(...). */
   void Backward(Tensor_t &gradients_backward,
                 const Tensor_t &activations_backward);

   /* Updates weights and biases, given the learning rate */
   void Update(const Scalar_t learningRate);

   /*! Backward for a single time unit, matching
    *  the corresponding call to Forward(...). */
   inline Matrix_t & CellBackward(Matrix_t & state_gradients_backward,
                                  const Matrix_t & precStateActivations,
                                  const Matrix_t & input, Matrix_t & input_gradient, Matrix_t &dF);

   /** Prints the info about the layer */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights about the layer from XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   void InitTensors();
   // void InitializeDescriptors();
   // void ReleaseDescriptors();
   // void InitializeWorkspace();
   // void FreeWorkspace();

   /** Getters */
   size_t GetTimeSteps() const { return fTimeSteps; }
   size_t GetStateSize() const { return fStateSize; }
   size_t GetInputSize() const { return this->GetInputWidth(); }
   inline bool DoesRememberState() const { return fRememberState; }
   inline bool DoesReturnSequence() const { return fReturnSequence; }
   DNN::EActivationFunction GetActivationFunction() const { return fF; }
   Matrix_t & GetState() { return fState; } // RNN hidden state
   const Matrix_t & GetState() const { return fState; }
   Matrix_t & GetCell() { return fCell; } // returns an empty matrix for a plain RNN
   const Matrix_t & GetCell() const { return fCell; }

   Matrix_t & GetWeightsInput() { return fWeightsInput; }
   const Matrix_t & GetWeightsInput() const { return fWeightsInput; }
   Matrix_t & GetWeightsState() { return fWeightsState; }
   const Matrix_t & GetWeightsState() const { return fWeightsState; }
   Tensor_t & GetDerivatives() { return fDerivatives; }
   const Tensor_t & GetDerivatives() const { return fDerivatives; }
   // Matrix_t &GetDerivativesAt(size_t i) { return fDerivatives[i]; }
   // const Matrix_t &GetDerivativesAt(size_t i) const { return fDerivatives[i]; }

   Matrix_t & GetBiasesState() { return fBiases; }
   const Matrix_t & GetBiasesState() const { return fBiases; }
   Matrix_t & GetBiasStateGradients() { return fBiasGradients; }
   const Matrix_t & GetBiasStateGradients() const { return fBiasGradients; }
   Matrix_t & GetWeightInputGradients() { return fWeightInputGradients; }
   const Matrix_t & GetWeightInputGradients() const { return fWeightInputGradients; }
   Matrix_t & GetWeightStateGradients() { return fWeightStateGradients; }
   const Matrix_t & GetWeightStateGradients() const { return fWeightStateGradients; }

   Tensor_t & GetWeightsTensor() { return fWeightsTensor; }
   const Tensor_t & GetWeightsTensor() const { return fWeightsTensor; }
   Tensor_t & GetWeightGradientsTensor() { return fWeightGradientsTensor; }
   const Tensor_t & GetWeightGradientsTensor() const { return fWeightGradientsTensor; }

   Tensor_t & GetX() { return fX; }
   Tensor_t & GetY() { return fY; }
   Tensor_t & GetDX() { return fDx; }
   Tensor_t & GetDY() { return fDy; }
};

//______________________________________________________________________________
//
// BasicRNNLayer Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps,
                                               bool rememberState, bool returnSequence, DNN::EActivationFunction f,
                                               bool /*training*/, DNN::EInitialization fA)
   // TODO inputDepth and outputDepth changed to batchSize??
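   // How the arguments map onto the VGeneralLayer base construction below:
   //   input  : depth 1, height timeSteps, width inputSize
   //   output : width stateSize, height timeSteps (or 1 when only the last state is returned)
   //   weights: 2 matrices - stateSize x inputSize (input) and stateSize x stateSize (state)
   //   biases : 1 vector of stateSize elements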
   : VGeneralLayer<Architecture_t>(batchSize, 1, timeSteps, inputSize, 1, (returnSequence) ? timeSteps : 1,
                                   stateSize, 2, {stateSize, stateSize}, {inputSize, stateSize}, 1, {stateSize}, {1},
                                   batchSize, (returnSequence) ? timeSteps : 1, stateSize, fA),
     fTimeSteps(timeSteps), fStateSize(stateSize), fRememberState(rememberState), fReturnSequence(returnSequence),
     fF(f), fState(batchSize, stateSize),
     fWeightsInput(this->GetWeightsAt(0)), fWeightsState(this->GetWeightsAt(1)),
     fBiases(this->GetBiasesAt(0)), fDerivatives(timeSteps, batchSize, stateSize), // create tensor of shape T x B x S
     fWeightInputGradients(this->GetWeightGradientsAt(0)), fWeightStateGradients(this->GetWeightGradientsAt(1)),
     fBiasGradients(this->GetBiasGradientsAt(0)), fWeightsTensor({0}), fWeightGradientsTensor({0})
{
   InitTensors();
}

//______________________________________________________________________________
template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(const TBasicRNNLayer &layer)
   : VGeneralLayer<Architecture_t>(layer), fTimeSteps(layer.fTimeSteps), fStateSize(layer.fStateSize),
     fRememberState(layer.fRememberState), fReturnSequence(layer.fReturnSequence), fF(layer.GetActivationFunction()),
     fState(layer.GetBatchSize(), layer.GetStateSize()),
     fWeightsInput(this->GetWeightsAt(0)), fWeightsState(this->GetWeightsAt(1)), fBiases(this->GetBiasesAt(0)),
     fDerivatives(layer.GetDerivatives().GetShape()), fWeightInputGradients(this->GetWeightGradientsAt(0)),
     fWeightStateGradients(this->GetWeightGradientsAt(1)), fBiasGradients(this->GetBiasGradientsAt(0)),
     fWeightsTensor({0}), fWeightGradientsTensor({0})
{

   Architecture_t::Copy(fDerivatives, layer.GetDerivatives());

   // gradient matrices are not copied
   Architecture_t::Copy(fState, layer.GetState());
   InitTensors();
}

template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::~TBasicRNNLayer()
{
   if (fDescriptors) {
      Architecture_t::ReleaseRNNDescriptors(fDescriptors);
      delete fDescriptors;
   }

   if (fWorkspace) {
      Architecture_t::FreeRNNWorkspace(fWorkspace);
      delete fWorkspace;
   }
}

//______________________________________________________________________________
template<typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::Initialize()
{
   // auto m = this->GetInitialization();
   // DNN::initialize<Architecture_t>(fWeightsInput, m);
   // DNN::initialize<Architecture_t>(fWeightsState, m);
   // DNN::initialize<Architecture_t>(fBiases, DNN::EInitialization::kZero);

   VGeneralLayer<Architecture_t>::Initialize();

   Architecture_t::InitializeRNNDescriptors(fDescriptors, this);
   Architecture_t::InitializeRNNWorkspace(fWorkspace, fDescriptors, this);
}

//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::InitTensors()
{
   // fix the output tensor: for cuDNN it must be a B x T x S tensor with the right layout
   Architecture_t::InitializeRNNTensors(this);
}
//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::InitState(DNN::EInitialization /*m*/)
{
   DNN::initialize<Architecture_t>(this->GetState(), DNN::EInitialization::kZero);

   Architecture_t::InitializeActivationDescriptor(fActivationDesc, this->GetActivationFunction());
}

//______________________________________________________________________________
template<typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::Print() const
-> void
{
   std::cout << " RECURRENT Layer: \t ";
   std::cout << " (NInput = " << this->GetInputSize();  // input size
   std::cout << ", NState = " << this->GetStateSize();  // hidden state size
   std::cout << ", NTime = " << this->GetTimeSteps() << " )";  // time size
   std::cout << "\tOutput = ( " << this->GetOutput().GetFirstSize() << " , " << this->GetOutput().GetHSize() << " , "
             << this->GetOutput().GetWSize() << " )\n";
}

template <typename Architecture_t>
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string name = "matrix")
-> void
{
   std::cout << name << "\n";
   for (size_t i = 0; i < A.GetNrows(); ++i) {
      for (size_t j = 0; j < A.GetNcols(); ++j) {
         std::cout << A(i, j) << " ";
      }
      std::cout << "\n";
   }
   std::cout << "********\n";
}
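
// Illustrative use while debugging (Architecture_t sits in a non-deduced
// context, so it must be spelled out explicitly), e.g.:
//    debugMatrix<TCpu<Double_t>>(rnn.GetWeightsInput(), "input weights");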

//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::Forward(Tensor_t &input, bool isTraining) // B x T x D
{

   //printf("doing RNNLayer forward\n");
   // for cuDNN
   if (Architecture_t::IsCudnn()) {

      Tensor_t &x = this->fX;
      Tensor_t &y = this->fY;

      Architecture_t::Rearrange(x, input);

      // why pass only the first weight? Better to pass the full weight tensor (including biases)
      // LM 05/24
      //const auto &weights = this->GetWeightsAt(0);
      const auto &weights = this->GetWeightsTensor();

      // Tensor_t cx({1}); // not used for a normal RNN
      // Tensor_t cy({1}); // not used for a normal RNN

      // hx is fState - the tensors are of the right shape
      auto &hx = this->GetState();
      auto &cx = this->GetCell();
      // use the same for hy and cy
      auto &hy = this->GetState();
      auto &cy = this->GetCell();

      auto &rnnDesc = static_cast<RNNDescriptors_t &>(*fDescriptors);
      auto &rnnWork = static_cast<RNNWorkspace_t &>(*fWorkspace);

      //printf("doing RNNLayer forward - calling cudnn forward\n");

      Architecture_t::RNNForward(x, hx, cx, weights, y, hy, cy, rnnDesc, rnnWork, isTraining);

      if (fReturnSequence) {
         Architecture_t::Rearrange(this->GetOutput(), y); // swap B and T from y to the output
      }
      else {
         // tmp is a reference into y (the full cuDNN output)
         Tensor_t tmp = (y.At(y.GetShape()[0] - 1)).Reshape({y.GetShape()[1], 1, y.GetShape()[2]});
         Architecture_t::Copy(this->GetOutput(), tmp);
      }
      return;
   }

   // FORWARD for the CPU architecture
   // D : input size
   // H : state size
   // T : time size
   // B : batch size

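   // The input arrives as B x T x D; Rearrange transposes it into the
   // time-major layout T x B x D, so that each time step can be viewed as a
   // B x D matrix and passed to CellForward.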
   Tensor_t arrInput(fTimeSteps, this->GetBatchSize(), this->GetInputWidth());
   //for (size_t t = 0; t < fTimeSteps; ++t) arrInput.emplace_back(this->GetBatchSize(), this->GetInputWidth()); // T x B x D
   Architecture_t::Rearrange(arrInput, input);
   Tensor_t arrOutput(fTimeSteps, this->GetBatchSize(), fStateSize);
   //for (size_t t = 0; t < fTimeSteps; ++t) arrOutput.emplace_back(this->GetBatchSize(), fStateSize); // T x B x H

   if (!this->fRememberState) InitState(DNN::EInitialization::kZero);

   for (size_t t = 0; t < fTimeSteps; ++t) {
      Matrix_t arrInput_m = arrInput.At(t).GetMatrix();
      Matrix_t df_m = fDerivatives.At(t).GetMatrix();
      CellForward(arrInput_m, df_m);
      Matrix_t arrOutput_m = arrOutput.At(t).GetMatrix();
      Architecture_t::Copy(arrOutput_m, fState);
   }

   if (fReturnSequence)
      Architecture_t::Rearrange(this->GetOutput(), arrOutput); // B x T x H
   else {
      // get the last time step T[end]

      Tensor_t tmp = arrOutput.At(fTimeSteps - 1); // take the last time step
      // on CPU (column-wise) the shape of tmp is B x H; reshape it to B x H x 1
      // and transpose it to 1 x H x B (this is how the output is expected in column-major format)
      tmp = tmp.Reshape({tmp.GetShape()[0], tmp.GetShape()[1], 1});
      assert(tmp.GetSize() == this->GetOutput().GetSize());
      assert(tmp.GetShape()[0] == this->GetOutput().GetShape()[2]); // B is the last dim in the output and the first in tmp
      Architecture_t::Rearrange(this->GetOutput(), tmp);
      // keep the full time-major output: the backward pass needs it
      fY = arrOutput;
   }
}

//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::CellForward(const Matrix_t &input, Matrix_t &dF)
-> void
{
   // State = act(W_input . input + W_state . state + bias)
   const DNN::EActivationFunction fAF = this->GetActivationFunction();
   Matrix_t tmpState(fState.GetNrows(), fState.GetNcols());
   Architecture_t::MultiplyTranspose(tmpState, fState, fWeightsState);
   Architecture_t::MultiplyTranspose(fState, input, fWeightsInput);
   Architecture_t::ScaleAdd(fState, tmpState);
   Architecture_t::AddRowWise(fState, fBiases);
   Tensor_t inputActivFunc(dF);
   Tensor_t tState(fState);

   // DNN::evaluateDerivative<Architecture_t>(dFt, fAF, fState);
   // DNN::evaluate<Architecture_t>(tState, fAF);

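   // inputActivFunc wraps dF: the Copy below caches the pre-activation values
   // in dF for the backward pass, where ActivationFunctionBackward uses them
   // to evaluate the activation derivatives. The activation itself is then
   // applied in place on tState, which wraps fState.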
   Architecture_t::Copy(inputActivFunc, tState);
   Architecture_t::ActivationFunctionForward(tState, fAF, fActivationDesc);

}

//____________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, // B x T x D
                                                     const Tensor_t &activations_backward) -> void // B x T x D
// std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> & /*inp2*/) -> void
{
   // BACKWARD for cuDNN
   if (Architecture_t::IsCudnn()) {

      Tensor_t &x = this->fX;
      Tensor_t &y = this->fY;
      Tensor_t &dx = this->fDx;
      Tensor_t &dy = this->fDy;

      // the input size is stride[1] of the input tensor, which is B x T x inputSize
      assert(activations_backward.GetStrides()[1] == this->GetInputSize());

      Architecture_t::Rearrange(x, activations_backward);

      if (!fReturnSequence) {

         Architecture_t::InitializeZero(dy);

         //Tensor_t tmp1 = y.At(y.GetShape()[0] - 1).Reshape({y.GetShape()[1], 1, y.GetShape()[2]});
         Tensor_t tmp2 = dy.At(dy.GetShape()[0] - 1).Reshape({dy.GetShape()[1], 1, dy.GetShape()[2]});

         //Architecture_t::Copy(tmp1, this->GetOutput());
         Architecture_t::Copy(tmp2, this->GetActivationGradients());
      }
      else {
         Architecture_t::Rearrange(y, this->GetOutput());
         Architecture_t::Rearrange(dy, this->GetActivationGradients());
      }

      // for cuDNN, Matrix_t and Tensor_t are the same type
      //const auto &weights = this->GetWeightsTensor();
      auto &weights = this->GetWeightsTensor();
      auto &weightGradients = this->GetWeightGradientsTensor();
      // note that cudnnRNNBackwardWeights accumulates the weight gradients,
      // so the tensor needs to be initialized to zero every time
      Architecture_t::InitializeZero(weightGradients);

      // hx is fState
      auto &hx = this->GetState();
      auto &cx = this->GetCell();
      // use the same for hy and cy
      auto &dhy = hx;
      auto &dcy = cx;
      auto &dhx = hx;
      auto &dcx = cx;

      auto &rnnDesc = static_cast<RNNDescriptors_t &>(*fDescriptors);
      auto &rnnWork = static_cast<RNNWorkspace_t &>(*fWorkspace);

      Architecture_t::RNNBackward(x, hx, cx, y, dy, dhy, dcy, weights, dx, dhx, dcx, weightGradients, rnnDesc, rnnWork);

      if (gradients_backward.GetSize() != 0)
         Architecture_t::Rearrange(gradients_backward, dx);

      return;
   }

   // BACKWARD for CPU
   // activations_backward is the input of this layer;
   // gradients_backward is the activation gradient of the previous (input-side) layer.
   // Currently gradients_backward is computed for the input (x) and not for the state.
   // TODO use this to change the initial state??


   bool dummy = false;
   if (gradients_backward.GetSize() == 0) {
      dummy = true;
   }
   Tensor_t arr_gradients_backward(fTimeSteps, this->GetBatchSize(), this->GetInputSize());
   //for (size_t t = 0; t < fTimeSteps; ++t) arr_gradients_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D

   if (!dummy) {
      // TODO gradients_backward will be written back on the matrix
      //Architecture_t::Rearrange(arr_gradients_backward, gradients_backward);
   }
   Tensor_t arr_activations_backward(fTimeSteps, this->GetBatchSize(), this->GetInputSize());
   //for (size_t t = 0; t < fTimeSteps; ++t) arr_activations_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D
   Architecture_t::Rearrange(arr_activations_backward, activations_backward);

   Matrix_t state_gradients_backward(this->GetBatchSize(), fStateSize); // B x H
   DNN::initialize<Architecture_t>(state_gradients_backward, DNN::EInitialization::kZero);

   Matrix_t initState(this->GetBatchSize(), fStateSize); // B x H
   DNN::initialize<Architecture_t>(initState, DNN::EInitialization::kZero);

   Tensor_t arr_output(fTimeSteps, this->GetBatchSize(), fStateSize);
   Tensor_t arr_actgradients(fTimeSteps, this->GetBatchSize(), fStateSize);

   if (fReturnSequence) {
      Architecture_t::Rearrange(arr_output, this->GetOutput());
      Architecture_t::Rearrange(arr_actgradients, this->GetActivationGradients());
   } else {
      // the full time-major output was cached in fY during the forward pass
      arr_output = fY;

      Architecture_t::InitializeZero(arr_actgradients);
      // need to reshape to pad a time dimension = 1 (note: column-major tensors here)
      Tensor_t tmp_grad = arr_actgradients.At(fTimeSteps - 1).Reshape({this->GetBatchSize(), fStateSize, 1});
      assert(tmp_grad.GetSize() == this->GetActivationGradients().GetSize());
      assert(tmp_grad.GetShape()[0] ==
             this->GetActivationGradients().GetShape()[2]); // B in tmp is [0] and [2] in the input act. gradients

      Architecture_t::Rearrange(tmp_grad, this->GetActivationGradients());
   }

   // reinitialize the weight and bias gradients to 0
   fWeightInputGradients.Zero();
   fWeightStateGradients.Zero();
   fBiasGradients.Zero();

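   // Backpropagation through time: walk the time steps in reverse. At each step
   // the state gradient accumulates the gradient coming from the output at that
   // step (the ScaleAdd below) plus the gradient flowing back from step t+1
   // through the state weights (propagated by CellBackward).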
   for (size_t t = fTimeSteps; t > 0; t--) {
      //const Matrix_t & currStateActivations = arr_output[t - 1];
      Matrix_t actgrad_m = arr_actgradients.At(t - 1).GetMatrix();
      Architecture_t::ScaleAdd(state_gradients_backward, actgrad_m);

      Matrix_t actbw_m = arr_activations_backward.At(t - 1).GetMatrix();
      Matrix_t gradbw_m = arr_gradients_backward.At(t - 1).GetMatrix();

      // compute the derivatives of the activations
      Tensor_t df = fDerivatives.At(t - 1);
      Tensor_t dy = Tensor_t(state_gradients_backward);
      //Tensor_t dy = arr_actgradients.At(t - 1);
      Tensor_t y = arr_output.At(t - 1);
      Architecture_t::ActivationFunctionBackward(df, y,
                                                 dy, df, // do it in place (should work)
                                                 this->GetActivationFunction(), fActivationDesc);

      Matrix_t df_m = df.GetMatrix();

      // Architecture_t::PrintTensor(df, "dy before");
      if (t > 1) {
         Matrix_t precStateActivations = arr_output.At(t - 2).GetMatrix();
         CellBackward(state_gradients_backward, precStateActivations, actbw_m, gradbw_m, df_m);

      } else {
         const Matrix_t &precStateActivations = initState;
         CellBackward(state_gradients_backward, precStateActivations, actbw_m, gradbw_m, df_m);

      }
   }
   if (!dummy) {
      Architecture_t::Rearrange(gradients_backward, arr_gradients_backward);
   }
}

//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::CellBackward(Matrix_t &state_gradients_backward,
                                                         const Matrix_t &precStateActivations,
                                                         const Matrix_t &input, Matrix_t &input_gradient, Matrix_t &dF)
-> Matrix_t &
{
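   // Delegates the single-step backward computation to the architecture backend:
   // it accumulates the weight and bias gradients of this time step, writes the
   // gradient w.r.t. the cell input into input_gradient, and propagates
   // state_gradients_backward to the previous time step through the state weights.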
   return Architecture_t::RecurrentLayerBackward(state_gradients_backward, fWeightInputGradients, fWeightStateGradients,
                                                 fBiasGradients, dF, precStateActivations, fWeightsInput,
                                                 fWeightsState, input, input_gradient);
}

//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   auto layerxml = gTools().xmlengine().NewChild(parent, nullptr, "RNNLayer");

   // write all the other info: stateSize, inputSize, timeSteps, rememberState
   gTools().xmlengine().NewAttr(layerxml, nullptr, "StateSize", gTools().StringFromInt(this->GetStateSize()));
   gTools().xmlengine().NewAttr(layerxml, nullptr, "InputSize", gTools().StringFromInt(this->GetInputSize()));
   gTools().xmlengine().NewAttr(layerxml, nullptr, "TimeSteps", gTools().StringFromInt(this->GetTimeSteps()));
   gTools().xmlengine().NewAttr(layerxml, nullptr, "RememberState", gTools().StringFromInt(this->DoesRememberState()));
   gTools().xmlengine().NewAttr(layerxml, nullptr, "ReturnSequence", gTools().StringFromInt(this->DoesReturnSequence()));

   // write the weight and bias matrices
   this->WriteMatrixToXML(layerxml, "InputWeights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "StateWeights", this->GetWeightsAt(1));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
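
// A sketch of the XML node written above (attribute values are illustrative):
//    <RNNLayer StateSize="10" InputSize="5" TimeSteps="8"
//              RememberState="0" ReturnSequence="0">
//       <InputWeights ... /> <StateWeights ... /> <Biases ... />
//    </RNNLayer>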

//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // read the weights and biases
   this->ReadMatrixXML(parent, "InputWeights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "StateWeights", this->GetWeightsAt(1));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace RNN
} // namespace DNN
} // namespace TMVA

#endif // TMVA_DNN_RNN_LAYER