GeneralLayer.h

// @(#)root/tmva/tmva/dnn:$Id$
// Author: Vladimir Ilievski

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : TGeneralLayer                                                         *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      General Deep Neural Network Layer                                         *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Vladimir Ilievski  <ilievski.vladimir@live.com>  - CERN, Switzerland      *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef TMVA_DNN_GENERALLAYER
#define TMVA_DNN_GENERALLAYER

#include <sstream>
#include <limits>
#include <vector>
#include <string>

// for xml
#include "TMVA/Tools.h"
#include "TError.h" // for R__ASSERT

#include "TMVA/DNN/Functions.h"

namespace TMVA {
namespace DNN {
/** \class VGeneralLayer
    Generic layer base class.

    This class is the base class for all layers in the TMVA Deep Learning
    module.
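
    Concrete layers derive from this class and implement the pure virtual
    interface below (Forward, Backward, Print and the XML I/O). A minimal
    sketch of such a subclass (the name TMyLayer is hypothetical, for
    illustration only):

    \code
    template <typename Architecture_t>
    class TMyLayer : public VGeneralLayer<Architecture_t> {
    public:
       using Tensor_t = typename Architecture_t::Tensor_t;

       // a constructor would forward the geometry and the weight/bias shapes
       // to the VGeneralLayer base-class constructor

       void Forward(Tensor_t &input, bool applyDropout = false) override;
       void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) override;
       void Print() const override;
       void AddWeightsXMLTo(void *parent) override;
       void ReadWeightsFromXML(void *parent) override;
    };
    \endcode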
 */
template <typename Architecture_t>
class VGeneralLayer {

   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

protected:
   size_t fBatchSize; ///< Batch size used for training and evaluation

   size_t fInputDepth;  ///< The depth of the previous layer or input.
   size_t fInputHeight; ///< The height of the previous layer or input.
   size_t fInputWidth;  ///< The width of the previous layer or input.

   size_t fDepth;  ///< The depth of the layer.
   size_t fHeight; ///< The height of the layer.
   size_t fWidth;  ///< The width of this layer.

   bool fIsTraining; ///< Flag indicating whether the layer is in training or evaluation mode.

   std::vector<Matrix_t> fWeights; ///< The weights associated with the layer.
   std::vector<Matrix_t> fBiases;  ///< The biases associated with the layer.

   std::vector<Matrix_t> fWeightGradients; ///< Gradients w.r.t. the weights of the layer.
   std::vector<Matrix_t> fBiasGradients;   ///< Gradients w.r.t. the bias values of the layer.

   Tensor_t fOutput;              ///< Activations of this layer.
   Tensor_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.

   EInitialization fInit; ///< The initialization method.

public:
   /*! Constructor */
   VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
                 size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols,
                 size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows,
                 size_t OutputNCols, EInitialization Init);

   /*! General constructor, allowing each weight slice to have its own dimensions */
   VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
                 size_t Height, size_t Width, size_t WeightsNSlices, std::vector<size_t> WeightsNRows,
                 std::vector<size_t> WeightsNCols, size_t BiasesNSlices, std::vector<size_t> BiasesNRows,
                 std::vector<size_t> BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols,
                 EInitialization Init);

   /*! Copy the layer provided as a pointer */
   VGeneralLayer(VGeneralLayer<Architecture_t> *layer);

   /*! Copy Constructor */
   VGeneralLayer(const VGeneralLayer &);

   /*! Virtual Destructor. */
   virtual ~VGeneralLayer();

   /*! Initialize the weights and biases according to the given initialization method. */
   virtual void Initialize();

   /*! Computes the activation of the layer for the given input. The input
    *  must be in 3D tensor form, with the different matrices corresponding to
    *  different events in the batch. */
   virtual void Forward(Tensor_t &input, bool applyDropout = false) = 0;

   /*! Backpropagates the error. Must only be called directly after the
    *  corresponding call to Forward(...). */
   virtual void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) = 0;
   /////              std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2) = 0;

   /*! Reset some training flags after a loop over all batches.
    *  Some layers (e.g. batch normalization) might need to implement this function
    *  in case some operations are required after looping over all batches. */
   virtual void ResetTraining() {}

   /*! Updates the weights and biases, given the learning rate */
   void Update(const Scalar_t learningRate);

   /*! Updates the weights, given the gradients and the learning rate. */
   void UpdateWeights(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);

   /*! Updates the biases, given the gradients and the learning rate. */
   void UpdateBiases(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);

   /*! Updates the weight gradients, given some other weight gradients and the learning rate. */
   void UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);

   /*! Updates the bias gradients, given some other bias gradients and the learning rate. */
   void UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);

   /*! Copies the weights provided as an input. */
   void CopyWeights(const std::vector<Matrix_t> &otherWeights);

   /*! Copies the biases provided as an input. */
   void CopyBiases(const std::vector<Matrix_t> &otherBiases);

   /*! Copy all trainable weights and biases from another equivalent layer with a
    *  different architecture. The function can also copy extra parameters, in
    *  addition to weights and biases, if they are returned by GetExtraLayerParameters(). */
   template <typename Arch>
   void CopyParameters(const VGeneralLayer<Arch> &layer);

   /*! Prints the info about the layer. */
   virtual void Print() const = 0;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent) = 0;

   /*! Read the information and the weights about the layer from an XML node. */
   virtual void ReadWeightsFromXML(void *parent) = 0;

   /*! Set dropout probability. Reimplemented for layers supporting dropout. */
   virtual void SetDropoutProbability(Scalar_t) {}

   /*! Getters */
   size_t GetBatchSize() const { return fBatchSize; }
   size_t GetInputDepth() const { return fInputDepth; }
   size_t GetInputHeight() const { return fInputHeight; }
   size_t GetInputWidth() const { return fInputWidth; }
   size_t GetDepth() const { return fDepth; }
   size_t GetHeight() const { return fHeight; }
   size_t GetWidth() const { return fWidth; }
   bool IsTraining() const { return fIsTraining; }

   const std::vector<Matrix_t> &GetWeights() const { return fWeights; }
   std::vector<Matrix_t> &GetWeights() { return fWeights; }

   const Matrix_t &GetWeightsAt(size_t i) const { return fWeights[i]; }
   Matrix_t &GetWeightsAt(size_t i) { return fWeights[i]; }

   const std::vector<Matrix_t> &GetBiases() const { return fBiases; }
   std::vector<Matrix_t> &GetBiases() { return fBiases; }

   const Matrix_t &GetBiasesAt(size_t i) const { return fBiases[i]; }
   Matrix_t &GetBiasesAt(size_t i) { return fBiases[i]; }

   const std::vector<Matrix_t> &GetWeightGradients() const { return fWeightGradients; }
   std::vector<Matrix_t> &GetWeightGradients() { return fWeightGradients; }

   const Matrix_t &GetWeightGradientsAt(size_t i) const { return fWeightGradients[i]; }
   Matrix_t &GetWeightGradientsAt(size_t i) { return fWeightGradients[i]; }

   const std::vector<Matrix_t> &GetBiasGradients() const { return fBiasGradients; }
   std::vector<Matrix_t> &GetBiasGradients() { return fBiasGradients; }

   const Matrix_t &GetBiasGradientsAt(size_t i) const { return fBiasGradients[i]; }
   Matrix_t &GetBiasGradientsAt(size_t i) { return fBiasGradients[i]; }

   const Tensor_t &GetOutput() const { return fOutput; }
   Tensor_t &GetOutput() { return fOutput; }

   const Tensor_t &GetActivationGradients() const { return fActivationGradients; }
   Tensor_t &GetActivationGradients() { return fActivationGradients; }

   Matrix_t GetOutputAt(size_t i) { return fOutput.At(i).GetMatrix(); }
   const Matrix_t &GetOutputAt(size_t i) const { return fOutput.At(i).GetMatrix(); }

   Matrix_t GetActivationGradientsAt(size_t i) { return fActivationGradients.At(i).GetMatrix(); }
   const Matrix_t &GetActivationGradientsAt(size_t i) const { return fActivationGradients.At(i).GetMatrix(); }

   // Retrieve additional layer parameters that are learned during training but are not weights;
   // an example are the mean and std of a batch normalization layer.
   virtual std::vector<Matrix_t> GetExtraLayerParameters() const { return std::vector<Matrix_t>(); }
   // Same thing, but to set these extra parameters.
   virtual void SetExtraLayerParameters(const std::vector<Matrix_t> &) {}

   EInitialization GetInitialization() const { return fInit; }

   /*! Setters */
   void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
   void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
   void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
   void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
   void SetDepth(size_t depth) { fDepth = depth; }
   void SetHeight(size_t height) { fHeight = height; }
   void SetWidth(size_t width) { fWidth = width; }
   void SetIsTraining(bool isTraining) { fIsTraining = isTraining; }

   /// helper functions for XML
   void WriteTensorToXML(void *node, const char *name, const std::vector<Matrix_t> &tensor);
   void WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix);

   void ReadMatrixXML(void *node, const char *name, Matrix_t &matrix);

};
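
// Illustrative use of the public interface from driver code (a hedged sketch;
// 'layer' stands for an instance of any concrete subclass, and the tensors are
// assumed to already have the shapes that layer expects):
//
//    layer.Initialize();                       // weights per fInit, gradients zeroed
//    layer.SetIsTraining(true);
//    layer.Forward(batchInput);                // fills GetOutput()
//    layer.Backward(gradientsBackward, previousActivations);
//    layer.Update(learningRate);               // gradient-descent step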

//
//
//  The General Layer Class - Implementation
//_________________________________________________________________________________________________
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                             size_t depth, size_t height, size_t width, size_t weightsNSlices,
                                             size_t weightsNRows, size_t weightsNCols, size_t biasesNSlices,
                                             size_t biasesNRows, size_t biasesNCols, size_t outputNSlices,
                                             size_t outputNRows, size_t outputNCols, EInitialization init)
   : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
     fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
     fOutput( outputNSlices, outputNRows, outputNCols ),
     fActivationGradients( outputNSlices, outputNRows, outputNCols ),
     fInit(init)
{

   for (size_t i = 0; i < weightsNSlices; i++) {
      fWeights.emplace_back(weightsNRows, weightsNCols);
      fWeightGradients.emplace_back(weightsNRows, weightsNCols);
   }

   for (size_t i = 0; i < biasesNSlices; i++) {
      fBiases.emplace_back(biasesNRows, biasesNCols);
      fBiasGradients.emplace_back(biasesNRows, biasesNCols);
   }
}
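
// Note: this constructor gives every weight (and bias) slice the same shape;
// the overload below accepts per-slice row/column counts instead, which is
// needed e.g. for recurrent layers.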

//_________________________________________________________________________________________________
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                             size_t depth, size_t height, size_t width, size_t weightsNSlices,
                                             std::vector<size_t> weightsNRows, std::vector<size_t> weightsNCols,
                                             size_t biasesNSlices, std::vector<size_t> biasesNRows,
                                             std::vector<size_t> biasesNCols, size_t outputNSlices, size_t outputNRows,
                                             size_t outputNCols, EInitialization init)
   : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
     fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
     fOutput( outputNSlices, outputNRows, outputNCols ),
     fActivationGradients( outputNSlices, outputNRows, outputNCols ),
     fInit(init)
{
   // constructor for weights with different shapes per slice (e.g. in recurrent layers)
   for (size_t i = 0; i < weightsNSlices; i++) {
      fWeights.emplace_back(weightsNRows[i], weightsNCols[i]);
      fWeightGradients.emplace_back(weightsNRows[i], weightsNCols[i]);
   }

   for (size_t i = 0; i < biasesNSlices; i++) {
      fBiases.emplace_back(biasesNRows[i], biasesNCols[i]);
      fBiasGradients.emplace_back(biasesNRows[i], biasesNCols[i]);
   }

   // for (size_t i = 0; i < outputNSlices; i++) {
   //    fOutput.emplace_back(outputNRows, outputNCols);
   //    fActivationGradients.emplace_back(outputNRows, outputNCols);
   // }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(VGeneralLayer<Architecture_t> *layer)
   : fBatchSize(layer->GetBatchSize()), fInputDepth(layer->GetInputDepth()), fInputHeight(layer->GetInputHeight()),
     fInputWidth(layer->GetInputWidth()), fDepth(layer->GetDepth()), fHeight(layer->GetHeight()),
     fWidth(layer->GetWidth()), fIsTraining(layer->IsTraining()), fWeights(), fBiases(), fWeightGradients(),
     fBiasGradients(),
     fOutput( layer->GetOutput().GetShape() ), // construct from shape of other tensor
     fActivationGradients( layer->GetActivationGradients().GetShape() ),
     fInit(layer->GetInitialization())
{
   // constructor from a pointer to another layer of the same architecture
   size_t weightsNSlices = (layer->GetWeights()).size();
   size_t weightsNRows = 0;
   size_t weightsNCols = 0;

   for (size_t i = 0; i < weightsNSlices; i++) {
      weightsNRows = (layer->GetWeightsAt(i)).GetNrows();
      weightsNCols = (layer->GetWeightsAt(i)).GetNcols();

      fWeights.emplace_back(weightsNRows, weightsNCols);
      fWeightGradients.emplace_back(weightsNRows, weightsNCols);

      Architecture_t::Copy(fWeights[i], layer->GetWeightsAt(i));
   }

   size_t biasesNSlices = (layer->GetBiases()).size();
   size_t biasesNRows = 0;
   size_t biasesNCols = 0;

   for (size_t i = 0; i < biasesNSlices; i++) {
      biasesNRows = (layer->GetBiasesAt(i)).GetNrows();
      biasesNCols = (layer->GetBiasesAt(i)).GetNcols();

      fBiases.emplace_back(biasesNRows, biasesNCols);
      fBiasGradients.emplace_back(biasesNRows, biasesNCols);

      Architecture_t::Copy(fBiases[i], layer->GetBiasesAt(i));
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(const VGeneralLayer &layer)
   : fBatchSize(layer.fBatchSize), fInputDepth(layer.fInputDepth), fInputHeight(layer.fInputHeight),
     fInputWidth(layer.fInputWidth), fDepth(layer.fDepth), fHeight(layer.fHeight), fWidth(layer.fWidth),
     fIsTraining(layer.fIsTraining), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
     fOutput( layer.GetOutput() ),
     fActivationGradients( layer.GetActivationGradients() ),
     fInit( layer.GetInitialization() )
{
   // copy constructor
   size_t weightsNSlices = layer.fWeights.size();
   size_t weightsNRows = 0;
   size_t weightsNCols = 0;

   for (size_t i = 0; i < weightsNSlices; i++) {
      weightsNRows = (layer.fWeights[i]).GetNrows();
      weightsNCols = (layer.fWeights[i]).GetNcols();

      fWeights.emplace_back(weightsNRows, weightsNCols);
      fWeightGradients.emplace_back(weightsNRows, weightsNCols);

      Architecture_t::Copy(fWeights[i], layer.fWeights[i]);
   }

   size_t biasesNSlices = layer.fBiases.size();
   size_t biasesNRows = 0;
   size_t biasesNCols = 0;

   for (size_t i = 0; i < biasesNSlices; i++) {
      biasesNRows = (layer.fBiases[i]).GetNrows();
      biasesNCols = (layer.fBiases[i]).GetNcols();

      fBiases.emplace_back(biasesNRows, biasesNCols);
      fBiasGradients.emplace_back(biasesNRows, biasesNCols);

      Architecture_t::Copy(fBiases[i], layer.fBiases[i]);
   }

   size_t outputNSlices = layer.fOutput.size();
   size_t outputNRows = 0;
   size_t outputNCols = 0;

   for (size_t i = 0; i < outputNSlices; i++) {
      outputNRows = (layer.fOutput[i]).GetNrows();
      outputNCols = (layer.fOutput[i]).GetNcols();

      fOutput.emplace_back(outputNRows, outputNCols);
      fActivationGradients.emplace_back(outputNRows, outputNCols);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::~VGeneralLayer()
{
   // Nothing to do here.
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::Initialize() -> void
{
   for (size_t i = 0; i < fWeights.size(); i++) {
      initialize<Architecture_t>(fWeights[i], this->GetInitialization());
      initialize<Architecture_t>(fWeightGradients[i], EInitialization::kZero);
   }

   for (size_t i = 0; i < fBiases.size(); i++) {
      initialize<Architecture_t>(fBiases[i], EInitialization::kZero);
      initialize<Architecture_t>(fBiasGradients[i], EInitialization::kZero);
   }
}
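
// Note: only the weights use the configured EInitialization method; the biases
// and all gradient matrices are zero-initialized, so the first Update() call
// is driven entirely by the first backward pass.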

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::Update(const Scalar_t learningRate) -> void
{
   this->UpdateWeights(fWeightGradients, learningRate);
   this->UpdateBiases(fBiasGradients, learningRate);
}
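
// Update() performs a plain gradient-descent step,
//
//    W_i <- W_i - learningRate * dL/dW_i ,   b_i <- b_i - learningRate * dL/db_i ,
//
// realized below via Architecture_t::ScaleAdd(A, B, beta), which accumulates
// A += beta * B; passing beta = -learningRate gives the descent direction.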

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateWeights(const std::vector<Matrix_t> &weightGradients,
                                                  const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fWeights.size(); i++) {
      Architecture_t::ScaleAdd(fWeights[i], weightGradients[i], -learningRate);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateBiases(const std::vector<Matrix_t> &biasGradients,
                                                 const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fBiases.size(); i++) {
      Architecture_t::ScaleAdd(fBiases[i], biasGradients[i], -learningRate);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients,
                                                          const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fWeightGradients.size(); i++) {
      Architecture_t::ScaleAdd(fWeightGradients[i], weightGradients[i], -learningRate);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients,
                                                        const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fBiasGradients.size(); i++) {
      Architecture_t::ScaleAdd(fBiasGradients[i], biasGradients[i], -learningRate);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::CopyWeights(const std::vector<Matrix_t> &otherWeights) -> void
{
   for (size_t i = 0; i < fWeights.size(); i++) {
      Architecture_t::Copy(fWeights[i], otherWeights[i]);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::CopyBiases(const std::vector<Matrix_t> &otherBiases) -> void
{
   for (size_t i = 0; i < fBiases.size(); i++) {
      Architecture_t::Copy(fBiases[i], otherBiases[i]);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
template <typename Arch>
void VGeneralLayer<Architecture_t>::CopyParameters(const VGeneralLayer<Arch> &layer)
{
   // assert(!std::is_same<Arch, Architecture_t>::value);
   // copy weights from a different architecture - default generic implementation
   Architecture_t::CopyDiffArch(this->GetWeights(), layer.GetWeights());
   Architecture_t::CopyDiffArch(this->GetBiases(), layer.GetBiases());

   // copy also the additional layer parameters
   auto params = layer.GetExtraLayerParameters();
   if (params.size() > 0) {
      auto paramsToCopy = GetExtraLayerParameters();
      Architecture_t::CopyDiffArch(paramsToCopy, params);
      SetExtraLayerParameters(paramsToCopy);
   }
}
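
// Hedged usage sketch: copying trained parameters between backends, e.g. from
// a GPU-trained layer into an equivalent CPU layer (the concrete architecture
// types and the layer references here are assumptions for illustration):
//
//    VGeneralLayer<TCpu<float>> &cpuLayer = ...;         // target layer
//    const VGeneralLayer<TCudnn<float>> &gpuLayer = ...; // source layer
//    cpuLayer.CopyParameters(gpuLayer);  // weights, biases and extra parameters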

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::WriteTensorToXML(void *node, const char *name, const std::vector<Matrix_t> &tensor) -> void
{
   auto xmlengine = gTools().xmlengine();
   void *matnode = xmlengine.NewChild(node, 0, name);
   if (tensor.size() == 0) return;
   xmlengine.NewAttr(matnode, 0, "Depth", gTools().StringFromInt(tensor.size()));
   // assume the same number of rows and columns for every matrix in the std::vector
   xmlengine.NewAttr(matnode, 0, "Rows", gTools().StringFromInt(tensor[0].GetNrows()));
   xmlengine.NewAttr(matnode, 0, "Columns", gTools().StringFromInt(tensor[0].GetNcols()));
   std::stringstream s;
   for (size_t i = 0; i < tensor.size(); ++i) {
      auto &mat = tensor[i];
      for (Int_t row = 0; row < mat.GetNrows(); row++) {
         for (Int_t col = 0; col < mat.GetNcols(); col++) {
            // TString tmp = TString::Format( "%5.15e ", (mat)(row,col) );
            // s << tmp.Data();
            s << std::scientific << mat(row, col) << " ";
         }
      }
   }
   xmlengine.AddRawLine(matnode, s.str().c_str());
}
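
// The resulting XML node has the layout (illustrative):
//
//    <name Depth="d" Rows="r" Columns="c"> e00 e01 ... </name>
//
// i.e. three shape attributes followed by all matrix elements of all slices,
// written as one whitespace-separated raw text line in slice-row-column order.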

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix) -> void
{
   auto xmlengine = gTools().xmlengine();
   void *matnode = xmlengine.NewChild(node, nullptr, name);

   xmlengine.NewAttr(matnode, nullptr, "Rows", gTools().StringFromInt(matrix.GetNrows()));
   xmlengine.NewAttr(matnode, nullptr, "Columns", gTools().StringFromInt(matrix.GetNcols()));
   std::stringstream s;
   s.precision( std::numeric_limits<Scalar_t>::digits10 );
   size_t nrows = matrix.GetNrows();
   size_t ncols = matrix.GetNcols();
   for (size_t row = 0; row < nrows; row++) {
      for (size_t col = 0; col < ncols; col++) {
         // TString tmp = TString::Format( "%5.15e ", matrix(row,col) );
         s << std::scientific << matrix(row, col) << " ";
      }
   }

   xmlengine.AddRawLine(matnode, s.str().c_str());
}
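
// The stream precision is set to std::numeric_limits<Scalar_t>::digits10, the
// number of decimal digits the type can represent without change; an exact
// binary round trip would need max_digits10, so the last bit of each value may
// differ after re-reading.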

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::ReadMatrixXML(void *node, const char *name, Matrix_t &matrix) -> void
{
   void *matrixXML = gTools().GetChild(node, name);
   size_t rows, cols;
   gTools().ReadAttr(matrixXML, "Rows", rows);
   gTools().ReadAttr(matrixXML, "Columns", cols);

   R__ASSERT((size_t) matrix.GetNrows() == rows);
   R__ASSERT((size_t) matrix.GetNcols() == cols);

   TMatrixT<Scalar_t> tmatrix(rows, cols);

   const char *matrixString = gTools().xmlengine().GetNodeContent(matrixXML);
   std::stringstream matrixStringStream(matrixString);

   for (size_t i = 0; i < rows; i++)
   {
      for (size_t j = 0; j < cols; j++)
      {
#ifndef R__HAS_TMVAGPU
         matrixStringStream >> tmatrix(i,j);
#else
         Scalar_t value;
         matrixStringStream >> value;
         tmatrix(i,j) = value;
#endif

      }
   }

   // copy from tmatrix to matrix
   Matrix_t tmp( tmatrix );
   Architecture_t::Copy(matrix, tmp);

}
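
// ReadMatrixXML() is the inverse of WriteMatrixToXML(): it locates the child
// node by name, checks the stored shape against the target matrix, and streams
// the whitespace-separated elements back in row-major order. Illustrative
// round trip (the 'parent' node handle is hypothetical):
//
//    layer.WriteMatrixToXML(parent, "Weights", layer.GetWeightsAt(0));
//    layer.ReadMatrixXML(parent, "Weights", layer.GetWeightsAt(0));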


template <typename Architecture>
auto debugTensor(const typename Architecture::Tensor_t &A, const std::string name = "tensor") -> void
{
   Architecture::PrintTensor(A, name);
}
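
// Example (hypothetical, assuming the CPU backend): print a layer's activations:
//
//    debugTensor<TCpu<float>>(layer.GetOutput(), "output");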

} // namespace DNN
} // namespace TMVA

#endif