Logo ROOT  
Reference Guide
DeepNet.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Vladimir Ilievski
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TDeepNet *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Deep Neural Network *
12 * *
13 * Authors (alphabetical): *
14 * Akshay Vashistha <akshayvashistha1995@gmail.com> - CERN, Switzerland *
15 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
16 * Saurav Shekhar <sauravshekhar01@gmail.com> - CERN, Switzerland *
17 * *
18 * Copyright (c) 2005-2015: *
19 * CERN, Switzerland *
20 * U. of Victoria, Canada *
21 * MPI-K Heidelberg, Germany *
22 * U. of Bonn, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29#ifndef TMVA_DNN_DEEPNET
30#define TMVA_DNN_DEEPNET
31
32#include "TMVA/DNN/Functions.h"
34
36#include "TMVA/DNN/DenseLayer.h"
39
42
46
47#ifdef HAVE_DAE
48#include "TMVA/DNN/DAE/CompressionLayer.h"
49#include "TMVA/DNN/DAE/CorruptionLayer.h"
50#include "TMVA/DNN/DAE/ReconstructionLayer.h"
51#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
52#endif
53
54#include <vector>
55#include <cmath>
56
57
58namespace TMVA {
59namespace DNN {
60
61 using namespace CNN;
62 using namespace RNN;
63
64 //using namespace DAE;
65
66/** \class TDeepNet
67 Generic Deep Neural Network class.
68 This class encapsulates the information for all types of Deep Neural Networks.
69 \tparam Architecture The Architecture type that holds the
70 architecture-specific data types.
71 */
72template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
73class TDeepNet {
74public:
75
76 using Tensor_t = typename Architecture_t::Tensor_t;
77 using Matrix_t = typename Architecture_t::Matrix_t;
78 using Scalar_t = typename Architecture_t::Scalar_t;
79
80
81private:
82 bool inline isInteger(Scalar_t x) const { return x == floor(x); }
83 size_t calculateDimension(int imgDim, int fltDim, int padding, int stride);
84
85private:
86 std::vector<Layer_t *> fLayers; ///< The layers consisting the DeepNet
87
88 size_t fBatchSize; ///< Batch size used for training and evaluation.
89 size_t fInputDepth; ///< The depth of the input.
90 size_t fInputHeight; ///< The height of the input.
91 size_t fInputWidth; ///< The width of the input.
92
93 size_t fBatchDepth; ///< The depth of the batch used for training/testing.
94 size_t fBatchHeight; ///< The height of the batch used for training/testing.
95 size_t fBatchWidth; ///< The width of the batch used for training/testing.
96
97 bool fIsTraining; ///< Is the network training?
98
99 ELossFunction fJ; ///< The loss function of the network.
100 EInitialization fI; ///< The initialization method of the network.
101 ERegularization fR; ///< The regularization used for the network.
102 Scalar_t fWeightDecay; ///< The weight decay factor.
103
104public:
105 /*! Default Constructor */
107
108 /*! Constructor */
109 TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
110 size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
111 ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);
112
113 /*! Copy-constructor */
115
116 /*! Destructor */
118
119 /*! Function for adding Convolution layer in the Deep Neural Network,
120 * with a given depth, filter height and width, striding in rows and columns,
121 * the zero paddings, as well as the activation function and the dropout
122 * probability. Based on these parameters, it calculates the width and height
123 * of the convolutional layer. */
124 TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
125 size_t strideCols, size_t paddingHeight, size_t paddingWidth,
126 EActivationFunction f, Scalar_t dropoutProbability = 1.0);
127
128 /*! Function for adding Convolution Layer in the Deep Neural Network,
129 * when the layer is already created. */
131
132 /*! Function for adding Pooling layer in the Deep Neural Network,
133 * with a given filter height and width, striding in rows and columns as
134 * well as the dropout probability. The depth is same as the previous
135 * layer depth. Based on these parameters, it calculates the width and
136 * height of the pooling layer. */
137 TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
138 size_t strideCols, Scalar_t dropoutProbability = 1.0);
139 /*! Function for adding Max Pooling layer in the Deep Neural Network,
140 * when the layer is already created. */
142
143
144 /*! Function for adding Recurrent Layer in the Deep Neural Network,
145 * with given parameters */
146 TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
147 bool rememberState = false,bool returnSequence = false,
149
150 /*! Function for adding Vanilla RNN when the layer is already created
151 */
153
154 /*! Function for adding LSTM Layer in the Deep Neural Network,
155 * with given parameters */
156 TBasicLSTMLayer<Architecture_t> *AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
157 bool rememberState = false, bool returnSequence = false);
158
159 /*! Function for adding LSTM Layer in the Deep Neural Network,
160 * when the layer is already created. */
162
163 /*! Function for adding GRU Layer in the Deep Neural Network,
164 * with given parameters */
165 TBasicGRULayer<Architecture_t> *AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps,
166 bool rememberState = false, bool returnSequence = false,
167 bool resetGateAfter = false);
168
169 /*! Function for adding GRU Layer in the Deep Neural Network,
170 * when the layer is already created. */
172
173 /*! Function for adding Dense Connected Layer in the Deep Neural Network,
174 * with a given width, activation function and dropout probability.
175 * Based on the previous layer dimensions, it calculates the input width
176 * of the fully connected layer. */
178
179 /*! Function for adding Dense Layer in the Deep Neural Network, when
180 * the layer is already created. */
182
183 /*! Function for adding Reshape Layer in the Deep Neural Network, with a given
184 * height and width. It will take every matrix from the previous layer and
185 * reshape it to a matrix with new dimensions. */
186 TReshapeLayer<Architecture_t> *AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening);
187
188 /*! Function for adding a Batch Normalization layer with given parameters */
190
191 /*! Function for adding Reshape Layer in the Deep Neural Network, when
192 * the layer is already created. */
194
195#ifdef HAVE_DAE /// DAE functions
196 /*! Function for adding Corruption layer in the Deep Neural Network,
197 * with given number of visibleUnits and hiddenUnits. It corrupts input
198 * according to given corruptionLevel and dropoutProbability. */
199 TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
200 Scalar_t dropoutProbability, Scalar_t corruptionLevel);
201
202 /*! Function for adding Corruption Layer in the Deep Neural Network,
203 * when the layer is already created. */
204 void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);
205
206 /*! Function for adding Compression layer in the Deep Neural Network,
207 * with given number of visibleUnits and hiddenUnits. It compresses the input units
208 * taking weights and biases from prev layers. */
209 TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
210 Scalar_t dropoutProbability, EActivationFunction f,
211 std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);
212
213 /*! Function for adding Compression Layer in the Deep Neural Network, when
214 * the layer is already created. */
215 void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);
216
217 /*! Function for adding Reconstruction layer in the Deep Neural Network,
218 * with given number of visibleUnits and hiddenUnits. It reconstructs the input units
219 * taking weights and biases from prev layers. Same corruptionLevel and dropoutProbability
220 * must be passed as in corruptionLayer. */
221 TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
222 Scalar_t learningRate, EActivationFunction f,
223 std::vector<Matrix_t> weights,
224 std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
225 Scalar_t dropoutProbability);
226
227 /*! Function for adding Reconstruction Layer in the Deep Neural Network, when
228 * the layer is already created. */
229 void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);
230
231 /*! Function for adding logisticRegressionLayer in the Deep Neural Network,
232 * with given number of inputUnits and outputUnits. It classifies the outputUnits. */
233 TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
234 size_t testDataBatchSize,
235 Scalar_t learningRate);
236
237 /*! Function for adding logisticRegressionLayer in the Deep Neural Network, when
238 * the layer is already created. */
239 void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
240
241 /* To train the Deep AutoEncoder network with required number of Corruption, Compression and Reconstruction
242 * layers. */
243 void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
244 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
245 bool applyDropout = false);
246
247 /* To classify outputLabel in Deep AutoEncoder. Should be used after PreTrain if required.
248 * Currently, it used Logistic Regression Layer. Otherwise we can use any other classification layer also.
249 */
250 void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
251 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
252#endif
253
254 /*! Function for initialization of the Neural Net. */
256
257 /*! Function that executes the entire forward pass in the network. */
258 void Forward(Tensor_t &input, bool applyDropout = false);
259
260 /*! Function that reset some training flags after looping all the events but not the weights*/
262
263
264
265 /*! Function that executes the entire backward pass in the network. */
266 void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights);
267
268
269#ifdef USE_PARALLEL_DEEPNET
270 /*! Function for parallel forward in the vector of deep nets, where the master
271 * net is the net calling this function. There is one batch for one deep net.*/
272 void ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
273 std::vector<TTensorBatch<Architecture_t>> &batches, bool applyDropout = false);
274
275 /*! Function for parallel backward in the vector of deep nets, where the master
276 * net is the net calling this function and getting the updates from the other nets.
277 * There is one batch for one deep net.*/
278 void ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
279 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate);
280
281 /*! Function for parallel backward in the vector of deep nets, where the master
282 * net is the net calling this function and getting the updates from the other nets,
283 * following the momentum strategy. There is one batch for one deep net.*/
284 void ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
285 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
286 Scalar_t momentum);
287
288 /*! Function for parallel backward in the vector of deep nets, where the master
289 * net is the net calling this function and getting the updates from the other nets,
290 * following the Nestorov momentum strategy. There is one batch for one deep net.*/
291 void ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
292 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
293 Scalar_t momentum);
294
295#endif // endif use parallel deepnet
296
297 /*! Function that will update the weights and biases in the layers that
298 * contain weights and biases. */
299 void Update(Scalar_t learningRate);
300
301 /*! Function for evaluating the loss, based on the activations stored
302 * in the last layer. */
303 Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization = true) const;
304
305 /*! Function for evaluating the loss, based on the propagation of the given input. */
306 Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights,
307 bool inTraining = false, bool includeRegularization = true);
308
309 /*! Function for computing the regularizaton term to be added to the loss function */
311
312 /*! Prediction based on activations stored in the last layer. */
313 void Prediction(Matrix_t &predictions, EOutputFunction f) const;
314
315 /*! Prediction for the given inputs, based on what network learned. */
317
318 /*! Print the Deep Net Info */
319 void Print() const;
320
321 /*! Get the layer in the vector of layers at position i */
322 inline Layer_t *GetLayerAt(size_t i) { return fLayers[i]; }
323 inline const Layer_t *GetLayerAt(size_t i) const { return fLayers[i]; }
324
325 /* Depth and the output width of the network. */
326 inline size_t GetDepth() const { return fLayers.size(); }
327 inline size_t GetOutputWidth() const { return fLayers.back()->GetWidth(); }
328
329 /* Return a reference to the layers. */
330 inline std::vector<Layer_t *> &GetLayers() { return fLayers; }
331 inline const std::vector<Layer_t *> &GetLayers() const { return fLayers; }
332
333 /*! Remove all layers from the network. */
334 inline void Clear() { fLayers.clear(); }
335
336 /*! Getters */
337 inline size_t GetBatchSize() const { return fBatchSize; }
338 inline size_t GetInputDepth() const { return fInputDepth; }
339 inline size_t GetInputHeight() const { return fInputHeight; }
340 inline size_t GetInputWidth() const { return fInputWidth; }
341
342 inline size_t GetBatchDepth() const { return fBatchDepth; }
343 inline size_t GetBatchHeight() const { return fBatchHeight; }
344 inline size_t GetBatchWidth() const { return fBatchWidth; }
345
346 inline bool IsTraining() const { return fIsTraining; }
347
348 inline ELossFunction GetLossFunction() const { return fJ; }
349 inline EInitialization GetInitialization() const { return fI; }
350 inline ERegularization GetRegularization() const { return fR; }
351 inline Scalar_t GetWeightDecay() const { return fWeightDecay; }
352
353 /*! Setters */
354 // FIXME many of these won't work as the data structure storing activations
355 // and gradients have not changed in all the layers, also params in layers
356 // have not changed either
357 inline void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
358 inline void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
359 inline void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
360 inline void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
361 inline void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
362 inline void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
363 inline void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }
364 inline void SetLossFunction(ELossFunction J) { fJ = J; }
368
369 void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);
370
371};
372
373//
374// Deep Net Class - Implementation
375//
376//______________________________________________________________________________
377template <typename Architecture_t, typename Layer_t>
379 : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
380 fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
381 fIsTraining(true), fWeightDecay(0.0)
382{
383 // Nothing to do here.
384}
385
386//______________________________________________________________________________
387template <typename Architecture_t, typename Layer_t>
388TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
389 size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
391 : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
392 fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J), fI(I),
393 fR(R), fWeightDecay(weightDecay)
394{
395 // Nothing to do here.
396}
397
398//______________________________________________________________________________
399template <typename Architecture_t, typename Layer_t>
401 : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
402 fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
403 fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
404 fWeightDecay(deepNet.fWeightDecay)
405{
406 // Nothing to do here.
407}
408
409//______________________________________________________________________________
410template <typename Architecture_t, typename Layer_t>
412{
413   // Release the layers' memory: fLayers stores owning raw pointers
414   for (auto layer : fLayers)
415      delete layer;
416   fLayers.clear();
417}
418
419//______________________________________________________________________________
420template <typename Architecture_t, typename Layer_t>
421auto TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride) -> size_t
422{
423 Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1;
424 if (!isInteger(dimension) || dimension <= 0) {
425 this->Print();
426 int iLayer = fLayers.size();
427 Fatal("calculateDimension","Not compatible hyper parameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d",
428 iLayer, imgDim, fltDim, padding, stride);
429 // std::cout << " calculateDimension - Not compatible hyper parameters (imgDim, fltDim, padding, stride)"
430 // << imgDim << " , " << fltDim << " , " << padding << " , " << stride<< " resulting dim is " << dimension << std::endl;
431 // std::exit(EXIT_FAILURE);
432 }
433
434 return (size_t)dimension;
435}
436
437//______________________________________________________________________________
438template <typename Architecture_t, typename Layer_t>
440 size_t filterWidth, size_t strideRows,
441 size_t strideCols, size_t paddingHeight,
442 size_t paddingWidth, EActivationFunction f,
443 Scalar_t dropoutProbability)
444{
445 // All variables defining a convolutional layer
446 size_t batchSize = this->GetBatchSize();
447 size_t inputDepth;
448 size_t inputHeight;
449 size_t inputWidth;
450 EInitialization init = this->GetInitialization();
451 ERegularization reg = this->GetRegularization();
452 Scalar_t decay = this->GetWeightDecay();
453
454 if (fLayers.size() == 0) {
455 inputDepth = this->GetInputDepth();
456 inputHeight = this->GetInputHeight();
457 inputWidth = this->GetInputWidth();
458 } else {
459 Layer_t *lastLayer = fLayers.back();
460 inputDepth = lastLayer->GetDepth();
461 inputHeight = lastLayer->GetHeight();
462 inputWidth = lastLayer->GetWidth();
463 }
464
465
466
467 // Create the conv layer
469 batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
470 strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);
471
472 fLayers.push_back(convLayer);
473 return convLayer;
474}
475
476//______________________________________________________________________________
477template <typename Architecture_t, typename Layer_t>
479{
480 fLayers.push_back(convLayer);
481}
482
483//______________________________________________________________________________
484template <typename Architecture_t, typename Layer_t>
486 size_t strideRows, size_t strideCols,
487 Scalar_t dropoutProbability)
488{
489 size_t batchSize = this->GetBatchSize();
490 size_t inputDepth;
491 size_t inputHeight;
492 size_t inputWidth;
493
494 if (fLayers.size() == 0) {
495 inputDepth = this->GetInputDepth();
496 inputHeight = this->GetInputHeight();
497 inputWidth = this->GetInputWidth();
498 } else {
499 Layer_t *lastLayer = fLayers.back();
500 inputDepth = lastLayer->GetDepth();
501 inputHeight = lastLayer->GetHeight();
502 inputWidth = lastLayer->GetWidth();
503 }
504
506 batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
507 strideRows, strideCols, dropoutProbability);
508
509 // But this creates a copy or what?
510 fLayers.push_back(maxPoolLayer);
511
512 return maxPoolLayer;
513}
514
515//______________________________________________________________________________
516template <typename Architecture_t, typename Layer_t>
518{
519 fLayers.push_back(maxPoolLayer);
520}
521
522//______________________________________________________________________________
523template <typename Architecture_t, typename Layer_t>
525 size_t timeSteps,
526 bool rememberState, bool returnSequence,
528{
529
530 // should check if input and time size are consistent
531
532 //std::cout << "Create RNN " << fLayers.size() << " " << this->GetInputHeight() << " " << this->GetInputWidth() << std::endl;
533 size_t inputHeight, inputWidth, inputDepth;
534 if (fLayers.size() == 0) {
535 inputHeight = this->GetInputHeight();
536 inputWidth = this->GetInputWidth();
537 inputDepth = this->GetInputDepth();
538 } else {
539 Layer_t *lastLayer = fLayers.back();
540 inputHeight = lastLayer->GetHeight();
541 inputWidth = lastLayer->GetWidth();
542 inputDepth = lastLayer->GetDepth();
543 }
544 if (inputSize != inputWidth) {
545 Error("AddBasicRNNLayer","Inconsistent input size with input layout - it should be %zu instead of %zu",inputSize, inputWidth);
546 }
547 if (timeSteps != inputHeight && timeSteps != inputDepth) {
548 Error("AddBasicRNNLayer","Inconsistent time steps with input layout - it should be %zu instead of %zu or %zu",timeSteps, inputHeight,inputDepth);
549 }
550
551 TBasicRNNLayer<Architecture_t> *basicRNNLayer =
552 new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence,
553 f, fIsTraining, this->GetInitialization());
554 fLayers.push_back(basicRNNLayer);
555 return basicRNNLayer;
556}
557
558//______________________________________________________________________________
559template <typename Architecture_t, typename Layer_t>
561{
562 fLayers.push_back(basicRNNLayer);
563}
564
565//______________________________________________________________________________
566template <typename Architecture_t, typename Layer_t>
568 size_t timeSteps, bool rememberState, bool returnSequence)
569{
570 // should check if input and time size are consistent
571 size_t inputHeight, inputWidth, inputDepth;
572 if (fLayers.size() == 0) {
573 inputHeight = this->GetInputHeight();
574 inputWidth = this->GetInputWidth();
575 inputDepth = this->GetInputDepth();
576 } else {
577 Layer_t *lastLayer = fLayers.back();
578 inputHeight = lastLayer->GetHeight();
579 inputWidth = lastLayer->GetWidth();
580 inputDepth = lastLayer->GetDepth();
581 }
582 if (inputSize != inputWidth) {
583 Error("AddBasicLSTMLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu", inputSize, inputWidth);
584 }
585 if (timeSteps != inputHeight && timeSteps != inputDepth) {
586 Error("AddBasicLSTMLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu", timeSteps, inputHeight);
587 }
588
589 TBasicLSTMLayer<Architecture_t> *basicLSTMLayer =
590 new TBasicLSTMLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence,
593 fIsTraining, this->GetInitialization());
594 fLayers.push_back(basicLSTMLayer);
595 return basicLSTMLayer;
596}
597
598//______________________________________________________________________________
599template <typename Architecture_t, typename Layer_t>
601{
602 fLayers.push_back(basicLSTMLayer);
603}
604
605
606//______________________________________________________________________________
607template <typename Architecture_t, typename Layer_t>
609 size_t timeSteps, bool rememberState, bool returnSequence, bool resetGateAfter)
610{
611 // should check if input and time size are consistent
612 size_t inputHeight, inputWidth, inputDepth;
613 if (fLayers.size() == 0) {
614 inputHeight = this->GetInputHeight();
615 inputWidth = this->GetInputWidth();
616 inputDepth = this->GetInputDepth();
617 } else {
618 Layer_t *lastLayer = fLayers.back();
619 inputHeight = lastLayer->GetHeight();
620 inputWidth = lastLayer->GetWidth();
621 inputDepth = lastLayer->GetDepth();
622 }
623 if (inputSize != inputWidth) {
624 Error("AddBasicGRULayer", "Inconsistent input size with input layout - it should be %zu instead of %zu", inputSize, inputWidth);
625 }
626 if (timeSteps != inputHeight && timeSteps != inputDepth) {
627 Error("AddBasicGRULayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu", timeSteps, inputHeight);
628 }
629
630 TBasicGRULayer<Architecture_t> *basicGRULayer =
631 new TBasicGRULayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter,
634 fIsTraining, this->GetInitialization());
635 fLayers.push_back(basicGRULayer);
636 return basicGRULayer;
637}
638
639//______________________________________________________________________________
640template <typename Architecture_t, typename Layer_t>
642{
643 fLayers.push_back(basicGRULayer);
644}
645
646
647
648//DAE
649#ifdef HAVE_DAE
650
651//______________________________________________________________________________
652template <typename Architecture_t, typename Layer_t>
653TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
654 size_t hiddenUnits,
655 Scalar_t dropoutProbability,
656 Scalar_t corruptionLevel)
657{
658 size_t batchSize = this->GetBatchSize();
659
660 TCorruptionLayer<Architecture_t> *corruptionLayer =
661 new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
662 fLayers.push_back(corruptionLayer);
663 return corruptionLayer;
664}
665//______________________________________________________________________________
666
667template <typename Architecture_t, typename Layer_t>
668void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
669{
670 fLayers.push_back(corruptionLayer);
671}
672
673//______________________________________________________________________________
674template <typename Architecture_t, typename Layer_t>
675TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
676 size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
677 std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
678{
679 size_t batchSize = this->GetBatchSize();
680
681 TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
682 batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
683 fLayers.push_back(compressionLayer);
684 return compressionLayer;
685}
686//______________________________________________________________________________
687
688template <typename Architecture_t, typename Layer_t>
689void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
690{
691 fLayers.push_back(compressionLayer);
692}
693
694//______________________________________________________________________________
695template <typename Architecture_t, typename Layer_t>
696TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
697 size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
698 std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
699{
700 size_t batchSize = this->GetBatchSize();
701
702 TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
703 batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
704 fLayers.push_back(reconstructionLayer);
705 return reconstructionLayer;
706}
707//______________________________________________________________________________
708
709template <typename Architecture_t, typename Layer_t>
710void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
711 TReconstructionLayer<Architecture_t> *reconstructionLayer)
712{
713 fLayers.push_back(reconstructionLayer);
714}
715
716//______________________________________________________________________________
717template <typename Architecture_t, typename Layer_t>
718TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
719 size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
720{
721 size_t batchSize = this->GetBatchSize();
722
723 TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
724 new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
725 fLayers.push_back(logisticRegressionLayer);
726 return logisticRegressionLayer;
727}
728//______________________________________________________________________________
729template <typename Architecture_t, typename Layer_t>
730void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
731 TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
732{
733 fLayers.push_back(logisticRegressionLayer);
734}
735#endif
736
737
738//______________________________________________________________________________
739template <typename Architecture_t, typename Layer_t>
741 Scalar_t dropoutProbability)
742{
743 size_t batchSize = this->GetBatchSize();
744 size_t inputWidth;
745 EInitialization init = this->GetInitialization();
746 ERegularization reg = this->GetRegularization();
747 Scalar_t decay = this->GetWeightDecay();
748
749 if (fLayers.size() == 0) {
750 inputWidth = this->GetInputWidth();
751 } else {
752 Layer_t *lastLayer = fLayers.back();
753 inputWidth = lastLayer->GetWidth();
754 }
755
756 TDenseLayer<Architecture_t> *denseLayer =
757 new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
758
759 fLayers.push_back(denseLayer);
760
761 return denseLayer;
762}
763
764//______________________________________________________________________________
765template <typename Architecture_t, typename Layer_t>
767{
768 fLayers.push_back(denseLayer);
769}
770
771//______________________________________________________________________________
772template <typename Architecture_t, typename Layer_t>
774 size_t width, bool flattening)
775{
776 size_t batchSize = this->GetBatchSize();
777 size_t inputDepth;
778 size_t inputHeight;
779 size_t inputWidth;
780 size_t outputNSlices;
781 size_t outputNRows;
782 size_t outputNCols;
783
784 if (fLayers.size() == 0) {
785 inputDepth = this->GetInputDepth();
786 inputHeight = this->GetInputHeight();
787 inputWidth = this->GetInputWidth();
788 } else {
789 Layer_t *lastLayer = fLayers.back();
790 inputDepth = lastLayer->GetDepth();
791 inputHeight = lastLayer->GetHeight();
792 inputWidth = lastLayer->GetWidth();
793 }
794
795 if (flattening) {
796 outputNSlices = 1;
797 outputNRows = this->GetBatchSize();
798 outputNCols = depth * height * width;
799 size_t inputNCols = inputDepth * inputHeight * inputWidth;
800 if (outputNCols != 0 && outputNCols != inputNCols ) {
801 Info("AddReshapeLayer","Dimensions not compatibles - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - Force flattening output to be %zu",
802 inputDepth, inputHeight, inputWidth, depth, height, width,inputNCols);
803 }
804 outputNCols = inputNCols;
805 depth = 1;
806 height = 1;
807 width = outputNCols;
808 } else {
809 outputNSlices = this->GetBatchSize();
810 outputNRows = depth;
811 outputNCols = height * width;
812 }
813
814 TReshapeLayer<Architecture_t> *reshapeLayer =
815 new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
816 outputNSlices, outputNRows, outputNCols, flattening);
817
818 fLayers.push_back(reshapeLayer);
819
820 return reshapeLayer;
821}
822
823//______________________________________________________________________________
824template <typename Architecture_t, typename Layer_t>
826{
827 int axis = -1;
828 size_t batchSize = this->GetBatchSize();
829 size_t inputDepth = 0;
830 size_t inputHeight = 0;
831 size_t inputWidth = 0;
832 // this is the shape of the output tensor (it is columnmajor by default)
833 // and it is normally (depth, hw, bsize) and for dense layers (bsize, w, 1)
834 std::vector<size_t> shape = {1, 1, 1};
835 if (fLayers.size() == 0) {
836 inputDepth = this->GetInputDepth();
837 inputHeight = this->GetInputHeight();
838 inputWidth = this->GetInputWidth();
839 // assume that is like for a dense layer
840 shape[0] = batchSize;
841 shape[1] = inputWidth;
842 shape[2] = 1;
843 } else {
844 Layer_t *lastLayer = fLayers.back();
845 inputDepth = lastLayer->GetDepth();
846 inputHeight = lastLayer->GetHeight();
847 inputWidth = lastLayer->GetWidth();
848 shape = lastLayer->GetOutput().GetShape();
849 if (dynamic_cast<TConvLayer<Architecture_t> *>(lastLayer) != nullptr ||
850 dynamic_cast<TMaxPoolLayer<Architecture_t> *>(lastLayer) != nullptr)
851 axis = 1; // use axis = channel axis for convolutional layer
852 if (shape.size() > 3) {
853 for (size_t i = 3; i < shape.size(); ++i)
854 shape[2] *= shape[i];
855 }
856 }
857 // std::cout << "addBNormLayer " << inputDepth << " , " << inputHeight << " , " << inputWidth << " , " << shape[0]
858 // << " " << shape[1] << " " << shape[2] << std::endl;
859
860 auto bnormLayer =
861 new TBatchNormLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, shape, axis, momentum, epsilon);
862
863 fLayers.push_back(bnormLayer);
864
865 return bnormLayer;
866}
867
868//______________________________________________________________________________
869template <typename Architecture_t, typename Layer_t>
871{
872 fLayers.push_back(reshapeLayer);
873}
874
875//______________________________________________________________________________
876template <typename Architecture_t, typename Layer_t>
878{
879 for (size_t i = 0; i < fLayers.size(); i++) {
880 fLayers[i]->Initialize();
881 }
882}
883
884//______________________________________________________________________________
885template <typename Architecture_t, typename Layer_t>
887{
888 for (size_t i = 0; i < fLayers.size(); i++) {
889 fLayers[i]->ResetTraining();
890 }
891}
892
893
894//______________________________________________________________________________
895template <typename Architecture_t, typename Layer_t>
897{
898 fLayers.front()->Forward(input, applyDropout);
899
900 for (size_t i = 1; i < fLayers.size(); i++) {
901 fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
902 //std::cout << "forward for layer " << i << std::endl;
903 // fLayers[i]->GetOutput()[0].Print();
904 }
905}
906
907
908#ifdef HAVE_DAE
909//_____________________________________________________________________________
910template <typename Architecture_t, typename Layer_t>
911auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
912 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
913 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
914 EActivationFunction f, bool applyDropout) -> void
915{
916 std::vector<Matrix_t> inp1;
917 std::vector<Matrix_t> inp2;
918 size_t numOfHiddenLayers = sizeof(numHiddenUnitsPerLayer) / sizeof(numHiddenUnitsPerLayer[0]);
919 // size_t batchSize = this->GetBatchSize();
920 size_t visibleUnits = (size_t)input[0].GetNrows();
921
922 AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
923 fLayers.back()->Initialize();
924 fLayers.back()->Forward(input, applyDropout);
925 // fLayers.back()->Print();
926
927 AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
928 fLayers.back()->GetBiases());
929 fLayers.back()->Initialize();
930 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // as we have to pass corrupt input
931
932 AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
933 fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
934 fLayers.back()->Initialize();
935 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
936 applyDropout); // as we have to pass compressed Input
937 fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
938 input);
939 // three layers are added, now pointer is on third layer
940 size_t weightsSize = fLayers.back()->GetWeights().size();
941 size_t biasesSize = fLayers.back()->GetBiases().size();
942 for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
943 // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
944 for (size_t j = 0; j < weightsSize; j++) {
945 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
946 }
947 for (size_t j = 0; j < biasesSize; j++) {
948 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
949 }
950 fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
951 fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
952 fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
953 fLayers[fLayers.size() - 3]->GetOutput(), input);
954 }
955 fLayers.back()->Print();
956
957 for (size_t i = 1; i < numOfHiddenLayers; i++) {
958
959 AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
960 fLayers.back()->Initialize();
961 fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(),
962 applyDropout); // as we have to pass compressed Input
963
964 AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
965 fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
966 fLayers.back()->Initialize();
967 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
968
969 AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
970 fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
971 dropoutProbability);
972 fLayers.back()->Initialize();
973 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
974 applyDropout); // as we have to pass compressed Input
975 fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
976 fLayers[fLayers.size() - 5]->GetOutput());
977
978 // three layers are added, now pointer is on third layer
979 size_t _weightsSize = fLayers.back()->GetWeights().size();
980 size_t _biasesSize = fLayers.back()->GetBiases().size();
981 for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
982 // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
983 for (size_t j = 0; j < _weightsSize; j++) {
984 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
985 }
986 for (size_t j = 0; j < _biasesSize; j++) {
987 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
988 }
989 fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
990 fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
991 fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
992 fLayers[fLayers.size() - 3]->GetOutput(),
993 fLayers[fLayers.size() - 5]->GetOutput());
994 }
995 fLayers.back()->Print();
996 }
997}
998
999//______________________________________________________________________________
1000template <typename Architecture_t, typename Layer_t>
1001auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
1002 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
1003 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
1004{
1005 std::vector<Matrix_t> inp1;
1006 std::vector<Matrix_t> inp2;
1007 if (fLayers.size() == 0) // only Logistic Regression Layer
1008 {
1009 size_t inputUnits = input[0].GetNrows();
1010
1011 AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
1012 fLayers.back()->Initialize();
1013 for (size_t i = 0; i < epochs; i++) {
1014 fLayers.back()->Backward(inputLabel, inp1, input, inp2);
1015 }
1016 fLayers.back()->Forward(input, false);
1017 fLayers.back()->Print();
1018 } else { // if used after any other layer
1019 size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
1020 AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
1021 fLayers.back()->Initialize();
1022 for (size_t i = 0; i < epochs; i++) {
1023 fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
1024 }
1025 fLayers.back()->Forward(testInput, false);
1026 fLayers.back()->Print();
1027 }
1028}
1029#endif
1030
1031//______________________________________________________________________________
1032template <typename Architecture_t, typename Layer_t>
1034 const Matrix_t &weights) -> void
1035{
1036 //Tensor_t inp1;
1037 //Tensor_t inp2;
1038 // Last layer should be dense layer
1039 Matrix_t last_actgrad = fLayers.back()->GetActivationGradientsAt(0);
1040 Matrix_t last_output = fLayers.back()->GetOutputAt(0);
1041 evaluateGradients<Architecture_t>(last_actgrad, this->GetLossFunction(), groundTruth,
1042 last_output, weights);
1043
1044 for (size_t i = fLayers.size() - 1; i > 0; i--) {
1045 auto &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
1046 auto &activations_backward = fLayers[i - 1]->GetOutput();
1047 fLayers[i]->Backward(activation_gradient_backward, activations_backward);
1048 }
1049
1050 // need to have a dummy tensor (size=0) to pass for activation gradient backward which
1051 // are not computed for the first layer
1052 Tensor_t dummy;
1053 fLayers[0]->Backward(dummy, input);
1054}
1055
1056#ifdef USE_PARALLEL_DEEPNET
1057
1058//______________________________________________________________________________
1059template <typename Architecture_t, typename Layer_t>
1061 std::vector<TTensorBatch<Architecture_t>> &batches,
1062 bool applyDropout) -> void
1063{
1064 size_t depth = this->GetDepth();
1065
1066 // The first layer of each deep net
1067 for (size_t i = 0; i < nets.size(); i++) {
1068 nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
1069 }
1070
1071 // The i'th layer of each deep net
1072 for (size_t i = 1; i < depth; i++) {
1073 for (size_t j = 0; j < nets.size(); j++) {
1074 nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
1075 }
1076 }
1077}
1078
1079//______________________________________________________________________________
1080template <typename Architecture_t, typename Layer_t>
1081auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
1082 std::vector<TTensorBatch<Architecture_t>> &batches,
1083 Scalar_t learningRate) -> void
1084{
1085 std::vector<Matrix_t> inp1;
1086 std::vector<Matrix_t> inp2;
1087 size_t depth = this->GetDepth();
1088
1089 // Evaluate the gradients of the last layers in each deep net
1090 for (size_t i = 0; i < nets.size(); i++) {
1091 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1092 nets[i].GetLossFunction(), batches[i].GetOutput(),
1093 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1094 }
1095
1096 // Backpropagate the error in i'th layer of each deep net
1097 for (size_t i = depth - 1; i > 0; i--) {
1098 for (size_t j = 0; j < nets.size(); j++) {
1099 nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
1100 nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
1101 }
1102 }
1103
1104 std::vector<Matrix_t> dummy;
1105
1106 // First layer of each deep net
1107 for (size_t i = 0; i < nets.size(); i++) {
1108 nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1109 }
1110
1111 // Update and copy
1112 for (size_t i = 0; i < nets.size(); i++) {
1113 for (size_t j = 0; j < depth; j++) {
1114 Layer_t *masterLayer = this->GetLayerAt(j);
1115 Layer_t *layer = nets[i].GetLayerAt(j);
1116
1117 masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
1118 layer->CopyWeights(masterLayer->GetWeights());
1119
1120 masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
1121 layer->CopyBiases(masterLayer->GetBiases());
1122 }
1123 }
1124}
1125
1126//______________________________________________________________________________
1127template <typename Architecture_t, typename Layer_t>
1128auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
1129 std::vector<TTensorBatch<Architecture_t>> &batches,
1130 Scalar_t learningRate, Scalar_t momentum) -> void
1131{
1132 std::vector<Matrix_t> inp1;
1133 std::vector<Matrix_t> inp2;
1134 size_t depth = this->GetDepth();
1135
1136 // Evaluate the gradients of the last layers in each deep net
1137 for (size_t i = 0; i < nets.size(); i++) {
1138 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1139 nets[i].GetLossFunction(), batches[i].GetOutput(),
1140 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1141 }
1142
1143 // Backpropagate the error in i'th layer of each deep net
1144 for (size_t i = depth - 1; i > 0; i--) {
1145 Layer_t *masterLayer = this->GetLayerAt(i);
1146
1147 for (size_t j = 0; j < nets.size(); j++) {
1148 Layer_t *layer = nets[j].GetLayerAt(i);
1149
1150 layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1151 inp1, inp2);
1152 masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1153 masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1154 }
1155
1156 masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1157 masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1158 }
1159
1160 std::vector<Matrix_t> dummy;
1161
1162 // First layer of each deep net
1163 Layer_t *masterFirstLayer = this->GetLayerAt(0);
1164 for (size_t i = 0; i < nets.size(); i++) {
1165 Layer_t *layer = nets[i].GetLayerAt(0);
1166
1167 layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1168
1169 masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1170 masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1171 }
1172
1173 masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
1174 masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);
1175
1176 for (size_t i = 0; i < depth; i++) {
1177 Layer_t *masterLayer = this->GetLayerAt(i);
1178 masterLayer->Update(1.0);
1179
1180 for (size_t j = 0; j < nets.size(); j++) {
1181 Layer_t *layer = nets[j].GetLayerAt(i);
1182
1183 layer->CopyWeights(masterLayer->GetWeights());
1184 layer->CopyBiases(masterLayer->GetBiases());
1185 }
1186 }
1187}
1188
1189//______________________________________________________________________________
1190template <typename Architecture_t, typename Layer_t>
1191auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
1192 std::vector<TTensorBatch<Architecture_t>> &batches,
1193 Scalar_t learningRate, Scalar_t momentum) -> void
1194{
1195 std::cout << "Parallel Backward Nestorov" << std::endl;
1196 std::vector<Matrix_t> inp1;
1197 std::vector<Matrix_t> inp2;
1198 size_t depth = this->GetDepth();
1199
1200 // Evaluate the gradients of the last layers in each deep net
1201 for (size_t i = 0; i < nets.size(); i++) {
1202 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1203 nets[i].GetLossFunction(), batches[i].GetOutput(),
1204 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1205 }
1206
1207 // Backpropagate the error in i'th layer of each deep net
1208 for (size_t i = depth - 1; i > 0; i--) {
1209 for (size_t j = 0; j < nets.size(); j++) {
1210 Layer_t *layer = nets[j].GetLayerAt(i);
1211
1212 layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1213 inp1, inp2);
1214 }
1215 }
1216
1217 std::vector<Matrix_t> dummy;
1218
1219 // First layer of each deep net
1220 for (size_t i = 0; i < nets.size(); i++) {
1221 Layer_t *layer = nets[i].GetLayerAt(0);
1222 layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1223 }
1224
1225 for (size_t i = 0; i < depth; i++) {
1226 Layer_t *masterLayer = this->GetLayerAt(i);
1227 for (size_t j = 0; j < nets.size(); j++) {
1228 Layer_t *layer = nets[j].GetLayerAt(i);
1229
1230 layer->CopyWeights(masterLayer->GetWeights());
1231 layer->CopyBiases(masterLayer->GetBiases());
1232
1233 layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
1234 layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
1235 }
1236
1237 for (size_t j = 0; j < nets.size(); j++) {
1238 Layer_t *layer = nets[j].GetLayerAt(i);
1239
1240 masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1241 masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1242 }
1243
1244 masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1245 masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1246
1247 masterLayer->Update(1.0);
1248 }
1249}
1250#endif // use parallel deep net
1251
1252//______________________________________________________________________________
1253template <typename Architecture_t, typename Layer_t>
1255{
1256 for (size_t i = 0; i < fLayers.size(); i++) {
1257 fLayers[i]->Update(learningRate);
1258 }
1259}
1260
1261//______________________________________________________________________________
1262template <typename Architecture_t, typename Layer_t>
1263auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
1264 bool includeRegularization) const -> Scalar_t
1265{
1266 // Last layer should not be deep
1267 auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);
1268
1269 includeRegularization &= (this->GetRegularization() != ERegularization::kNone);
1270 if (includeRegularization) {
1271 loss += RegularizationTerm();
1272 }
1273
1274 return loss;
1275}
1276
1277//______________________________________________________________________________
1278template <typename Architecture_t, typename Layer_t>
1280 const Matrix_t &weights, bool inTraining, bool includeRegularization)
1281 -> Scalar_t
1282{
1283 Forward(input, inTraining);
1284 return Loss(groundTruth, weights, includeRegularization);
1285}
1286
1287//______________________________________________________________________________
1288template <typename Architecture_t, typename Layer_t>
1290{
1291 Scalar_t reg = 0.0;
1292 for (size_t i = 0; i < fLayers.size(); i++) {
1293 for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
1294 reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
1295 }
1296 }
1297 return this->GetWeightDecay() * reg;
1298}
1299
1300
1301//______________________________________________________________________________
1302template <typename Architecture_t, typename Layer_t>
1304{
1305 // Last layer should not be deep (assume output is a matrix)
1306 evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1307}
1308
1309//______________________________________________________________________________
1310template <typename Architecture_t, typename Layer_t>
1312 EOutputFunction f) -> void
1313{
1314 Forward(input, false);
1315 // Last layer should not be deep
1316 evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1317}
1318
1319//______________________________________________________________________________
1320template <typename Architecture_t, typename Layer_t>
1322{
1323 std::cout << "DEEP NEURAL NETWORK: Depth = " << this->GetDepth();
1324 std::cout << " Input = ( " << this->GetInputDepth();
1325 std::cout << ", " << this->GetInputHeight();
1326 std::cout << ", " << this->GetInputWidth() << " )";
1327 std::cout << " Batch size = " << this->GetBatchSize();
1328 std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;
1329
1330 //std::cout << "\t Layers: " << std::endl;
1331
1332 for (size_t i = 0; i < fLayers.size(); i++) {
1333 std::cout << "\tLayer " << i << "\t";
1334 fLayers[i]->Print();
1335 }
1336}
1337
1338//______________________________________________________________________________
1339template <typename Architecture_t, typename Layer_t>
1341 const std::vector<Double_t> & probabilities)
1342{
1343 for (size_t i = 0; i < fLayers.size(); i++) {
1344 if (i < probabilities.size()) {
1345 fLayers[i]->SetDropoutProbability(probabilities[i]);
1346 } else {
1347 fLayers[i]->SetDropoutProbability(1.0);
1348 }
1349 }
1350}
1351
1352
1353} // namespace DNN
1354} // namespace TMVA
1355
1356#endif
#define f(i)
Definition: RSha256.hxx:104
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
Definition: TError.cxx:220
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition: TError.cxx:187
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Definition: TError.cxx:245
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t width
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void reg
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
Generic Max Pooling Layer class.
Definition: MaxPoolLayer.h:59
Layer implementing Batch Normalization.
Generic Deep Neural Network class.
Definition: DeepNet.h:73
const std::vector< Layer_t * > & GetLayers() const
Definition: DeepNet.h:331
void AddDenseLayer(TDenseLayer< Architecture_t > *denseLayer)
Function for adding Dense Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:766
size_t GetBatchHeight() const
Definition: DeepNet.h:343
void SetBatchDepth(size_t batchDepth)
Definition: DeepNet.h:361
void Forward(Tensor_t &input, bool applyDropout=false)
Function that executes the entire forward pass in the network.
Definition: DeepNet.h:896
void SetLossFunction(ELossFunction J)
Definition: DeepNet.h:364
size_t fBatchHeight
The height of the batch used for training/testing.
Definition: DeepNet.h:94
ERegularization GetRegularization() const
Definition: DeepNet.h:350
void AddBasicGRULayer(TBasicGRULayer< Architecture_t > *basicGRULayer)
Function for adding GRU Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:641
std::vector< Layer_t * > & GetLayers()
Definition: DeepNet.h:330
typename Architecture_t::Scalar_t Scalar_t
Definition: DeepNet.h:78
void Initialize()
DAE functions.
Definition: DeepNet.h:877
size_t GetBatchSize() const
Getters.
Definition: DeepNet.h:337
size_t GetDepth() const
Definition: DeepNet.h:326
Scalar_t GetWeightDecay() const
Definition: DeepNet.h:351
size_t GetInputDepth() const
Definition: DeepNet.h:338
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition: DeepNet.h:825
void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights)
Function that executes the entire backward pass in the network.
Definition: DeepNet.h:1033
std::vector< Layer_t * > fLayers
The layers consisting the DeepNet.
Definition: DeepNet.h:86
size_t fBatchDepth
The depth of the batch used for training/testing.
Definition: DeepNet.h:93
size_t fInputDepth
The depth of the input.
Definition: DeepNet.h:89
Layer_t * GetLayerAt(size_t i)
Get the layer in the vector of layers at position i.
Definition: DeepNet.h:322
void Print() const
Print the Deep Net Info.
Definition: DeepNet.h:1321
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:608
void SetWeightDecay(Scalar_t weightDecay)
Definition: DeepNet.h:367
void AddReshapeLayer(TReshapeLayer< Architecture_t > *reshapeLayer)
Function for adding Reshape Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:870
void Clear()
Remove all layers from the network.
Definition: DeepNet.h:334
Scalar_t RegularizationTerm() const
Function for computing the regularizaton term to be added to the loss function
Definition: DeepNet.h:1289
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:740
TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth, size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI=EInitialization::kZero, ERegularization fR=ERegularization::kNone, Scalar_t fWeightDecay=0.0, bool isTraining=false)
Constructor.
Definition: DeepNet.h:388
void Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f)
Prediction for the given inputs, based on what network learned.
Definition: DeepNet.h:1311
void SetInputDepth(size_t inputDepth)
Definition: DeepNet.h:358
bool IsTraining() const
Definition: DeepNet.h:346
size_t GetInputHeight() const
Definition: DeepNet.h:339
size_t fBatchSize
Batch size used for training and evaluation.
Definition: DeepNet.h:88
void Prediction(Matrix_t &predictions, EOutputFunction f) const
Prediction based on activations stored in the last layer.
Definition: DeepNet.h:1303
size_t fInputWidth
The width of the input.
Definition: DeepNet.h:91
void SetInputHeight(size_t inputHeight)
Definition: DeepNet.h:359
size_t GetBatchWidth() const
Definition: DeepNet.h:344
void AddBasicRNNLayer(TBasicRNNLayer< Architecture_t > *basicRNNLayer)
Function for adding Vanilla RNN when the layer is already created.
Definition: DeepNet.h:560
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:567
void AddMaxPoolLayer(CNN::TMaxPoolLayer< Architecture_t > *maxPoolLayer)
Function for adding Max Pooling layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:517
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:485
Scalar_t fWeightDecay
The weight decay factor.
Definition: DeepNet.h:102
Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization=true) const
Function for evaluating the loss, based on the activations stored in the last layer.
Definition: DeepNet.h:1263
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:439
ERegularization fR
The regularization used for the network.
Definition: DeepNet.h:101
void ResetTraining()
Function that reset some training flags after looping all the events but not the weights.
Definition: DeepNet.h:886
size_t GetInputWidth() const
Definition: DeepNet.h:340
bool isInteger(Scalar_t x) const
Definition: DeepNet.h:82
size_t GetOutputWidth() const
Definition: DeepNet.h:327
bool fIsTraining
Is the network training?
Definition: DeepNet.h:97
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:773
void SetBatchSize(size_t batchSize)
Setters.
Definition: DeepNet.h:357
void AddConvLayer(TConvLayer< Architecture_t > *convLayer)
Function for adding Convolution Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:478
size_t fInputHeight
The height of the input.
Definition: DeepNet.h:90
void SetRegularization(ERegularization R)
Definition: DeepNet.h:366
TDeepNet(const TDeepNet &)
Copy-constructor.
Definition: DeepNet.h:400
size_t fBatchWidth
The width of the batch used for training/testing.
Definition: DeepNet.h:95
typename Architecture_t::Tensor_t Tensor_t
Definition: DeepNet.h:76
ELossFunction fJ
The loss function of the network.
Definition: DeepNet.h:99
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:524
~TDeepNet()
Destructor.
Definition: DeepNet.h:411
void SetBatchWidth(size_t batchWidth)
Definition: DeepNet.h:363
void SetDropoutProbabilities(const std::vector< Double_t > &probabilities)
Definition: DeepNet.h:1340
TDeepNet()
Default Constructor.
Definition: DeepNet.h:378
void SetBatchHeight(size_t batchHeight)
Definition: DeepNet.h:362
void Update(Scalar_t learningRate)
Function that updates the weights and biases in every layer that contains them.
Definition: DeepNet.h:1254
ELossFunction GetLossFunction() const
Definition: DeepNet.h:348
size_t calculateDimension(int imgDim, int fltDim, int padding, int stride)
Definition: DeepNet.h:421
const Layer_t * GetLayerAt(size_t i) const
Definition: DeepNet.h:323
void SetInitialization(EInitialization I)
Definition: DeepNet.h:365
EInitialization GetInitialization() const
Definition: DeepNet.h:349
void SetInputWidth(size_t inputWidth)
Definition: DeepNet.h:360
typename Architecture_t::Matrix_t Matrix_t
Definition: DeepNet.h:77
void AddBasicLSTMLayer(TBasicLSTMLayer< Architecture_t > *basicLSTMLayer)
Function for adding LSTM Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:600
Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights, bool inTraining=false, bool includeRegularization=true)
Function for evaluating the loss, based on the propagation of the given input.
Definition: DeepNet.h:1279
EInitialization fI
The initialization method of the network.
Definition: DeepNet.h:100
size_t GetBatchDepth() const
Definition: DeepNet.h:342
Generic layer class.
Definition: DenseLayer.h:59
RVec< PromoteType< T > > floor(const RVec< T > &v)
Definition: RVec.hxx:1773
Double_t x[n]
Definition: legend1.C:17
#define I(x, y, z)
void Copy(void *source, void *dest)
void Print(std::ostream &os, const OptionType &opt)
void init()
Inspect hardware capabilities, and load the optimal library for RooFit computations.
EInitialization
Definition: Functions.h:72
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:46
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:65
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents the objective (loss) functions for the net.
Definition: Functions.h:57
create variable transformations
double epsilon
Definition: triangle.c:618