Logo ROOT  
Reference Guide
DeepNet.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Vladimir Ilievski
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TDeepNet *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Deep Neural Network *
12 * *
13 * Authors (alphabetical): *
14 * Akshay Vashistha <akshayvashistha1995@gmail.com> - CERN, Switzerland *
15 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
16 * Saurav Shekhar <sauravshekhar01@gmail.com> - CERN, Switzerland *
17 * *
18 * Copyright (c) 2005-2015: *
19 * CERN, Switzerland *
20 * U. of Victoria, Canada *
21 * MPI-K Heidelberg, Germany *
22 * U. of Bonn, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29#ifndef TMVA_DNN_DEEPNET
30#define TMVA_DNN_DEEPNET
31
32#include "TString.h"
33
34#include "TMVA/DNN/Functions.h"
36
38#include "TMVA/DNN/DenseLayer.h"
41
44
46
47#ifdef HAVE_DAE
48#include "TMVA/DNN/DAE/CompressionLayer.h"
49#include "TMVA/DNN/DAE/CorruptionLayer.h"
50#include "TMVA/DNN/DAE/ReconstructionLayer.h"
51#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
52#endif
53
54#include <vector>
55#include <cmath>
56
57
58namespace TMVA {
59namespace DNN {
60
61 using namespace CNN;
62 using namespace RNN;
63 //using namespace DAE;
64
65/** \class TDeepNet
66 Generic Deep Neural Network class.
67 This class encapsulates the information for all types of Deep Neural Networks.
68 \tparam Architecture The Architecture type that holds the
69 architecture-specific data types.
70 */
71template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
72class TDeepNet {
73public:
74
75 using Tensor_t = typename Architecture_t::Tensor_t;
76 using Matrix_t = typename Architecture_t::Matrix_t;
77 using Scalar_t = typename Architecture_t::Scalar_t;
78
79
80private:
81 bool inline isInteger(Scalar_t x) const { return x == floor(x); }
82 size_t calculateDimension(int imgDim, int fltDim, int padding, int stride);
83
84private:
85 std::vector<Layer_t *> fLayers; ///< The layers consisting the DeepNet
86
87 size_t fBatchSize; ///< Batch size used for training and evaluation.
88 size_t fInputDepth; ///< The depth of the input.
89 size_t fInputHeight; ///< The height of the input.
90 size_t fInputWidth; ///< The width of the input.
91
92 size_t fBatchDepth; ///< The depth of the batch used for training/testing.
93 size_t fBatchHeight; ///< The height of the batch used for training/testing.
94 size_t fBatchWidth; ///< The width of the batch used for training/testing.
95
96 bool fIsTraining; ///< Is the network training?
97
98 ELossFunction fJ; ///< The loss function of the network.
99 EInitialization fI; ///< The initialization method of the network.
100 ERegularization fR; ///< The regularization used for the network.
101 Scalar_t fWeightDecay; ///< The weight decay factor.
102
103public:
104 /*! Default Constructor */
106
107 /*! Constructor */
108 TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
109 size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
110 ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);
111
112 /*! Copy-constructor */
114
115 /*! Destructor */
117
118 /*! Function for adding Convolution layer in the Deep Neural Network,
119 * with a given depth, filter height and width, striding in rows and columns,
120 * the zero paddings, as well as the activation function and the dropout
121 * probability. Based on these parameters, it calculates the width and height
122 * of the convolutional layer. */
123 TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
124 size_t strideCols, size_t paddingHeight, size_t paddingWidth,
125 EActivationFunction f, Scalar_t dropoutProbability = 1.0);
126
127 /*! Function for adding Convolution Layer in the Deep Neural Network,
128 * when the layer is already created. */
130
131 /*! Function for adding Pooling layer in the Deep Neural Network,
132 * with a given filter height and width, striding in rows and columns as
133 * well as the dropout probability. The depth is same as the previous
134 * layer depth. Based on these parameters, it calculates the width and
135 * height of the pooling layer. */
136 TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
137 size_t strideCols, Scalar_t dropoutProbability = 1.0);
138 /*! Function for adding Max Pooling layer in the Deep Neural Network,
139 * when the layer is already created. */
141
142
143 /*! Function for adding Recurrent Layer in the Deep Neural Network,
144 * with given parameters */
145 TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
146 bool rememberState = false,EActivationFunction f = EActivationFunction::kTanh);
147
148 /*! Function for adding Vanilla RNN when the layer is already created
149 */
151
152 /*! Function for adding Dense Connected Layer in the Deep Neural Network,
153 * with a given width, activation function and dropout probability.
154 * Based on the previous layer dimensions, it calculates the input width
155 * of the fully connected layer. */
157
158 /*! Function for adding Dense Layer in the Deep Neural Network, when
159 * the layer is already created. */
161
162 /*! Function for adding Reshape Layer in the Deep Neural Network, with a given
163 * height and width. It will take every matrix from the previous layer and
164 * reshape it to a matrix with new dimensions. */
165 TReshapeLayer<Architecture_t> *AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening);
166
167 /*! Function for adding a Batch Normalization layer with given parameters */
169
170 /*! Function for adding Reshape Layer in the Deep Neural Network, when
171 * the layer is already created. */
173
174#ifdef HAVE_DAE /// DAE functions
175 /*! Function for adding Corruption layer in the Deep Neural Network,
176 * with given number of visibleUnits and hiddenUnits. It corrupts input
177 * according to given corruptionLevel and dropoutProbability. */
178 TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
179 Scalar_t dropoutProbability, Scalar_t corruptionLevel);
180
181 /*! Function for adding Corruption Layer in the Deep Neural Network,
182 * when the layer is already created. */
183 void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);
184
185 /*! Function for adding Compression layer in the Deep Neural Network,
186 * with given number of visibleUnits and hiddenUnits. It compresses the input units
187 * taking weights and biases from prev layers. */
188 TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
189 Scalar_t dropoutProbability, EActivationFunction f,
190 std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);
191
192 /*! Function for adding Compression Layer in the Deep Neural Network, when
193 * the layer is already created. */
194 void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);
195
196 /*! Function for adding Reconstruction layer in the Deep Neural Network,
197 * with given number of visibleUnits and hiddenUnits. It reconstructs the input units
198 * taking weights and biases from prev layers. Same corruptionLevel and dropoutProbability
199 * must be passed as in corruptionLayer. */
200 TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
201 Scalar_t learningRate, EActivationFunction f,
202 std::vector<Matrix_t> weights,
203 std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
204 Scalar_t dropoutProbability);
205
206 /*! Function for adding Reconstruction Layer in the Deep Neural Network, when
207 * the layer is already created. */
208 void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);
209
210 /*! Function for adding logisticRegressionLayer in the Deep Neural Network,
211 * with given number of inputUnits and outputUnits. It classifies the outputUnits. */
212 TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
213 size_t testDataBatchSize,
214 Scalar_t learningRate);
215
216 /*! Function for adding logisticRegressionLayer in the Deep Neural Network, when
217 * the layer is already created. */
218 void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
219
220 /* To train the Deep AutoEncoder network with required number of Corruption, Compression and Reconstruction
221 * layers. */
222 void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
223 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
224 bool applyDropout = false);
225
226 /* To classify outputLabel in Deep AutoEncoder. Should be used after PreTrain if required.
227 * Currently, it used Logistic Regression Layer. Otherwise we can use any other classification layer also.
228 */
229 void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
230 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
231#endif
232
233 /*! Function for initialization of the Neural Net. */
235
236 /*! Function that executes the entire forward pass in the network. */
237 void Forward(Tensor_t &input, bool applyDropout = false);
238
239 /*! Function that resets some training flags after looping over all the events, but does not reset the weights. */
241
242
243
244 /*! Function that executes the entire backward pass in the network. */
245 void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights);
246
247
248#ifdef USE_PARALLEL_DEEPNET
249 /*! Function for parallel forward in the vector of deep nets, where the master
250 * net is the net calling this function. There is one batch for one deep net.*/
251 void ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
252 std::vector<TTensorBatch<Architecture_t>> &batches, bool applyDropout = false);
253
254 /*! Function for parallel backward in the vector of deep nets, where the master
255 * net is the net calling this function and getting the updates from the other nets.
256 * There is one batch for one deep net.*/
257 void ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
258 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate);
259
260 /*! Function for parallel backward in the vector of deep nets, where the master
261 * net is the net calling this function and getting the updates from the other nets,
262 * following the momentum strategy. There is one batch for one deep net.*/
263 void ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
264 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
265 Scalar_t momentum);
266
267 /*! Function for parallel backward in the vector of deep nets, where the master
268 * net is the net calling this function and getting the updates from the other nets,
269 * following the Nesterov momentum strategy. There is one batch for one deep net.*/
270 void ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
271 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
272 Scalar_t momentum);
273
274#endif // endif use parallel deepnet
275
276 /*! Function that will update the weights and biases in the layers that
277 * contain weights and biases. */
278 void Update(Scalar_t learningRate);
279
280 /*! Function for evaluating the loss, based on the activations stored
281 * in the last layer. */
282 Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization = true) const;
283
284 /*! Function for evaluating the loss, based on the propagation of the given input. */
285 Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights,
286 bool inTraining = false, bool includeRegularization = true);
287
288 /*! Function for computing the regularization term to be added to the loss function */
290
291 /*! Prediction based on activations stored in the last layer. */
292 void Prediction(Matrix_t &predictions, EOutputFunction f) const;
293
294 /*! Prediction for the given inputs, based on what network learned. */
295 void Prediction(Matrix_t &predictions, Tensor_t & input, EOutputFunction f);
296
297 /*! Print the Deep Net Info */
298 void Print() const;
299
300 /*! Get the layer in the vector of layers at position i */
301 inline Layer_t *GetLayerAt(size_t i) { return fLayers[i]; }
302 inline const Layer_t *GetLayerAt(size_t i) const { return fLayers[i]; }
303
304 /* Depth and the output width of the network. */
305 inline size_t GetDepth() const { return fLayers.size(); }
306 inline size_t GetOutputWidth() const { return fLayers.back()->GetWidth(); }
307
308 /* Return a reference to the layers. */
309 inline std::vector<Layer_t *> &GetLayers() { return fLayers; }
310 inline const std::vector<Layer_t *> &GetLayers() const { return fLayers; }
311
312 /*! Remove all layers from the network. */
313 inline void Clear() { fLayers.clear(); }
314
315 /*! Getters */
316 inline size_t GetBatchSize() const { return fBatchSize; }
317 inline size_t GetInputDepth() const { return fInputDepth; }
318 inline size_t GetInputHeight() const { return fInputHeight; }
319 inline size_t GetInputWidth() const { return fInputWidth; }
320
321 inline size_t GetBatchDepth() const { return fBatchDepth; }
322 inline size_t GetBatchHeight() const { return fBatchHeight; }
323 inline size_t GetBatchWidth() const { return fBatchWidth; }
324
325 inline bool IsTraining() const { return fIsTraining; }
326
327 inline ELossFunction GetLossFunction() const { return fJ; }
328 inline EInitialization GetInitialization() const { return fI; }
329 inline ERegularization GetRegularization() const { return fR; }
330 inline Scalar_t GetWeightDecay() const { return fWeightDecay; }
331
332 /*! Setters */
333 // FIXME many of these won't work as the data structure storing activations
334 // and gradients have not changed in all the layers, also params in layers
335 // have not changed either
336 inline void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
337 inline void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
338 inline void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
339 inline void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
340 inline void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
341 inline void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
342 inline void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }
343 inline void SetLossFunction(ELossFunction J) { fJ = J; }
347
348 void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);
349
350};
351
352//
353// Deep Net Class - Implementation
354//
355//______________________________________________________________________________
356template <typename Architecture_t, typename Layer_t>
358 : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
359 fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
360 fIsTraining(true), fWeightDecay(0.0)
361{
362 // Nothing to do here.
363}
364
365//______________________________________________________________________________
366template <typename Architecture_t, typename Layer_t>
367TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
368 size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
370 : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
371 fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J), fI(I),
372 fR(R), fWeightDecay(weightDecay)
373{
374 // Nothing to do here.
375}
376
377//______________________________________________________________________________
378template <typename Architecture_t, typename Layer_t>
380 : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
381 fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
382 fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
383 fWeightDecay(deepNet.fWeightDecay)
384{
385 // Nothing to do here.
386}
387
388//______________________________________________________________________________
389template <typename Architecture_t, typename Layer_t>
391{
392 // Relese the layers memory
393 for (auto layer : fLayers)
394 delete layer;
395 fLayers.clear();
396}
397
398//______________________________________________________________________________
399template <typename Architecture_t, typename Layer_t>
400auto TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride) -> size_t
401{
402 Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1;
403 if (!isInteger(dimension) || dimension <= 0) {
404 this->Print();
405 int iLayer = fLayers.size();
406 Fatal("calculateDimension","Not compatible hyper parameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d",
407 iLayer, imgDim, fltDim, padding, stride);
408 // std::cout << " calculateDimension - Not compatible hyper parameters (imgDim, fltDim, padding, stride)"
409 // << imgDim << " , " << fltDim << " , " << padding << " , " << stride<< " resulting dim is " << dimension << std::endl;
410 // std::exit(EXIT_FAILURE);
411 }
412
413 return (size_t)dimension;
414}
415
416//______________________________________________________________________________
417template <typename Architecture_t, typename Layer_t>
419 size_t filterWidth, size_t strideRows,
420 size_t strideCols, size_t paddingHeight,
421 size_t paddingWidth, EActivationFunction f,
422 Scalar_t dropoutProbability)
423{
424 // All variables defining a convolutional layer
425 size_t batchSize = this->GetBatchSize();
426 size_t inputDepth;
427 size_t inputHeight;
428 size_t inputWidth;
429 EInitialization init = this->GetInitialization();
430 ERegularization reg = this->GetRegularization();
431 Scalar_t decay = this->GetWeightDecay();
432
433 if (fLayers.size() == 0) {
434 inputDepth = this->GetInputDepth();
435 inputHeight = this->GetInputHeight();
436 inputWidth = this->GetInputWidth();
437 } else {
438 Layer_t *lastLayer = fLayers.back();
439 inputDepth = lastLayer->GetDepth();
440 inputHeight = lastLayer->GetHeight();
441 inputWidth = lastLayer->GetWidth();
442 }
443
444
445
446 // Create the conv layer
448 batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
449 strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);
450
451 fLayers.push_back(convLayer);
452 return convLayer;
453}
454
455//______________________________________________________________________________
456template <typename Architecture_t, typename Layer_t>
458{
459 fLayers.push_back(convLayer);
460}
461
462//______________________________________________________________________________
463template <typename Architecture_t, typename Layer_t>
465 size_t strideRows, size_t strideCols,
466 Scalar_t dropoutProbability)
467{
468 size_t batchSize = this->GetBatchSize();
469 size_t inputDepth;
470 size_t inputHeight;
471 size_t inputWidth;
472
473 if (fLayers.size() == 0) {
474 inputDepth = this->GetInputDepth();
475 inputHeight = this->GetInputHeight();
476 inputWidth = this->GetInputWidth();
477 } else {
478 Layer_t *lastLayer = fLayers.back();
479 inputDepth = lastLayer->GetDepth();
480 inputHeight = lastLayer->GetHeight();
481 inputWidth = lastLayer->GetWidth();
482 }
483
485 batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
486 strideRows, strideCols, dropoutProbability);
487
488 // But this creates a copy or what?
489 fLayers.push_back(maxPoolLayer);
490
491 return maxPoolLayer;
492}
493
494//______________________________________________________________________________
495template <typename Architecture_t, typename Layer_t>
497{
498 fLayers.push_back(maxPoolLayer);
499}
500
501//______________________________________________________________________________
502template <typename Architecture_t, typename Layer_t>
504 size_t timeSteps,
505 bool rememberState, EActivationFunction f)
506{
507
508 // should check if input and time size are consistent
509
510 //std::cout << "Create RNN " << fLayers.size() << " " << this->GetInputHeight() << " " << this->GetInputWidth() << std::endl;
511 size_t inputHeight, inputWidth, inputDepth;
512 if (fLayers.size() == 0) {
513 inputHeight = this->GetInputHeight();
514 inputWidth = this->GetInputWidth();
515 inputDepth = this->GetInputDepth();
516 } else {
517 Layer_t *lastLayer = fLayers.back();
518 inputHeight = lastLayer->GetHeight();
519 inputWidth = lastLayer->GetWidth();
520 inputDepth = lastLayer->GetDepth();
521 }
522 if (inputSize != inputWidth) {
523 Error("AddBasicRNNLayer","Inconsistent input size with input layout - it should be %zu instead of %zu",inputSize, inputWidth);
524 }
525 if (timeSteps != inputHeight || timeSteps != inputDepth) {
526 Error("AddBasicRNNLayer","Inconsistent time steps with input layout - it should be %zu instead of %zu",timeSteps, inputHeight);
527 }
528
529 TBasicRNNLayer<Architecture_t> *basicRNNLayer =
530 new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState,
531 f, fIsTraining, this->GetInitialization());
532 fLayers.push_back(basicRNNLayer);
533 return basicRNNLayer;
534}
535
536//______________________________________________________________________________
537template <typename Architecture_t, typename Layer_t>
539{
540 fLayers.push_back(basicRNNLayer);
541}
542
543//DAE
544#ifdef HAVE_DAE
545
546//______________________________________________________________________________
547template <typename Architecture_t, typename Layer_t>
548TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
549 size_t hiddenUnits,
550 Scalar_t dropoutProbability,
551 Scalar_t corruptionLevel)
552{
553 size_t batchSize = this->GetBatchSize();
554
555 TCorruptionLayer<Architecture_t> *corruptionLayer =
556 new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
557 fLayers.push_back(corruptionLayer);
558 return corruptionLayer;
559}
560//______________________________________________________________________________
561
562template <typename Architecture_t, typename Layer_t>
563void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
564{
565 fLayers.push_back(corruptionLayer);
566}
567
568//______________________________________________________________________________
569template <typename Architecture_t, typename Layer_t>
570TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
571 size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
572 std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
573{
574 size_t batchSize = this->GetBatchSize();
575
576 TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
577 batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
578 fLayers.push_back(compressionLayer);
579 return compressionLayer;
580}
581//______________________________________________________________________________
582
583template <typename Architecture_t, typename Layer_t>
584void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
585{
586 fLayers.push_back(compressionLayer);
587}
588
589//______________________________________________________________________________
590template <typename Architecture_t, typename Layer_t>
591TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
592 size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
593 std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
594{
595 size_t batchSize = this->GetBatchSize();
596
597 TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
598 batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
599 fLayers.push_back(reconstructionLayer);
600 return reconstructionLayer;
601}
602//______________________________________________________________________________
603
604template <typename Architecture_t, typename Layer_t>
605void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
606 TReconstructionLayer<Architecture_t> *reconstructionLayer)
607{
608 fLayers.push_back(reconstructionLayer);
609}
610
611//______________________________________________________________________________
612template <typename Architecture_t, typename Layer_t>
613TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
614 size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
615{
616 size_t batchSize = this->GetBatchSize();
617
618 TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
619 new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
620 fLayers.push_back(logisticRegressionLayer);
621 return logisticRegressionLayer;
622}
623//______________________________________________________________________________
624template <typename Architecture_t, typename Layer_t>
625void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
626 TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
627{
628 fLayers.push_back(logisticRegressionLayer);
629}
630#endif
631
632
633//______________________________________________________________________________
634template <typename Architecture_t, typename Layer_t>
636 Scalar_t dropoutProbability)
637{
638 size_t batchSize = this->GetBatchSize();
639 size_t inputWidth;
640 EInitialization init = this->GetInitialization();
641 ERegularization reg = this->GetRegularization();
642 Scalar_t decay = this->GetWeightDecay();
643
644 if (fLayers.size() == 0) {
645 inputWidth = this->GetInputWidth();
646 } else {
647 Layer_t *lastLayer = fLayers.back();
648 inputWidth = lastLayer->GetWidth();
649 }
650
651 TDenseLayer<Architecture_t> *denseLayer =
652 new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
653
654 fLayers.push_back(denseLayer);
655
656 return denseLayer;
657}
658
659//______________________________________________________________________________
660template <typename Architecture_t, typename Layer_t>
662{
663 fLayers.push_back(denseLayer);
664}
665
666//______________________________________________________________________________
667template <typename Architecture_t, typename Layer_t>
669 size_t width, bool flattening)
670{
671 size_t batchSize = this->GetBatchSize();
672 size_t inputDepth;
673 size_t inputHeight;
674 size_t inputWidth;
675 size_t outputNSlices;
676 size_t outputNRows;
677 size_t outputNCols;
678
679 if (fLayers.size() == 0) {
680 inputDepth = this->GetInputDepth();
681 inputHeight = this->GetInputHeight();
682 inputWidth = this->GetInputWidth();
683 } else {
684 Layer_t *lastLayer = fLayers.back();
685 inputDepth = lastLayer->GetDepth();
686 inputHeight = lastLayer->GetHeight();
687 inputWidth = lastLayer->GetWidth();
688 }
689
690 if (flattening) {
691 outputNSlices = 1;
692 outputNRows = this->GetBatchSize();
693 outputNCols = depth * height * width;
694 size_t inputNCols = inputDepth * inputHeight * inputWidth;
695 if (outputNCols != 0 && outputNCols != inputNCols ) {
696 Info("AddReshapeLayer","Dimensions not compatibles - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - Force flattening output to be %zu",
697 inputDepth, inputHeight, inputWidth, depth, height, width,inputNCols);
698 }
699 outputNCols = inputNCols;
700 depth = 1;
701 height = 1;
702 width = outputNCols;
703 } else {
704 outputNSlices = this->GetBatchSize();
705 outputNRows = depth;
706 outputNCols = height * width;
707 }
708
709 TReshapeLayer<Architecture_t> *reshapeLayer =
710 new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
711 outputNSlices, outputNRows, outputNCols, flattening);
712
713 fLayers.push_back(reshapeLayer);
714
715 return reshapeLayer;
716}
717
718//______________________________________________________________________________
719template <typename Architecture_t, typename Layer_t>
721{
722 int axis = -1;
723 size_t batchSize = this->GetBatchSize();
724 size_t inputDepth = 0;
725 size_t inputHeight = 0;
726 size_t inputWidth = 0;
727 // this is the shape of the output tensor (it is columnmajor by default)
728 // and it is normally (depth, hw, bsize) and for dense layers (bsize, w, 1)
729 std::vector<size_t> shape = {1, 1, 1};
730 if (fLayers.size() == 0) {
731 inputDepth = this->GetInputDepth();
732 inputHeight = this->GetInputHeight();
733 inputWidth = this->GetInputWidth();
734 // assume that is like for a dense layer
735 shape[0] = batchSize;
736 shape[1] = inputWidth;
737 shape[2] = 1;
738 } else {
739 Layer_t *lastLayer = fLayers.back();
740 inputDepth = lastLayer->GetDepth();
741 inputHeight = lastLayer->GetHeight();
742 inputWidth = lastLayer->GetWidth();
743 shape = lastLayer->GetOutput().GetShape();
744 if (dynamic_cast<TConvLayer<Architecture_t> *>(lastLayer) != nullptr ||
745 dynamic_cast<TMaxPoolLayer<Architecture_t> *>(lastLayer) != nullptr)
746 axis = 1; // use axis = channel axis for convolutional layer
747 if (shape.size() > 3) {
748 for (size_t i = 3; i < shape.size(); ++i)
749 shape[2] *= shape[i];
750 }
751 // if (axis == 1) {
752 // shape[0] = batchSize;
753 // shape[1] = inputDepth;
754 // shape[2] = inputHeight * inputWidth;
755 // }
756 // for RNN ?
757 }
758 std::cout << "addBNormLayer " << inputDepth << " , " << inputHeight << " , " << inputWidth << " , " << shape[0]
759 << " " << shape[1] << " " << shape[2] << std::endl;
760
761 auto bnormLayer =
762 new TBatchNormLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, shape, axis, momentum, epsilon);
763
764 fLayers.push_back(bnormLayer);
765
766 return bnormLayer;
767}
768
769//______________________________________________________________________________
770template <typename Architecture_t, typename Layer_t>
772{
773 fLayers.push_back(reshapeLayer);
774}
775
776//______________________________________________________________________________
777template <typename Architecture_t, typename Layer_t>
779{
780 for (size_t i = 0; i < fLayers.size(); i++) {
781 fLayers[i]->Initialize();
782 }
783}
784
785//______________________________________________________________________________
786template <typename Architecture_t, typename Layer_t>
788{
789 for (size_t i = 0; i < fLayers.size(); i++) {
790 fLayers[i]->ResetTraining();
791 }
792}
793
794
795//______________________________________________________________________________
796template <typename Architecture_t, typename Layer_t>
797auto TDeepNet<Architecture_t, Layer_t>::Forward( Tensor_t &input, bool applyDropout) -> void
798{
799 fLayers.front()->Forward(input, applyDropout);
800
801 for (size_t i = 1; i < fLayers.size(); i++) {
802 fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
803 //std::cout << "forward for layer " << i << std::endl;
804 // fLayers[i]->GetOutput()[0].Print();
805 }
806}
807
808
809#ifdef HAVE_DAE
810//_____________________________________________________________________________
811template <typename Architecture_t, typename Layer_t>
812auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
813 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
814 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
815 EActivationFunction f, bool applyDropout) -> void
816{
817 std::vector<Matrix_t> inp1;
818 std::vector<Matrix_t> inp2;
819 size_t numOfHiddenLayers = sizeof(numHiddenUnitsPerLayer) / sizeof(numHiddenUnitsPerLayer[0]);
820 // size_t batchSize = this->GetBatchSize();
821 size_t visibleUnits = (size_t)input[0].GetNrows();
822
823 AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
824 fLayers.back()->Initialize();
825 fLayers.back()->Forward(input, applyDropout);
826 // fLayers.back()->Print();
827
828 AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
829 fLayers.back()->GetBiases());
830 fLayers.back()->Initialize();
831 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // as we have to pass corrupt input
832
833 AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
834 fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
835 fLayers.back()->Initialize();
836 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
837 applyDropout); // as we have to pass compressed Input
838 fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
839 input);
840 // three layers are added, now pointer is on third layer
841 size_t weightsSize = fLayers.back()->GetWeights().size();
842 size_t biasesSize = fLayers.back()->GetBiases().size();
843 for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
844 // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
845 for (size_t j = 0; j < weightsSize; j++) {
846 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
847 }
848 for (size_t j = 0; j < biasesSize; j++) {
849 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
850 }
851 fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
852 fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
853 fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
854 fLayers[fLayers.size() - 3]->GetOutput(), input);
855 }
856 fLayers.back()->Print();
857
858 for (size_t i = 1; i < numOfHiddenLayers; i++) {
859
860 AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
861 fLayers.back()->Initialize();
862 fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(),
863 applyDropout); // as we have to pass compressed Input
864
865 AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
866 fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
867 fLayers.back()->Initialize();
868 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
869
870 AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
871 fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
872 dropoutProbability);
873 fLayers.back()->Initialize();
874 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
875 applyDropout); // as we have to pass compressed Input
876 fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
877 fLayers[fLayers.size() - 5]->GetOutput());
878
879 // three layers are added, now pointer is on third layer
880 size_t _weightsSize = fLayers.back()->GetWeights().size();
881 size_t _biasesSize = fLayers.back()->GetBiases().size();
882 for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
883 // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
884 for (size_t j = 0; j < _weightsSize; j++) {
885 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
886 }
887 for (size_t j = 0; j < _biasesSize; j++) {
888 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
889 }
890 fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
891 fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
892 fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
893 fLayers[fLayers.size() - 3]->GetOutput(),
894 fLayers[fLayers.size() - 5]->GetOutput());
895 }
896 fLayers.back()->Print();
897 }
898}
899
900//______________________________________________________________________________
901template <typename Architecture_t, typename Layer_t>
902auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
903 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
904 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
905{
906 std::vector<Matrix_t> inp1;
907 std::vector<Matrix_t> inp2;
908 if (fLayers.size() == 0) // only Logistic Regression Layer
909 {
910 size_t inputUnits = input[0].GetNrows();
911
912 AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
913 fLayers.back()->Initialize();
914 for (size_t i = 0; i < epochs; i++) {
915 fLayers.back()->Backward(inputLabel, inp1, input, inp2);
916 }
917 fLayers.back()->Forward(input, false);
918 fLayers.back()->Print();
919 } else { // if used after any other layer
920 size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
921 AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
922 fLayers.back()->Initialize();
923 for (size_t i = 0; i < epochs; i++) {
924 fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
925 }
926 fLayers.back()->Forward(testInput, false);
927 fLayers.back()->Print();
928 }
929}
930#endif
931
932//______________________________________________________________________________
933template <typename Architecture_t, typename Layer_t>
935 const Matrix_t &weights) -> void
936{
937 //Tensor_t inp1;
938 //Tensor_t inp2;
939 // Last layer should be dense layer
940 Matrix_t last_actgrad = fLayers.back()->GetActivationGradientsAt(0);
941 Matrix_t last_output = fLayers.back()->GetOutputAt(0);
942 evaluateGradients<Architecture_t>(last_actgrad, this->GetLossFunction(), groundTruth,
943 last_output, weights);
944
945 for (size_t i = fLayers.size() - 1; i > 0; i--) {
946 auto &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
947 auto &activations_backward = fLayers[i - 1]->GetOutput();
948 fLayers[i]->Backward(activation_gradient_backward, activations_backward);
949 }
950
951 // need to have a dummy tensor (size=0) to pass for activation gradient backward which
952 // are not computed for the first layer
954 fLayers[0]->Backward(dummy, input);
955}
956
957#ifdef USE_PARALLEL_DEEPNET
958
959//______________________________________________________________________________
960template <typename Architecture_t, typename Layer_t>
962 std::vector<TTensorBatch<Architecture_t>> &batches,
963 bool applyDropout) -> void
964{
965 size_t depth = this->GetDepth();
966
967 // The first layer of each deep net
968 for (size_t i = 0; i < nets.size(); i++) {
969 nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
970 }
971
972 // The i'th layer of each deep net
973 for (size_t i = 1; i < depth; i++) {
974 for (size_t j = 0; j < nets.size(); j++) {
975 nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
976 }
977 }
978}
979
980//______________________________________________________________________________
981template <typename Architecture_t, typename Layer_t>
982auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
983 std::vector<TTensorBatch<Architecture_t>> &batches,
984 Scalar_t learningRate) -> void
985{
986 std::vector<Matrix_t> inp1;
987 std::vector<Matrix_t> inp2;
988 size_t depth = this->GetDepth();
989
990 // Evaluate the gradients of the last layers in each deep net
991 for (size_t i = 0; i < nets.size(); i++) {
992 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
993 nets[i].GetLossFunction(), batches[i].GetOutput(),
994 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
995 }
996
997 // Backpropagate the error in i'th layer of each deep net
998 for (size_t i = depth - 1; i > 0; i--) {
999 for (size_t j = 0; j < nets.size(); j++) {
1000 nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
1001 nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
1002 }
1003 }
1004
1005 std::vector<Matrix_t> dummy;
1006
1007 // First layer of each deep net
1008 for (size_t i = 0; i < nets.size(); i++) {
1009 nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1010 }
1011
1012 // Update and copy
1013 for (size_t i = 0; i < nets.size(); i++) {
1014 for (size_t j = 0; j < depth; j++) {
1015 Layer_t *masterLayer = this->GetLayerAt(j);
1016 Layer_t *layer = nets[i].GetLayerAt(j);
1017
1018 masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
1019 layer->CopyWeights(masterLayer->GetWeights());
1020
1021 masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
1022 layer->CopyBiases(masterLayer->GetBiases());
1023 }
1024 }
1025}
1026
1027//______________________________________________________________________________
1028template <typename Architecture_t, typename Layer_t>
1029auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
1030 std::vector<TTensorBatch<Architecture_t>> &batches,
1031 Scalar_t learningRate, Scalar_t momentum) -> void
1032{
1033 std::vector<Matrix_t> inp1;
1034 std::vector<Matrix_t> inp2;
1035 size_t depth = this->GetDepth();
1036
1037 // Evaluate the gradients of the last layers in each deep net
1038 for (size_t i = 0; i < nets.size(); i++) {
1039 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1040 nets[i].GetLossFunction(), batches[i].GetOutput(),
1041 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1042 }
1043
1044 // Backpropagate the error in i'th layer of each deep net
1045 for (size_t i = depth - 1; i > 0; i--) {
1046 Layer_t *masterLayer = this->GetLayerAt(i);
1047
1048 for (size_t j = 0; j < nets.size(); j++) {
1049 Layer_t *layer = nets[j].GetLayerAt(i);
1050
1051 layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1052 inp1, inp2);
1053 masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1054 masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1055 }
1056
1057 masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1058 masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1059 }
1060
1061 std::vector<Matrix_t> dummy;
1062
1063 // First layer of each deep net
1064 Layer_t *masterFirstLayer = this->GetLayerAt(0);
1065 for (size_t i = 0; i < nets.size(); i++) {
1066 Layer_t *layer = nets[i].GetLayerAt(0);
1067
1068 layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1069
1070 masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1071 masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1072 }
1073
1074 masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
1075 masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);
1076
1077 for (size_t i = 0; i < depth; i++) {
1078 Layer_t *masterLayer = this->GetLayerAt(i);
1079 masterLayer->Update(1.0);
1080
1081 for (size_t j = 0; j < nets.size(); j++) {
1082 Layer_t *layer = nets[j].GetLayerAt(i);
1083
1084 layer->CopyWeights(masterLayer->GetWeights());
1085 layer->CopyBiases(masterLayer->GetBiases());
1086 }
1087 }
1088}
1089
1090//______________________________________________________________________________
1091template <typename Architecture_t, typename Layer_t>
1092auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
1093 std::vector<TTensorBatch<Architecture_t>> &batches,
1094 Scalar_t learningRate, Scalar_t momentum) -> void
1095{
1096 std::cout << "Parallel Backward Nestorov" << std::endl;
1097 std::vector<Matrix_t> inp1;
1098 std::vector<Matrix_t> inp2;
1099 size_t depth = this->GetDepth();
1100
1101 // Evaluate the gradients of the last layers in each deep net
1102 for (size_t i = 0; i < nets.size(); i++) {
1103 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1104 nets[i].GetLossFunction(), batches[i].GetOutput(),
1105 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1106 }
1107
1108 // Backpropagate the error in i'th layer of each deep net
1109 for (size_t i = depth - 1; i > 0; i--) {
1110 for (size_t j = 0; j < nets.size(); j++) {
1111 Layer_t *layer = nets[j].GetLayerAt(i);
1112
1113 layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1114 inp1, inp2);
1115 }
1116 }
1117
1118 std::vector<Matrix_t> dummy;
1119
1120 // First layer of each deep net
1121 for (size_t i = 0; i < nets.size(); i++) {
1122 Layer_t *layer = nets[i].GetLayerAt(0);
1123 layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1124 }
1125
1126 for (size_t i = 0; i < depth; i++) {
1127 Layer_t *masterLayer = this->GetLayerAt(i);
1128 for (size_t j = 0; j < nets.size(); j++) {
1129 Layer_t *layer = nets[j].GetLayerAt(i);
1130
1131 layer->CopyWeights(masterLayer->GetWeights());
1132 layer->CopyBiases(masterLayer->GetBiases());
1133
1134 layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
1135 layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
1136 }
1137
1138 for (size_t j = 0; j < nets.size(); j++) {
1139 Layer_t *layer = nets[j].GetLayerAt(i);
1140
1141 masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1142 masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1143 }
1144
1145 masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1146 masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1147
1148 masterLayer->Update(1.0);
1149 }
1150}
1151#endif // use parallel deep net
1152
1153//______________________________________________________________________________
1154template <typename Architecture_t, typename Layer_t>
1156{
1157 for (size_t i = 0; i < fLayers.size(); i++) {
1158 fLayers[i]->Update(learningRate);
1159 }
1160}
1161
1162//______________________________________________________________________________
1163template <typename Architecture_t, typename Layer_t>
1164auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
1165 bool includeRegularization) const -> Scalar_t
1166{
1167 // Last layer should not be deep
1168 auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);
1169
1170 includeRegularization &= (this->GetRegularization() != ERegularization::kNone);
1171 if (includeRegularization) {
1172 loss += RegularizationTerm();
1173 }
1174
1175 return loss;
1176}
1177
1178//______________________________________________________________________________
1179template <typename Architecture_t, typename Layer_t>
1181 const Matrix_t &weights, bool inTraining, bool includeRegularization)
1182 -> Scalar_t
1183{
1184 Forward(input, inTraining);
1185 return Loss(groundTruth, weights, includeRegularization);
1186}
1187
1188//______________________________________________________________________________
1189template <typename Architecture_t, typename Layer_t>
1191{
1192 Scalar_t reg = 0.0;
1193 for (size_t i = 0; i < fLayers.size(); i++) {
1194 for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
1195 reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
1196 }
1197 }
1198 return this->GetWeightDecay() * reg;
1199}
1200
1201
1202//______________________________________________________________________________
1203template <typename Architecture_t, typename Layer_t>
1205{
1206 // Last layer should not be deep (assume output is a matrix)
1207 evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1208}
1209
1210//______________________________________________________________________________
1211template <typename Architecture_t, typename Layer_t>
1213 EOutputFunction f) -> void
1214{
1215 Forward(input, false);
1216 // Last layer should not be deep
1217 evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1218}
1219
1220//______________________________________________________________________________
1221template <typename Architecture_t, typename Layer_t>
1223{
1224 std::cout << "DEEP NEURAL NETWORK: Depth = " << this->GetDepth();
1225 std::cout << " Input = ( " << this->GetInputDepth();
1226 std::cout << ", " << this->GetInputHeight();
1227 std::cout << ", " << this->GetInputWidth() << " )";
1228 std::cout << " Batch size = " << this->GetBatchSize();
1229 std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;
1230
1231 //std::cout << "\t Layers: " << std::endl;
1232
1233 for (size_t i = 0; i < fLayers.size(); i++) {
1234 std::cout << "\tLayer " << i << "\t";
1235 fLayers[i]->Print();
1236 }
1237}
1238
1239//______________________________________________________________________________
1240template <typename Architecture_t, typename Layer_t>
1242 const std::vector<Double_t> & probabilities)
1243{
1244 for (size_t i = 0; i < fLayers.size(); i++) {
1245 if (i < probabilities.size()) {
1246 fLayers[i]->SetDropoutProbability(probabilities[i]);
1247 } else {
1248 fLayers[i]->SetDropoutProbability(1.0);
1249 }
1250 }
1251}
1252
1253
1254} // namespace DNN
1255} // namespace TMVA
1256
1257#endif
#define f(i)
Definition: RSha256.hxx:104
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
static RooMathCoreReg dummy
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
void Info(const char *location, const char *msgfmt,...)
void Error(const char *location, const char *msgfmt,...)
void Fatal(const char *location, const char *msgfmt,...)
double floor(double)
Generic Max Pooling Layer class.
Definition: MaxPoolLayer.h:59
Layer implementing Batch Normalization.
Generic Deep Neural Network class.
Definition: DeepNet.h:72
const std::vector< Layer_t * > & GetLayers() const
Definition: DeepNet.h:310
void AddDenseLayer(TDenseLayer< Architecture_t > *denseLayer)
Function for adding Dense Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:661
size_t GetBatchHeight() const
Definition: DeepNet.h:322
void SetBatchDepth(size_t batchDepth)
Definition: DeepNet.h:340
void Forward(Tensor_t &input, bool applyDropout=false)
Function that executes the entire forward pass in the network.
Definition: DeepNet.h:797
void SetLossFunction(ELossFunction J)
Definition: DeepNet.h:343
size_t fBatchHeight
The height of the batch used for training/testing.
Definition: DeepNet.h:93
ERegularization GetRegularization() const
Definition: DeepNet.h:329
std::vector< Layer_t * > & GetLayers()
Definition: DeepNet.h:309
typename Architecture_t::Scalar_t Scalar_t
Definition: DeepNet.h:77
void Initialize()
DAE functions.
Definition: DeepNet.h:778
size_t GetBatchSize() const
Getters.
Definition: DeepNet.h:316
size_t GetDepth() const
Definition: DeepNet.h:305
Scalar_t GetWeightDecay() const
Definition: DeepNet.h:330
size_t GetInputDepth() const
Definition: DeepNet.h:317
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
Definition: DeepNet.h:720
void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights)
Function that executes the entire backward pass in the network.
Definition: DeepNet.h:934
std::vector< Layer_t * > fLayers
The layers making up the DeepNet.
Definition: DeepNet.h:85
size_t fBatchDepth
The depth of the batch used for training/testing.
Definition: DeepNet.h:92
size_t fInputDepth
The depth of the input.
Definition: DeepNet.h:88
Layer_t * GetLayerAt(size_t i)
Get the layer in the vector of layers at position i.
Definition: DeepNet.h:301
void Print() const
Print the Deep Net Info.
Definition: DeepNet.h:1222
void SetWeightDecay(Scalar_t weightDecay)
Definition: DeepNet.h:346
void AddReshapeLayer(TReshapeLayer< Architecture_t > *reshapeLayer)
Function for adding Reshape Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:771
void Clear()
Remove all layers from the network.
Definition: DeepNet.h:313
Scalar_t RegularizationTerm() const
Function for computing the regularization term to be added to the loss function
Definition: DeepNet.h:1190
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:635
TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth, size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI=EInitialization::kZero, ERegularization fR=ERegularization::kNone, Scalar_t fWeightDecay=0.0, bool isTraining=false)
Constructor.
Definition: DeepNet.h:367
void Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f)
Prediction for the given inputs, based on what network learned.
Definition: DeepNet.h:1212
void SetInputDepth(size_t inputDepth)
Definition: DeepNet.h:337
bool IsTraining() const
Definition: DeepNet.h:325
size_t GetInputHeight() const
Definition: DeepNet.h:318
size_t fBatchSize
Batch size used for training and evaluation.
Definition: DeepNet.h:87
void Prediction(Matrix_t &predictions, EOutputFunction f) const
Prediction based on activations stored in the last layer.
Definition: DeepNet.h:1204
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:503
size_t fInputWidth
The width of the input.
Definition: DeepNet.h:90
void SetInputHeight(size_t inputHeight)
Definition: DeepNet.h:338
size_t GetBatchWidth() const
Definition: DeepNet.h:323
void AddBasicRNNLayer(TBasicRNNLayer< Architecture_t > *basicRNNLayer)
Function for adding Vanilla RNN when the layer is already created.
Definition: DeepNet.h:538
void AddMaxPoolLayer(CNN::TMaxPoolLayer< Architecture_t > *maxPoolLayer)
Function for adding Max Pooling layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:496
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:464
Scalar_t fWeightDecay
The weight decay factor.
Definition: DeepNet.h:101
Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization=true) const
Function for evaluating the loss, based on the activations stored in the last layer.
Definition: DeepNet.h:1164
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:418
ERegularization fR
The regularization used for the network.
Definition: DeepNet.h:100
void ResetTraining()
Function that resets some training flags (but not the weights) after looping over all the events.
Definition: DeepNet.h:787
size_t GetInputWidth() const
Definition: DeepNet.h:319
bool isInteger(Scalar_t x) const
Definition: DeepNet.h:81
size_t GetOutputWidth() const
Definition: DeepNet.h:306
bool fIsTraining
Is the network training?
Definition: DeepNet.h:96
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:668
void SetBatchSize(size_t batchSize)
Setters.
Definition: DeepNet.h:336
void AddConvLayer(TConvLayer< Architecture_t > *convLayer)
Function for adding Convolution Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:457
size_t fInputHeight
The height of the input.
Definition: DeepNet.h:89
void SetRegularization(ERegularization R)
Definition: DeepNet.h:345
TDeepNet(const TDeepNet &)
Copy-constructor.
Definition: DeepNet.h:379
size_t fBatchWidth
The width of the batch used for training/testing.
Definition: DeepNet.h:94
typename Architecture_t::Tensor_t Tensor_t
Definition: DeepNet.h:75
ELossFunction fJ
The loss function of the network.
Definition: DeepNet.h:98
~TDeepNet()
Destructor.
Definition: DeepNet.h:390
void SetBatchWidth(size_t batchWidth)
Definition: DeepNet.h:342
void SetDropoutProbabilities(const std::vector< Double_t > &probabilities)
Definition: DeepNet.h:1241
TDeepNet()
Default Constructor.
Definition: DeepNet.h:357
void SetBatchHeight(size_t batchHeight)
Definition: DeepNet.h:341
void Update(Scalar_t learningRate)
Function that will update the weights and biases in the layers that contain weights and biases.
Definition: DeepNet.h:1155
ELossFunction GetLossFunction() const
Definition: DeepNet.h:327
size_t calculateDimension(int imgDim, int fltDim, int padding, int stride)
Definition: DeepNet.h:400
const Layer_t * GetLayerAt(size_t i) const
Definition: DeepNet.h:302
void SetInitialization(EInitialization I)
Definition: DeepNet.h:344
EInitialization GetInitialization() const
Definition: DeepNet.h:328
void SetInputWidth(size_t inputWidth)
Definition: DeepNet.h:339
typename Architecture_t::Matrix_t Matrix_t
Definition: DeepNet.h:76
Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights, bool inTraining=false, bool includeRegularization=true)
Function for evaluating the loss, based on the propagation of the given input.
Definition: DeepNet.h:1180
EInitialization fI
The initialization method of the network.
Definition: DeepNet.h:99
size_t GetBatchDepth() const
Definition: DeepNet.h:321
Generic layer class.
Definition: DenseLayer.h:57
Double_t x[n]
Definition: legend1.C:17
#define I(x, y, z)
EvaluateInfo init(std::vector< RooRealProxy > parameters, std::vector< ArrayWrapper * > wrappers, std::vector< double * > arrays, size_t begin, size_t batchSize)
void Copy(void *source, void *dest)
void Print(std::ostream &os, const OptionType &opt)
EInitialization
Definition: Functions.h:70
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:44
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:63
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:55
create variable transformations
REAL epsilon
Definition: triangle.c:617