Logo ROOT   6.18/05
Reference Guide
DeepNet.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Vladimir Ilievski
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TDeepNet *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Deep Neural Network *
12 * *
13 * Authors (alphabetical): *
14 * Akshay Vashistha <akshayvashistha1995@gmail.com> - CERN, Switzerland *
15 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
16 * Saurav Shekhar <sauravshekhar01@gmail.com> - CERN, Switzerland *
17 * *
18 * Copyright (c) 2005-2015: *
19 * CERN, Switzerland *
20 * U. of Victoria, Canada *
21 * MPI-K Heidelberg, Germany *
22 * U. of Bonn, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29#ifndef TMVA_DNN_DEEPNET
30#define TMVA_DNN_DEEPNET
31
32#include "TString.h"
33
34#include "TMVA/DNN/Functions.h"
36
38#include "TMVA/DNN/DenseLayer.h"
40
43
45
46#ifdef HAVE_DAE
47#include "TMVA/DNN/DAE/CompressionLayer.h"
48#include "TMVA/DNN/DAE/CorruptionLayer.h"
49#include "TMVA/DNN/DAE/ReconstructionLayer.h"
50#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
51#endif
52
53#include <vector>
54#include <cmath>
55
56
57namespace TMVA {
58namespace DNN {
59
60 using namespace CNN;
61 using namespace RNN;
62 //using namespace DAE;
63
64/** \class TDeepNet
65
66 Generic Deep Neural Network class.
67
68 This class encapsulates the information for all types of Deep Neural Networks.
69
70 \tparam Architecture The Architecture type that holds the
71 architecture-specific data types.
72 */
73template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
74class TDeepNet {
75public:
76 using Matrix_t = typename Architecture_t::Matrix_t;
77 using Scalar_t = typename Architecture_t::Scalar_t;
78
79private:
80 bool inline isInteger(Scalar_t x) const { return x == floor(x); }
81 size_t calculateDimension(int imgDim, int fltDim, int padding, int stride);
82
83private:
84 std::vector<Layer_t *> fLayers; ///< The layers consisting the DeepNet
85
86 size_t fBatchSize; ///< Batch size used for training and evaluation.
87 size_t fInputDepth; ///< The depth of the input.
88 size_t fInputHeight; ///< The height of the input.
89 size_t fInputWidth; ///< The width of the input.
90
91 size_t fBatchDepth; ///< The depth of the batch used for training/testing.
92 size_t fBatchHeight; ///< The height of the batch used for training/testing.
93 size_t fBatchWidth; ///< The width of the batch used for training/testing.
94
95 bool fIsTraining; ///< Is the network training?
96
97 ELossFunction fJ; ///< The loss function of the network.
98 EInitialization fI; ///< The initialization method of the network.
99 ERegularization fR; ///< The regularization used for the network.
100 Scalar_t fWeightDecay; ///< The weight decay factor.
101
102public:
103 /*! Default Constructor */
105
106 /*! Constructor */
107 TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
108 size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
109 ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);
110
111 /*! Copy-constructor */
113
114 /*! Destructor */
116
117 /*! Function for adding Convolution layer in the Deep Neural Network,
118 * with a given depth, filter height and width, striding in rows and columns,
119 * the zero paddings, as well as the activation function and the dropout
120 * probability. Based on these parameters, it calculates the width and height
121 * of the convolutional layer. */
122 TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
123 size_t strideCols, size_t paddingHeight, size_t paddingWidth,
124 EActivationFunction f, Scalar_t dropoutProbability = 1.0);
125
126 /*! Function for adding Convolution Layer in the Deep Neural Network,
127 * when the layer is already created. */
129
130 /*! Function for adding Pooling layer in the Deep Neural Network,
131 * with a given filter height and width, striding in rows and columns as
132 * well as the dropout probability. The depth is same as the previous
133 * layer depth. Based on these parameters, it calculates the width and
134 * height of the pooling layer. */
135 TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
136 size_t strideCols, Scalar_t dropoutProbability = 1.0);
137 /*! Function for adding Max Pooling layer in the Deep Neural Network,
138 * when the layer is already created. */
140
141
142 /*! Function for adding Recurrent Layer in the Deep Neural Network,
143 * with given parameters */
144 TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
145 bool rememberState = false);
146
147 /*! Function for adding Vanilla RNN when the layer is already created
148 */
150
151 /*! Function for adding Dense Connected Layer in the Deep Neural Network,
152 * with a given width, activation function and dropout probability.
153 * Based on the previous layer dimensions, it calculates the input width
154 * of the fully connected layer. */
156
157 /*! Function for adding Dense Layer in the Deep Neural Network, when
158 * the layer is already created. */
160
161 /*! Function for adding Reshape Layer in the Deep Neural Network, with a given
162 * height and width. It will take every matrix from the previous layer and
163 * reshape it to a matrix with new dimensions. */
164 TReshapeLayer<Architecture_t> *AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening);
165
166 /*! Function for adding Reshape Layer in the Deep Neural Network, when
167 * the layer is already created. */
169
170#ifdef HAVE_DAE /// DAE functions
171 /*! Function for adding Corruption layer in the Deep Neural Network,
172 * with given number of visibleUnits and hiddenUnits. It corrupts input
173 * according to given corruptionLevel and dropoutProbability. */
174 TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
175 Scalar_t dropoutProbability, Scalar_t corruptionLevel);
176
177 /*! Function for adding Corruption Layer in the Deep Neural Network,
178 * when the layer is already created. */
179 void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);
180
181 /*! Function for adding Compression layer in the Deep Neural Network,
182 * with given number of visibleUnits and hiddenUnits. It compresses the input units
183 * taking weights and biases from prev layers. */
184 TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
185 Scalar_t dropoutProbability, EActivationFunction f,
186 std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);
187
188 /*! Function for adding Compression Layer in the Deep Neural Network, when
189 * the layer is already created. */
190 void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);
191
192 /*! Function for adding Reconstruction layer in the Deep Neural Network,
193 * with given number of visibleUnits and hiddenUnits. It reconstructs the input units
194 * taking weights and biases from prev layers. Same corruptionLevel and dropoutProbability
195 * must be passed as in corruptionLayer. */
196 TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
197 Scalar_t learningRate, EActivationFunction f,
198 std::vector<Matrix_t> weights,
199 std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
200 Scalar_t dropoutProbability);
201
202 /*! Function for adding Reconstruction Layer in the Deep Neural Network, when
203 * the layer is already created. */
204 void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);
205
206 /*! Function for adding logisticRegressionLayer in the Deep Neural Network,
207 * with given number of inputUnits and outputUnits. It classifies the outputUnits. */
208 TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
209 size_t testDataBatchSize,
210 Scalar_t learningRate);
211
212 /*! Function for adding logisticRegressionLayer in the Deep Neural Network, when
213 * the layer is already created. */
214 void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
215
216 /* To train the Deep AutoEncoder network with required number of Corruption, Compression and Reconstruction
217 * layers. */
218 void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
219 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
220 bool applyDropout = false);
221
222 /* To classify outputLabel in Deep AutoEncoder. Should be used after PreTrain if required.
223 * Currently, it used Logistic Regression Layer. Otherwise we can use any other classification layer also.
224 */
225 void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
226 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
227#endif
228
229 /*! Function for initialization of the Neural Net. */
231
232 /*! Function that executes the entire forward pass in the network. */
233 void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);
234
235 /*! Function for parallel forward in the vector of deep nets, where the master
236 * net is the net calling this function. There is one batch for one deep net.*/
238 std::vector<TTensorBatch<Architecture_t>> &batches, bool applyDropout = false);
239
240 /*! Function that executes the entire backward pass in the network. */
241 void Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth, const Matrix_t &weights);
242
243
244 /*! Function for parallel backward in the vector of deep nets, where the master
245 * net is the net calling this function and getting the updates from the other nets.
246 * There is one batch for one deep net.*/
248 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate);
249
250 /*! Function for parallel backward in the vector of deep nets, where the master
251 * net is the net calling this function and getting the updates from the other nets,
252 * following the momentum strategy. There is one batch for one deep net.*/
254 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
255 Scalar_t momentum);
256
257 /*! Function for parallel backward in the vector of deep nets, where the master
258 * net is the net calling this function and getting the updates from the other nets,
259 * following the Nesterov momentum strategy. There is one batch for one deep net.*/
261 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
262 Scalar_t momentum);
263
264 /*! Function that will update the weights and biases in the layers that
265 * contain weights and biases. */
266 void Update(Scalar_t learningRate);
267
268 /*! Function for evaluating the loss, based on the activations stored
269 * in the last layer. */
270 Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization = true) const;
271
272 /*! Function for evaluating the loss, based on the propagation of the given input. */
273 Scalar_t Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth, const Matrix_t &weights,
274 bool applyDropout = false, bool includeRegularization = true);
275
276 /*! Function for computing the regularizaton term to be added to the loss function */
278
279 /*! Prediction based on activations stored in the last layer. */
280 void Prediction(Matrix_t &predictions, EOutputFunction f) const;
281
282 /*! Prediction for the given inputs, based on what network learned. */
283 void Prediction(Matrix_t &predictions, std::vector<Matrix_t> input, EOutputFunction f);
284
285 /*! Print the Deep Net Info */
286 void Print() const;
287
288 /*! Get the layer in the vector of layers at position i */
289 inline Layer_t *GetLayerAt(size_t i) { return fLayers[i]; }
290 inline const Layer_t *GetLayerAt(size_t i) const { return fLayers[i]; }
291
292 /* Depth and the output width of the network. */
293 inline size_t GetDepth() const { return fLayers.size(); }
294 inline size_t GetOutputWidth() const { return fLayers.back()->GetWidth(); }
295
296 /* Return a reference to the layers. */
297 inline std::vector<Layer_t *> &GetLayers() { return fLayers; }
298 inline const std::vector<Layer_t *> &GetLayers() const { return fLayers; }
299
300 /*! Remove all layers from the network. */
301 inline void Clear() { fLayers.clear(); }
302
303 /*! Getters */
304 inline size_t GetBatchSize() const { return fBatchSize; }
305 inline size_t GetInputDepth() const { return fInputDepth; }
306 inline size_t GetInputHeight() const { return fInputHeight; }
307 inline size_t GetInputWidth() const { return fInputWidth; }
308
309 inline size_t GetBatchDepth() const { return fBatchDepth; }
310 inline size_t GetBatchHeight() const { return fBatchHeight; }
311 inline size_t GetBatchWidth() const { return fBatchWidth; }
312
313 inline bool IsTraining() const { return fIsTraining; }
314
315 inline ELossFunction GetLossFunction() const { return fJ; }
316 inline EInitialization GetInitialization() const { return fI; }
317 inline ERegularization GetRegularization() const { return fR; }
318 inline Scalar_t GetWeightDecay() const { return fWeightDecay; }
319
320 /*! Setters */
321 // FIXME many of these won't work as the data structure storing activations
322 // and gradients have not changed in all the layers, also params in layers
323 // have not changed either
324 inline void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
325 inline void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
326 inline void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
327 inline void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
328 inline void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
329 inline void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
330 inline void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }
331 inline void SetLossFunction(ELossFunction J) { fJ = J; }
335
336 void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);
337
338};
339
340//
341// Deep Net Class - Implementation
342//
343//______________________________________________________________________________
344template <typename Architecture_t, typename Layer_t>
346 : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
347 fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
348 fIsTraining(true), fWeightDecay(0.0)
349{
350 // Nothing to do here.
351}
352
353//______________________________________________________________________________
354template <typename Architecture_t, typename Layer_t>
355TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
356 size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
358 : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
359 fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J), fI(I),
360 fR(R), fWeightDecay(weightDecay)
361{
362 // Nothing to do here.
363}
364
365//______________________________________________________________________________
366template <typename Architecture_t, typename Layer_t>
368 : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
369 fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
370 fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
371 fWeightDecay(deepNet.fWeightDecay)
372{
373 // Nothing to do here.
374}
375
376//______________________________________________________________________________
377template <typename Architecture_t, typename Layer_t>
379{
380 // Release the layers' memory (note: the body is currently empty, so nothing is deleted here)
381}
382
383//______________________________________________________________________________
384template <typename Architecture_t, typename Layer_t>
385auto TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride) -> size_t
386{
387 Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1;
388 if (!isInteger(dimension) || dimension <= 0) {
389 this->Print();
390 int iLayer = fLayers.size();
391 Fatal("calculateDimension","Not compatible hyper parameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d",
392 iLayer, imgDim, fltDim, padding, stride);
393 // std::cout << " calculateDimension - Not compatible hyper parameters (imgDim, fltDim, padding, stride)"
394 // << imgDim << " , " << fltDim << " , " << padding << " , " << stride<< " resulting dim is " << dimension << std::endl;
395 // std::exit(EXIT_FAILURE);
396 }
397
398 return (size_t)dimension;
399}
400
401//______________________________________________________________________________
402template <typename Architecture_t, typename Layer_t>
404 size_t filterWidth, size_t strideRows,
405 size_t strideCols, size_t paddingHeight,
406 size_t paddingWidth, EActivationFunction f,
407 Scalar_t dropoutProbability)
408{
409 // All variables defining a convolutional layer
410 size_t batchSize = this->GetBatchSize();
411 size_t inputDepth;
412 size_t inputHeight;
413 size_t inputWidth;
414 EInitialization init = this->GetInitialization();
415 ERegularization reg = this->GetRegularization();
416 Scalar_t decay = this->GetWeightDecay();
417
418 if (fLayers.size() == 0) {
419 inputDepth = this->GetInputDepth();
420 inputHeight = this->GetInputHeight();
421 inputWidth = this->GetInputWidth();
422 } else {
423 Layer_t *lastLayer = fLayers.back();
424 inputDepth = lastLayer->GetDepth();
425 inputHeight = lastLayer->GetHeight();
426 inputWidth = lastLayer->GetWidth();
427 }
428
429
430
431 // Create the conv layer
433 batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
434 strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);
435
436 fLayers.push_back(convLayer);
437 return convLayer;
438}
439
440//______________________________________________________________________________
441template <typename Architecture_t, typename Layer_t>
443{
444 fLayers.push_back(convLayer);
445}
446
447//______________________________________________________________________________
448template <typename Architecture_t, typename Layer_t>
450 size_t strideRows, size_t strideCols,
451 Scalar_t dropoutProbability)
452{
453 size_t batchSize = this->GetBatchSize();
454 size_t inputDepth;
455 size_t inputHeight;
456 size_t inputWidth;
457
458 if (fLayers.size() == 0) {
459 inputDepth = this->GetInputDepth();
460 inputHeight = this->GetInputHeight();
461 inputWidth = this->GetInputWidth();
462 } else {
463 Layer_t *lastLayer = fLayers.back();
464 inputDepth = lastLayer->GetDepth();
465 inputHeight = lastLayer->GetHeight();
466 inputWidth = lastLayer->GetWidth();
467 }
468
470 batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
471 strideRows, strideCols, dropoutProbability);
472
473 // Only the pointer is stored in fLayers; the layer object itself is not copied.
474 fLayers.push_back(maxPoolLayer);
475
476 return maxPoolLayer;
477}
478
479//______________________________________________________________________________
480template <typename Architecture_t, typename Layer_t>
482{
483 fLayers.push_back(maxPoolLayer);
484}
485
486//______________________________________________________________________________
487template <typename Architecture_t, typename Layer_t>
489 size_t timeSteps,
490 bool rememberState)
491{
492
493 // should check if input and time size are consistent
494
495 //std::cout << "Create RNN " << fLayers.size() << " " << this->GetInputHeight() << " " << this->GetInputWidth() << std::endl;
496 size_t inputHeight, inputWidth;
497 if (fLayers.size() == 0) {
498 inputHeight = this->GetInputHeight();
499 inputWidth = this->GetInputWidth();
500 } else {
501 Layer_t *lastLayer = fLayers.back();
502 inputHeight = lastLayer->GetHeight();
503 inputWidth = lastLayer->GetWidth();
504 }
505 if (inputSize != inputWidth) {
506 Error("AddBasicRNNLayer","Inconsistent input size with input layout - it should be %zu instead of %zu",inputSize, inputWidth);
507 }
508 if (timeSteps != inputHeight) {
509 Error("AddBasicRNNLayer","Inconsistent time steps with input layout - it should be %zu instead of %zu",timeSteps, inputHeight);
510 }
511
512 TBasicRNNLayer<Architecture_t> *basicRNNLayer =
513 new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState,
514 DNN::EActivationFunction::kTanh, fIsTraining, this->GetInitialization());
515 fLayers.push_back(basicRNNLayer);
516 return basicRNNLayer;
517}
518
519//______________________________________________________________________________
520template <typename Architecture_t, typename Layer_t>
522{
523 fLayers.push_back(basicRNNLayer);
524}
525
526//DAE
527#ifdef HAVE_DAE
528
529//______________________________________________________________________________
530template <typename Architecture_t, typename Layer_t>
531TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
532 size_t hiddenUnits,
533 Scalar_t dropoutProbability,
534 Scalar_t corruptionLevel)
535{
536 size_t batchSize = this->GetBatchSize();
537
538 TCorruptionLayer<Architecture_t> *corruptionLayer =
539 new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
540 fLayers.push_back(corruptionLayer);
541 return corruptionLayer;
542}
543//______________________________________________________________________________
544
545template <typename Architecture_t, typename Layer_t>
546void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
547{
548 fLayers.push_back(corruptionLayer);
549}
550
551//______________________________________________________________________________
552template <typename Architecture_t, typename Layer_t>
553TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
554 size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
555 std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
556{
557 size_t batchSize = this->GetBatchSize();
558
559 TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
560 batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
561 fLayers.push_back(compressionLayer);
562 return compressionLayer;
563}
564//______________________________________________________________________________
565
566template <typename Architecture_t, typename Layer_t>
567void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
568{
569 fLayers.push_back(compressionLayer);
570}
571
572//______________________________________________________________________________
573template <typename Architecture_t, typename Layer_t>
574TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
575 size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
576 std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
577{
578 size_t batchSize = this->GetBatchSize();
579
580 TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
581 batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
582 fLayers.push_back(reconstructionLayer);
583 return reconstructionLayer;
584}
585//______________________________________________________________________________
586
587template <typename Architecture_t, typename Layer_t>
588void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
589 TReconstructionLayer<Architecture_t> *reconstructionLayer)
590{
591 fLayers.push_back(reconstructionLayer);
592}
593
594//______________________________________________________________________________
595template <typename Architecture_t, typename Layer_t>
596TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
597 size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
598{
599 size_t batchSize = this->GetBatchSize();
600
601 TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
602 new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
603 fLayers.push_back(logisticRegressionLayer);
604 return logisticRegressionLayer;
605}
606//______________________________________________________________________________
607template <typename Architecture_t, typename Layer_t>
608void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
609 TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
610{
611 fLayers.push_back(logisticRegressionLayer);
612}
613#endif
614
615
616//______________________________________________________________________________
617template <typename Architecture_t, typename Layer_t>
619 Scalar_t dropoutProbability)
620{
621 size_t batchSize = this->GetBatchSize();
622 size_t inputWidth;
623 EInitialization init = this->GetInitialization();
624 ERegularization reg = this->GetRegularization();
625 Scalar_t decay = this->GetWeightDecay();
626
627 if (fLayers.size() == 0) {
628 inputWidth = this->GetInputWidth();
629 } else {
630 Layer_t *lastLayer = fLayers.back();
631 inputWidth = lastLayer->GetWidth();
632 }
633
634 TDenseLayer<Architecture_t> *denseLayer =
635 new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
636
637 fLayers.push_back(denseLayer);
638
639 return denseLayer;
640}
641
642//______________________________________________________________________________
643template <typename Architecture_t, typename Layer_t>
645{
646 fLayers.push_back(denseLayer);
647}
648
649//______________________________________________________________________________
650template <typename Architecture_t, typename Layer_t>
652 size_t width, bool flattening)
653{
654 size_t batchSize = this->GetBatchSize();
655 size_t inputDepth;
656 size_t inputHeight;
657 size_t inputWidth;
658 size_t outputNSlices;
659 size_t outputNRows;
660 size_t outputNCols;
661
662 if (fLayers.size() == 0) {
663 inputDepth = this->GetInputDepth();
664 inputHeight = this->GetInputHeight();
665 inputWidth = this->GetInputWidth();
666 } else {
667 Layer_t *lastLayer = fLayers.back();
668 inputDepth = lastLayer->GetDepth();
669 inputHeight = lastLayer->GetHeight();
670 inputWidth = lastLayer->GetWidth();
671 }
672
673 if (flattening) {
674 outputNSlices = 1;
675 outputNRows = this->GetBatchSize();
676 outputNCols = depth * height * width;
677 size_t inputNCols = inputDepth * inputHeight * inputWidth;
678 if (outputNCols != 0 && outputNCols != inputNCols ) {
679 Info("AddReshapeLayer","Dimensions not compatibles - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - Force flattening output to be %zu",
680 inputDepth, inputHeight, inputWidth, depth, height, width,inputNCols);
681 }
682 outputNCols = inputNCols;
683 depth = 1;
684 height = 1;
685 width = outputNCols;
686 } else {
687 outputNSlices = this->GetBatchSize();
688 outputNRows = depth;
689 outputNCols = height * width;
690 }
691
692 TReshapeLayer<Architecture_t> *reshapeLayer =
693 new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
694 outputNSlices, outputNRows, outputNCols, flattening);
695
696 fLayers.push_back(reshapeLayer);
697
698 return reshapeLayer;
699}
700
701//______________________________________________________________________________
702template <typename Architecture_t, typename Layer_t>
704{
705 fLayers.push_back(reshapeLayer);
706}
707
708//______________________________________________________________________________
709template <typename Architecture_t, typename Layer_t>
711{
712 for (size_t i = 0; i < fLayers.size(); i++) {
713 fLayers[i]->Initialize();
714 }
715}
716
template <typename Architecture>
auto debugTensor(const std::vector<typename Architecture::Matrix_t> &A, const std::string &name = "tensor") -> void
{
   // Debug utility: dump every matrix (slice) of the tensor A to stdout,
   // one row per line, with a "********" separator after each slice.
   // \param A     the tensor, as a vector of architecture matrices
   // \param name  label printed before the tensor content
   //
   // Note: `name` is now taken by const reference to avoid a std::string
   // copy on every call (binding a temporary to the default is still valid).
   std::cout << name << "\n";
   for (size_t l = 0; l < A.size(); ++l) {
      // Cache the dimensions once per slice, keeping their native type so
      // the loop comparisons below do not mix signed and unsigned.
      auto nRows = A[l].GetNrows();
      auto nCols = A[l].GetNcols();
      for (decltype(nRows) i = 0; i < nRows; ++i) {
         for (decltype(nCols) j = 0; j < nCols; ++j) {
            std::cout << A[l](i, j) << " ";
         }
         std::cout << "\n";
      }
      std::cout << "********\n";
   }
}
731
732//______________________________________________________________________________
733template <typename Architecture_t, typename Layer_t>
734auto TDeepNet<Architecture_t, Layer_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
735{
736 fLayers.front()->Forward(input, applyDropout);
737
738 for (size_t i = 1; i < fLayers.size(); i++) {
739 fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
740 }
741}
742
743//______________________________________________________________________________
744template <typename Architecture_t, typename Layer_t>
746 std::vector<TTensorBatch<Architecture_t>> &batches,
747 bool applyDropout) -> void
748{
749 size_t depth = this->GetDepth();
750
751 // The first layer of each deep net
752 for (size_t i = 0; i < nets.size(); i++) {
753 nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
754 }
755
756 // The i'th layer of each deep net
757 for (size_t i = 1; i < depth; i++) {
758 for (size_t j = 0; j < nets.size(); j++) {
759 nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
760 }
761 }
762}
763
764#ifdef HAVE_DAE
765//_____________________________________________________________________________
766template <typename Architecture_t, typename Layer_t>
767auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
768 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
769 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
770 EActivationFunction f, bool applyDropout) -> void
771{
772 std::vector<Matrix_t> inp1;
773 std::vector<Matrix_t> inp2;
774 size_t numOfHiddenLayers = sizeof(numHiddenUnitsPerLayer) / sizeof(numHiddenUnitsPerLayer[0]);
775 // size_t batchSize = this->GetBatchSize();
776 size_t visibleUnits = (size_t)input[0].GetNrows();
777
778 AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
779 fLayers.back()->Initialize();
780 fLayers.back()->Forward(input, applyDropout);
781 // fLayers.back()->Print();
782
783 AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
784 fLayers.back()->GetBiases());
785 fLayers.back()->Initialize();
786 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // as we have to pass corrupt input
787
788 AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
789 fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
790 fLayers.back()->Initialize();
791 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
792 applyDropout); // as we have to pass compressed Input
793 fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
794 input);
795 // three layers are added, now pointer is on third layer
796 size_t weightsSize = fLayers.back()->GetWeights().size();
797 size_t biasesSize = fLayers.back()->GetBiases().size();
798 for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
799 // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
800 for (size_t j = 0; j < weightsSize; j++) {
801 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
802 }
803 for (size_t j = 0; j < biasesSize; j++) {
804 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
805 }
806 fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
807 fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
808 fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
809 fLayers[fLayers.size() - 3]->GetOutput(), input);
810 }
811 fLayers.back()->Print();
812
813 for (size_t i = 1; i < numOfHiddenLayers; i++) {
814
815 AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
816 fLayers.back()->Initialize();
817 fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(),
818 applyDropout); // as we have to pass compressed Input
819
820 AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
821 fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
822 fLayers.back()->Initialize();
823 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
824
825 AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
826 fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
827 dropoutProbability);
828 fLayers.back()->Initialize();
829 fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
830 applyDropout); // as we have to pass compressed Input
831 fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
832 fLayers[fLayers.size() - 5]->GetOutput());
833
834 // three layers are added, now pointer is on third layer
835 size_t _weightsSize = fLayers.back()->GetWeights().size();
836 size_t _biasesSize = fLayers.back()->GetBiases().size();
837 for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
838 // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
839 for (size_t j = 0; j < _weightsSize; j++) {
840 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
841 }
842 for (size_t j = 0; j < _biasesSize; j++) {
843 Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
844 }
845 fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
846 fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
847 fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
848 fLayers[fLayers.size() - 3]->GetOutput(),
849 fLayers[fLayers.size() - 5]->GetOutput());
850 }
851 fLayers.back()->Print();
852 }
853}
854
855//______________________________________________________________________________
856template <typename Architecture_t, typename Layer_t>
857auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
858 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
859 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
860{
861 std::vector<Matrix_t> inp1;
862 std::vector<Matrix_t> inp2;
863 if (fLayers.size() == 0) // only Logistic Regression Layer
864 {
865 size_t inputUnits = input[0].GetNrows();
866
867 AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
868 fLayers.back()->Initialize();
869 for (size_t i = 0; i < epochs; i++) {
870 fLayers.back()->Backward(inputLabel, inp1, input, inp2);
871 }
872 fLayers.back()->Forward(input, false);
873 fLayers.back()->Print();
874 } else { // if used after any other layer
875 size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
876 AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
877 fLayers.back()->Initialize();
878 for (size_t i = 0; i < epochs; i++) {
879 fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
880 }
881 fLayers.back()->Forward(testInput, false);
882 fLayers.back()->Print();
883 }
884}
885#endif
886
887//______________________________________________________________________________
888template <typename Architecture_t, typename Layer_t>
889auto TDeepNet<Architecture_t, Layer_t>::Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
890 const Matrix_t &weights) -> void
891{
892 std::vector<Matrix_t> inp1;
893 std::vector<Matrix_t> inp2;
894 // Last layer should be dense layer
895 evaluateGradients<Architecture_t>(fLayers.back()->GetActivationGradientsAt(0), this->GetLossFunction(), groundTruth,
896 fLayers.back()->GetOutputAt(0), weights);
897 for (size_t i = fLayers.size() - 1; i > 0; i--) {
898 std::vector<Matrix_t> &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
899 std::vector<Matrix_t> &activations_backward = fLayers[i - 1]->GetOutput();
900 fLayers[i]->Backward(activation_gradient_backward, activations_backward, inp1, inp2);
901 }
902
903 // need to have a dummy tensor (size=0) to pass for activation gradient backward which
904 // are not computed for the first layer
905 std::vector<Matrix_t> dummy;
906 fLayers[0]->Backward(dummy, input, inp1, inp2);
907}
908
909//______________________________________________________________________________
910template <typename Architecture_t, typename Layer_t>
912 std::vector<TTensorBatch<Architecture_t>> &batches,
913 Scalar_t learningRate) -> void
914{
915 std::vector<Matrix_t> inp1;
916 std::vector<Matrix_t> inp2;
917 size_t depth = this->GetDepth();
918
919 // Evaluate the gradients of the last layers in each deep net
920 for (size_t i = 0; i < nets.size(); i++) {
921 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
922 nets[i].GetLossFunction(), batches[i].GetOutput(),
923 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
924 }
925
926 // Backpropagate the error in i'th layer of each deep net
927 for (size_t i = depth - 1; i > 0; i--) {
928 for (size_t j = 0; j < nets.size(); j++) {
929 nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
930 nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
931 }
932 }
933
934 std::vector<Matrix_t> dummy;
935
936 // First layer of each deep net
937 for (size_t i = 0; i < nets.size(); i++) {
938 nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
939 }
940
941 // Update and copy
942 for (size_t i = 0; i < nets.size(); i++) {
943 for (size_t j = 0; j < depth; j++) {
944 Layer_t *masterLayer = this->GetLayerAt(j);
945 Layer_t *layer = nets[i].GetLayerAt(j);
946
947 masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
948 layer->CopyWeights(masterLayer->GetWeights());
949
950 masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
951 layer->CopyBiases(masterLayer->GetBiases());
952 }
953 }
954}
955
956//______________________________________________________________________________
957template <typename Architecture_t, typename Layer_t>
959 std::vector<TTensorBatch<Architecture_t>> &batches,
960 Scalar_t learningRate, Scalar_t momentum) -> void
961{
962 std::vector<Matrix_t> inp1;
963 std::vector<Matrix_t> inp2;
964 size_t depth = this->GetDepth();
965
966 // Evaluate the gradients of the last layers in each deep net
967 for (size_t i = 0; i < nets.size(); i++) {
968 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
969 nets[i].GetLossFunction(), batches[i].GetOutput(),
970 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
971 }
972
973 // Backpropagate the error in i'th layer of each deep net
974 for (size_t i = depth - 1; i > 0; i--) {
975 Layer_t *masterLayer = this->GetLayerAt(i);
976
977 for (size_t j = 0; j < nets.size(); j++) {
978 Layer_t *layer = nets[j].GetLayerAt(i);
979
980 layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
981 inp1, inp2);
982 masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
983 masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
984 }
985
986 masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
987 masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
988 }
989
990 std::vector<Matrix_t> dummy;
991
992 // First layer of each deep net
993 Layer_t *masterFirstLayer = this->GetLayerAt(0);
994 for (size_t i = 0; i < nets.size(); i++) {
995 Layer_t *layer = nets[i].GetLayerAt(0);
996
997 layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
998
999 masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1000 masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1001 }
1002
1003 masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
1004 masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);
1005
1006 for (size_t i = 0; i < depth; i++) {
1007 Layer_t *masterLayer = this->GetLayerAt(i);
1008 masterLayer->Update(1.0);
1009
1010 for (size_t j = 0; j < nets.size(); j++) {
1011 Layer_t *layer = nets[j].GetLayerAt(i);
1012
1013 layer->CopyWeights(masterLayer->GetWeights());
1014 layer->CopyBiases(masterLayer->GetBiases());
1015 }
1016 }
1017}
1018
1019//______________________________________________________________________________
1020template <typename Architecture_t, typename Layer_t>
1022 std::vector<TTensorBatch<Architecture_t>> &batches,
1023 Scalar_t learningRate, Scalar_t momentum) -> void
1024{
1025 std::cout << "Parallel Backward Nestorov" << std::endl;
1026 std::vector<Matrix_t> inp1;
1027 std::vector<Matrix_t> inp2;
1028 size_t depth = this->GetDepth();
1029
1030 // Evaluate the gradients of the last layers in each deep net
1031 for (size_t i = 0; i < nets.size(); i++) {
1032 evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1033 nets[i].GetLossFunction(), batches[i].GetOutput(),
1034 nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1035 }
1036
1037 // Backpropagate the error in i'th layer of each deep net
1038 for (size_t i = depth - 1; i > 0; i--) {
1039 for (size_t j = 0; j < nets.size(); j++) {
1040 Layer_t *layer = nets[j].GetLayerAt(i);
1041
1042 layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1043 inp1, inp2);
1044 }
1045 }
1046
1047 std::vector<Matrix_t> dummy;
1048
1049 // First layer of each deep net
1050 for (size_t i = 0; i < nets.size(); i++) {
1051 Layer_t *layer = nets[i].GetLayerAt(0);
1052 layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1053 }
1054
1055 for (size_t i = 0; i < depth; i++) {
1056 Layer_t *masterLayer = this->GetLayerAt(i);
1057 for (size_t j = 0; j < nets.size(); j++) {
1058 Layer_t *layer = nets[j].GetLayerAt(i);
1059
1060 layer->CopyWeights(masterLayer->GetWeights());
1061 layer->CopyBiases(masterLayer->GetBiases());
1062
1063 layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
1064 layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
1065 }
1066
1067 for (size_t j = 0; j < nets.size(); j++) {
1068 Layer_t *layer = nets[j].GetLayerAt(i);
1069
1070 masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1071 masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1072 }
1073
1074 masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1075 masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1076
1077 masterLayer->Update(1.0);
1078 }
1079}
1080
1081//______________________________________________________________________________
1082template <typename Architecture_t, typename Layer_t>
1084{
1085 for (size_t i = 0; i < fLayers.size(); i++) {
1086 fLayers[i]->Update(learningRate);
1087 }
1088}
1089
1090//______________________________________________________________________________
1091template <typename Architecture_t, typename Layer_t>
1092auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
1093 bool includeRegularization) const -> Scalar_t
1094{
1095 // Last layer should not be deep
1096 auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);
1097
1098 includeRegularization &= (this->GetRegularization() != ERegularization::kNone);
1099 if (includeRegularization) {
1100 loss += RegularizationTerm();
1101 }
1102
1103 return loss;
1104}
1105
1106//______________________________________________________________________________
1107template <typename Architecture_t, typename Layer_t>
1108auto TDeepNet<Architecture_t, Layer_t>::Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
1109 const Matrix_t &weights, bool applyDropout, bool includeRegularization)
1110 -> Scalar_t
1111{
1112 Forward(input, applyDropout);
1113 return Loss(groundTruth, weights, includeRegularization);
1114}
1115
1116//______________________________________________________________________________
1117template <typename Architecture_t, typename Layer_t>
1119{
1120 Scalar_t reg = 0.0;
1121 for (size_t i = 0; i < fLayers.size(); i++) {
1122 for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
1123 reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
1124 }
1125 }
1126 return this->GetWeightDecay() * reg;
1127}
1128
1129
1130//______________________________________________________________________________
1131template <typename Architecture_t, typename Layer_t>
1133{
1134 // Last layer should not be deep
1135 evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1136}
1137
1138//______________________________________________________________________________
1139template <typename Architecture_t, typename Layer_t>
1140auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, std::vector<Matrix_t> input,
1141 EOutputFunction f) -> void
1142{
1143 Forward(input, false);
1144 // Last layer should not be deep
1145 evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1146}
1147
1148//______________________________________________________________________________
1149template <typename Architecture_t, typename Layer_t>
1151{
1152 std::cout << "DEEP NEURAL NETWORK: Depth = " << this->GetDepth();
1153 std::cout << " Input = ( " << this->GetInputDepth();
1154 std::cout << ", " << this->GetInputHeight();
1155 std::cout << ", " << this->GetInputWidth() << " )";
1156 std::cout << " Batch size = " << this->GetBatchSize();
1157 std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;
1158
1159 //std::cout << "\t Layers: " << std::endl;
1160
1161 for (size_t i = 0; i < fLayers.size(); i++) {
1162 std::cout << "\tLayer " << i << "\t";
1163 fLayers[i]->Print();
1164 }
1165}
1166
1167//______________________________________________________________________________
1168template <typename Architecture_t, typename Layer_t>
1170 const std::vector<Double_t> & probabilities)
1171{
1172 for (size_t i = 0; i < fLayers.size(); i++) {
1173 if (i < probabilities.size()) {
1174 fLayers[i]->SetDropoutProbability(probabilities[i]);
1175 } else {
1176 fLayers[i]->SetDropoutProbability(1.0);
1177 }
1178 }
1179}
1180
1181
1182} // namespace DNN
1183} // namespace TMVA
1184
1185#endif
#define f(i)
Definition: RSha256.hxx:104
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
static Int_t init()
static RooMathCoreReg dummy
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
void Info(const char *location, const char *msgfmt,...)
void Error(const char *location, const char *msgfmt,...)
void Fatal(const char *location, const char *msgfmt,...)
char name[80]
Definition: TGX11.cxx:109
double floor(double)
Generic Max Pooling Layer class.
Definition: MaxPoolLayer.h:57
Generic Deep Neural Network class.
Definition: DeepNet.h:74
const std::vector< Layer_t * > & GetLayers() const
Definition: DeepNet.h:298
void AddDenseLayer(TDenseLayer< Architecture_t > *denseLayer)
Function for adding Dense Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:644
size_t GetBatchHeight() const
Definition: DeepNet.h:310
void SetBatchDepth(size_t batchDepth)
Definition: DeepNet.h:328
void ParallelBackward(std::vector< TDeepNet< Architecture_t, Layer_t > > &nets, std::vector< TTensorBatch< Architecture_t > > &batches, Scalar_t learningRate)
Function for parallel backward in the vector of deep nets, where the master net is the net calling th...
Definition: DeepNet.h:911
void SetLossFunction(ELossFunction J)
Definition: DeepNet.h:331
size_t fBatchHeight
The height of the batch used for training/testing.
Definition: DeepNet.h:92
ERegularization GetRegularization() const
Definition: DeepNet.h:317
std::vector< Layer_t * > & GetLayers()
Definition: DeepNet.h:297
typename Architecture_t::Scalar_t Scalar_t
Definition: DeepNet.h:77
void Initialize()
DAE functions.
Definition: DeepNet.h:710
size_t GetBatchSize() const
Getters.
Definition: DeepNet.h:304
size_t GetDepth() const
Definition: DeepNet.h:293
Scalar_t GetWeightDecay() const
Definition: DeepNet.h:318
size_t GetInputDepth() const
Definition: DeepNet.h:305
std::vector< Layer_t * > fLayers
The layers consisting the DeepNet.
Definition: DeepNet.h:84
size_t fBatchDepth
The depth of the batch used for training/testing.
Definition: DeepNet.h:91
size_t fInputDepth
The depth of the input.
Definition: DeepNet.h:87
Layer_t * GetLayerAt(size_t i)
Get the layer in the vector of layers at poistion i.
Definition: DeepNet.h:289
void Print() const
Print the Deep Net Info.
Definition: DeepNet.h:1150
void SetWeightDecay(Scalar_t weightDecay)
Definition: DeepNet.h:334
void AddReshapeLayer(TReshapeLayer< Architecture_t > *reshapeLayer)
Function for adding Reshape Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:703
void Clear()
Remove all layers from the network.
Definition: DeepNet.h:301
Scalar_t RegularizationTerm() const
Function for computing the regularizaton term to be added to the loss function
Definition: DeepNet.h:1118
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
Definition: DeepNet.h:618
void Prediction(Matrix_t &predictions, std::vector< Matrix_t > input, EOutputFunction f)
Prediction for the given inputs, based on what network learned.
Definition: DeepNet.h:1140
TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth, size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI=EInitialization::kZero, ERegularization fR=ERegularization::kNone, Scalar_t fWeightDecay=0.0, bool isTraining=false)
Constructor.
Definition: DeepNet.h:355
void ParallelForward(std::vector< TDeepNet< Architecture_t, Layer_t > > &nets, std::vector< TTensorBatch< Architecture_t > > &batches, bool applyDropout=false)
Function for parallel forward in the vector of deep nets, where the master net is the net calling thi...
Definition: DeepNet.h:745
void SetInputDepth(size_t inputDepth)
Definition: DeepNet.h:325
bool IsTraining() const
Definition: DeepNet.h:313
size_t GetInputHeight() const
Definition: DeepNet.h:306
size_t fBatchSize
Batch size used for training and evaluation.
Definition: DeepNet.h:86
void Prediction(Matrix_t &predictions, EOutputFunction f) const
Prediction based on activations stored in the last layer.
Definition: DeepNet.h:1132
void ParallelBackwardMomentum(std::vector< TDeepNet< Architecture_t, Layer_t > > &nets, std::vector< TTensorBatch< Architecture_t > > &batches, Scalar_t learningRate, Scalar_t momentum)
Function for parallel backward in the vector of deep nets, where the master net is the net calling th...
Definition: DeepNet.h:958
size_t fInputWidth
The width of the input.
Definition: DeepNet.h:89
void SetInputHeight(size_t inputHeight)
Definition: DeepNet.h:326
size_t GetBatchWidth() const
Definition: DeepNet.h:311
void AddBasicRNNLayer(TBasicRNNLayer< Architecture_t > *basicRNNLayer)
Function for adding Vanilla RNN when the layer is already created.
Definition: DeepNet.h:521
void AddMaxPoolLayer(CNN::TMaxPoolLayer< Architecture_t > *maxPoolLayer)
Function for adding Max Pooling layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:481
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
Definition: DeepNet.h:449
Scalar_t fWeightDecay
The weight decay factor.
Definition: DeepNet.h:100
Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization=true) const
Function for evaluating the loss, based on the activations stored in the last layer.
Definition: DeepNet.h:1092
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
Definition: DeepNet.h:403
ERegularization fR
The regularization used for the network.
Definition: DeepNet.h:99
size_t GetInputWidth() const
Definition: DeepNet.h:307
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Definition: DeepNet.h:488
void Backward(std::vector< Matrix_t > &input, const Matrix_t &groundTruth, const Matrix_t &weights)
Function that executes the entire backward pass in the network.
Definition: DeepNet.h:889
bool isInteger(Scalar_t x) const
Definition: DeepNet.h:80
size_t GetOutputWidth() const
Definition: DeepNet.h:294
bool fIsTraining
Is the network training?
Definition: DeepNet.h:95
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
Definition: DeepNet.h:651
void SetBatchSize(size_t batchSize)
Setters.
Definition: DeepNet.h:324
void AddConvLayer(TConvLayer< Architecture_t > *convLayer)
Function for adding Convolution Layer in the Deep Neural Network, when the layer is already created.
Definition: DeepNet.h:442
size_t fInputHeight
The height of the input.
Definition: DeepNet.h:88
void SetRegularization(ERegularization R)
Definition: DeepNet.h:333
void Forward(std::vector< Matrix_t > &input, bool applyDropout=false)
Function that executes the entire forward pass in the network.
Definition: DeepNet.h:734
TDeepNet(const TDeepNet &)
Copy-constructor.
Definition: DeepNet.h:367
size_t fBatchWidth
The width of the batch used for training/testing.
Definition: DeepNet.h:93
ELossFunction fJ
The loss function of the network.
Definition: DeepNet.h:97
~TDeepNet()
Destructor.
Definition: DeepNet.h:378
void SetBatchWidth(size_t batchWidth)
Definition: DeepNet.h:330
Scalar_t Loss(std::vector< Matrix_t > &input, const Matrix_t &groundTruth, const Matrix_t &weights, bool applyDropout=false, bool includeRegularization=true)
Function for evaluating the loss, based on the propagation of the given input.
Definition: DeepNet.h:1108
void SetDropoutProbabilities(const std::vector< Double_t > &probabilities)
Definition: DeepNet.h:1169
TDeepNet()
Default Constructor.
Definition: DeepNet.h:345
void SetBatchHeight(size_t batchHeight)
Definition: DeepNet.h:329
void Update(Scalar_t learningRate)
Function that will update the weights and biases in the layers that contain weights and biases.
Definition: DeepNet.h:1083
ELossFunction GetLossFunction() const
Definition: DeepNet.h:315
size_t calculateDimension(int imgDim, int fltDim, int padding, int stride)
Definition: DeepNet.h:385
const Layer_t * GetLayerAt(size_t i) const
Definition: DeepNet.h:290
void SetInitialization(EInitialization I)
Definition: DeepNet.h:332
EInitialization GetInitialization() const
Definition: DeepNet.h:316
void ParallelBackwardNestorov(std::vector< TDeepNet< Architecture_t, Layer_t > > &nets, std::vector< TTensorBatch< Architecture_t > > &batches, Scalar_t learningRate, Scalar_t momentum)
Function for parallel backward in the vector of deep nets, where the master net is the net calling th...
Definition: DeepNet.h:1021
void SetInputWidth(size_t inputWidth)
Definition: DeepNet.h:327
typename Architecture_t::Matrix_t Matrix_t
Definition: DeepNet.h:76
EInitialization fI
The initialization method of the network.
Definition: DeepNet.h:98
size_t GetBatchDepth() const
Definition: DeepNet.h:309
Generic layer class.
Definition: DenseLayer.h:56
Double_t x[n]
Definition: legend1.C:17
#define I(x, y, z)
static double A[]
void Copy(void *source, void *dest)
void Print(std::ostream &os, const OptionType &opt)
EInitialization
Definition: Functions.h:70
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:44
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:496
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:63
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:55
auto debugTensor(const std::vector< typename Architecture::Matrix_t > &A, const std::string name="tensor") -> void
Definition: DeepNet.h:718
create variable transformations
auto * l
Definition: textangle.C:4