#ifndef TMVA_DNN_DEEPNET
#define TMVA_DNN_DEEPNET

#include "TMVA/DNN/DAE/CompressionLayer.h"
#include "TMVA/DNN/DAE/CorruptionLayer.h"
#include "TMVA/DNN/DAE/ReconstructionLayer.h"
#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
/** \class TDeepNet
    Generic Deep Neural Network class. */
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
class TDeepNet {
public:
   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;
   /*! Constructor */
   TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
            size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
            ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);

   /*! Function for adding Convolution layer in the Deep Neural Network,
    *  with a given depth, filter size, stride and padding. */
   TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
                                            size_t strideCols, size_t paddingHeight, size_t paddingWidth,
                                            EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   /*! Function for adding Max Pooling layer in the Deep Neural Network,
    *  with a given frame size and stride. */
   TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
                                                  size_t strideCols, Scalar_t dropoutProbability = 1.0);
   /*! Function for adding Corruption layer in the Deep Neural Network,
    *  with a given number of visible and hidden units. It corrupts the input
    *  according to the given corruption level and dropout probability. */
   TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                        Scalar_t dropoutProbability, Scalar_t corruptionLevel);

   /*! Function for adding Corruption layer in the Deep Neural Network,
    *  when the layer is already created. */
   void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);
   /*! Function for adding Compression layer in the Deep Neural Network,
    *  with a given number of visible and hidden units. It compresses the input. */
   TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                          Scalar_t dropoutProbability, EActivationFunction f,
                                                          std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);

   /*! Function for adding Compression layer in the Deep Neural Network,
    *  when the layer is already created. */
   void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);
   /*! Function for adding Reconstruction layer in the Deep Neural Network,
    *  with a given number of visible and hidden units. It reconstructs the
    *  input from its compressed representation. */
   TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                                Scalar_t learningRate, EActivationFunction f,
                                                                std::vector<Matrix_t> weights,
                                                                std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
                                                                Scalar_t dropoutProbability);

   /*! Function for adding Reconstruction layer in the Deep Neural Network,
    *  when the layer is already created. */
   void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);
   /*! Function for adding a logistic regression layer in the Deep Neural Network,
    *  with a given number of input and output units. It classifies the input. */
   TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
                                                                        size_t testDataBatchSize,
                                                                        Scalar_t learningRate);

   /*! Function for adding a logistic regression layer in the Deep Neural Network,
    *  when the layer is already created. */
   void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
   /*! Greedy layer-wise pre-training of the denoising autoencoder layers in an unsupervised manner. */
   void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
                 bool applyDropout = false);
   /*! Supervised fine-tuning: adds a logistic regression layer on top of the
    *  pre-trained layers and trains it on the labelled data. */
   void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
                 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
#ifdef USE_PARALLEL_DEEPNET
   // ... declarations of the parallel training helpers (defined below) ...
#endif // USE_PARALLEL_DEEPNET

   /*! Function for evaluating the loss, based on the propagation of the given input. */
   Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights, bool inTraining = false,
                 bool includeRegularization = true);

   // ... remaining member functions and data members omitted ...
};
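// Illustrative usage sketch (not part of the original header): assembling a small
// fully connected network with the CPU backend. The concrete values (batch size,
// number of features, layer widths) are hypothetical; the batch layout shown
// (batchDepth = 1, batchHeight = batchSize, batchWidth = nFeatures) is the one
// commonly used for dense-only networks.
//
//    using Architecture_t = TMVA::DNN::TCpu<Double_t>;
//    using Net_t          = TMVA::DNN::TDeepNet<Architecture_t>;
//
//    size_t batchSize = 32, nFeatures = 16;
//    Net_t net(batchSize, 1, 1, nFeatures, 1, batchSize, nFeatures,
//              ELossFunction::kCrossEntropy, EInitialization::kGauss);
//    net.AddDenseLayer(64, EActivationFunction::kRelu);
//    net.AddDenseLayer(1, EActivationFunction::kIdentity);
//    net.Initialize();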
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet()
   : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
     fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
     fIsTraining(true), fWeightDecay(0.0)
{
}
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                            size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
                                            EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
     fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J),
     fI(I), fR(R), fWeightDecay(weightDecay)
{
}
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(const TDeepNet &deepNet)
   : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
     fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
     fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
     fWeightDecay(deepNet.fWeightDecay)
{
}
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::~TDeepNet()
{
   // Release the memory owned by the layers
   for (auto layer : fLayers)
      delete layer;
   fLayers.clear();
}
template <typename Architecture_t, typename Layer_t>
size_t TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride)
{
   Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1;
   if (!isInteger(dimension) || dimension <= 0) {
      int iLayer = fLayers.size();
      Fatal("calculateDimension",
            "Incompatible hyper-parameters for layer %d - (imageDim, filterDim, padding, stride): %d , %d , %d , %d",
            iLayer, imgDim, fltDim, padding, stride);
   }

   return (size_t)dimension;
}
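// Worked example of the dimension formula above (illustrative numbers): an input
// dimension of 28 with a 5x5 filter, padding 0 and stride 1 yields
// (28 - 5 + 2*0) / 1 + 1 = 24 output cells per row; with padding 2 the size is
// preserved, (28 - 5 + 4) / 1 + 1 = 28. Hyper-parameter combinations for which
// the formula does not give a positive integer are rejected by the check above.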
template <typename Architecture_t, typename Layer_t>
TConvLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddConvLayer(size_t depth, size_t filterHeight,
                                                                            size_t filterWidth, size_t strideRows,
                                                                            size_t strideCols, size_t paddingHeight,
                                                                            size_t paddingWidth, EActivationFunction f,
                                                                            Scalar_t dropoutProbability)
{
   // All variables defining a convolutional layer
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   // Create the convolutional layer
   TConvLayer<Architecture_t> *convLayer = new TConvLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
      strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);

   fLayers.push_back(convLayer);
   return convLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddConvLayer(TConvLayer<Architecture_t> *convLayer)
{
   fLayers.push_back(convLayer);
}
template <typename Architecture_t, typename Layer_t>
TMaxPoolLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(size_t frameHeight, size_t frameWidth,
                                                                                  size_t strideRows, size_t strideCols,
                                                                                  Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   TMaxPoolLayer<Architecture_t> *maxPoolLayer = new TMaxPoolLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth, strideRows, strideCols,
      dropoutProbability);

   fLayers.push_back(maxPoolLayer);
   return maxPoolLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(TMaxPoolLayer<Architecture_t> *maxPoolLayer)
{
   fLayers.push_back(maxPoolLayer);
}
template <typename Architecture_t, typename Layer_t>
TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps, bool rememberState,
                                                                                    EActivationFunction f)
{
   // check that the input size and the number of time steps are consistent with the layout
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicRNNLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu",
            inputSize, inputWidth);
   }
   if (timeSteps != inputHeight || timeSteps != inputDepth) {
      Error("AddBasicRNNLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu",
            timeSteps, inputHeight);
   }

   TBasicRNNLayer<Architecture_t> *basicRNNLayer =
      new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState,
                                         f, fIsTraining, this->GetInitialization());
   fLayers.push_back(basicRNNLayer);
   return basicRNNLayer;
}
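// Note on the layout checks above (illustrative numbers): for a recurrent layer
// processing, say, timeSteps = 10 steps of inputSize = 5 features each, the
// previous layer (or the network input) is expected to report height = 10 and
// width = 5; otherwise the Error() messages above are emitted, although the
// layer is still added with the requested sizes.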
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer)
{
   fLayers.push_back(basicRNNLayer);
}
// DAE functions

template <typename Architecture_t, typename Layer_t>
TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
                                                                                        size_t hiddenUnits,
                                                                                        Scalar_t dropoutProbability,
                                                                                        Scalar_t corruptionLevel)
{
   size_t batchSize = this->GetBatchSize();

   TCorruptionLayer<Architecture_t> *corruptionLayer =
      new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
   fLayers.push_back(corruptionLayer);
   return corruptionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
{
   fLayers.push_back(corruptionLayer);
}
template <typename Architecture_t, typename Layer_t>
TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
{
   size_t batchSize = this->GetBatchSize();

   TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
   fLayers.push_back(compressionLayer);
   return compressionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
{
   fLayers.push_back(compressionLayer);
}
template <typename Architecture_t, typename Layer_t>
TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
   std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();

   TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
   fLayers.push_back(reconstructionLayer);
   return reconstructionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   TReconstructionLayer<Architecture_t> *reconstructionLayer)
{
   fLayers.push_back(reconstructionLayer);
}
template <typename Architecture_t, typename Layer_t>
TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
{
   size_t batchSize = this->GetBatchSize();

   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
      new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
   fLayers.push_back(logisticRegressionLayer);
   return logisticRegressionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
{
   fLayers.push_back(logisticRegressionLayer);
}
template <typename Architecture_t, typename Layer_t>
TDenseLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(size_t width, EActivationFunction f,
                                                                              Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();
   if (fLayers.size() == 0) {
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputWidth = lastLayer->GetWidth();
   }
   TDenseLayer<Architecture_t> *denseLayer =
      new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
   fLayers.push_back(denseLayer);
   return denseLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer)
{
   fLayers.push_back(denseLayer);
}
template <typename Architecture_t, typename Layer_t>
TReshapeLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(size_t depth, size_t height,
                                                                                  size_t width, bool flattening)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   size_t outputNSlices;
   size_t outputNRows;
   size_t outputNCols;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   if (flattening) {
      outputNSlices = 1;
      outputNRows = this->GetBatchSize();
      outputNCols = depth * height * width;
      size_t inputNCols = inputDepth * inputHeight * inputWidth;
      if (outputNCols != 0 && outputNCols != inputNCols) {
         Info("AddReshapeLayer",
              "Incompatible dimensions - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - "
              "forcing the flattened output to be %zu",
              inputDepth, inputHeight, inputWidth, depth, height, width, inputNCols);
      }
      outputNCols = inputNCols;
      depth = 1;
      height = 1;
      width = outputNCols;
   } else {
      outputNSlices = this->GetBatchSize();
      outputNRows = depth;
      outputNCols = height * width;
   }

   TReshapeLayer<Architecture_t> *reshapeLayer =
      new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
                                        outputNSlices, outputNRows, outputNCols, flattening);

   fLayers.push_back(reshapeLayer);
   return reshapeLayer;
}
template <typename Architecture_t, typename Layer_t>
TBatchNormLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBatchNormLayer(Scalar_t momentum, Scalar_t epsilon)
{
   int axis = -1;
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth = 0;
   size_t inputHeight = 0;
   size_t inputWidth = 0;
   // shape of the output tensor (column-major by default); it is normally
   // (depth, hw, bsize) and, for dense layers, (bsize, w, 1)
   std::vector<size_t> shape = {1, 1, 1};
   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      // assume the layout used for a dense layer
      shape[0] = batchSize;
      shape[1] = inputWidth;
      shape[2] = 1;
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      shape = lastLayer->GetOutput().GetShape();
      if (shape.size() > 3) {
         // fold any extra dimensions into the third one
         for (size_t i = 3; i < shape.size(); ++i)
            shape[2] *= shape[i];
      }
   }
   std::cout << "addBNormLayer " << inputDepth << " , " << inputHeight << " , " << inputWidth << " , " << shape[0]
             << "  " << shape[1] << "  " << shape[2] << std::endl;

   auto bnormLayer =
      new TBatchNormLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, shape, axis, momentum, epsilon);

   fLayers.push_back(bnormLayer);
   return bnormLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer)
{
   fLayers.push_back(reshapeLayer);
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Initialize()
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Initialize();
   }
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::ResetTraining()
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->ResetTraining();
   }
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Forward(Tensor_t &input, bool applyDropout)
{
   fLayers.front()->Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
                                                 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                                                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
                                                 EActivationFunction f, bool applyDropout) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   // number of autoencoder stages to train greedily
   // (note: sizeof(numHiddenUnitsPerLayer) / sizeof(numHiddenUnitsPerLayer[0]) would be wrong for a std::vector)
   size_t numOfHiddenLayers = numHiddenUnitsPerLayer.size();
   size_t visibleUnits = (size_t)input[0].GetNrows();

   // first stage: corruption -> compression -> reconstruction on the raw input
   AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(input, applyDropout);

   AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
                       fLayers.back()->GetBiases());
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // pass the corrupted input

   AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
                          fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // pass the compressed input
   fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                            input);

   // three layers have been added (corruption, compression, reconstruction);
   // iterate forward/backward through them, keeping weights and biases in sync
   size_t weightsSize = fLayers.back()->GetWeights().size();
   size_t biasesSize = fLayers.back()->GetBiases().size();
   for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
      for (size_t j = 0; j < weightsSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
      }
      for (size_t j = 0; j < biasesSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
      }
      fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                            fLayers[fLayers.size() - 3]->GetOutput(), input);
   }
   fLayers.back()->Print();

   // subsequent stages: repeat the same scheme on the output of the previous stage
   for (size_t i = 1; i < numOfHiddenLayers; i++) {

      AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout); // pass the compressed input

      AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
                          fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

      AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
                             fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
                             dropoutProbability);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                               fLayers[fLayers.size() - 5]->GetOutput());

      size_t _weightsSize = fLayers.back()->GetWeights().size();
      size_t _biasesSize = fLayers.back()->GetBiases().size();
      for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
         for (size_t j = 0; j < _weightsSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
         }
         for (size_t j = 0; j < _biasesSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
         }
         fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                               fLayers[fLayers.size() - 3]->GetOutput(),
                                               fLayers[fLayers.size() - 5]->GetOutput());
      }
      fLayers.back()->Print();
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
                                                 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
                                                 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   if (fLayers.size() == 0) { // only a logistic regression layer
      size_t inputUnits = input[0].GetNrows();

      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, input, inp2);
      }
      fLayers.back()->Forward(input, false);
      fLayers.back()->Print();
   } else { // fine-tune on top of the pre-trained layers
      size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
      }
      fLayers.back()->Forward(testInput, false);
      fLayers.back()->Print();
   }
}
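// Illustrative call sequence for the two functions above (hypothetical data and
// hyper-parameter values; `trainData`, `testData` and `labels` are assumed to be
// std::vector<Matrix_t> objects prepared by the caller):
//
//    net.PreTrain(trainData, {128, 64}, /*learningRate=*/0.1,
//                 /*corruptionLevel=*/0.3, /*dropoutProbability=*/1.0,
//                 /*epochs=*/10, EActivationFunction::kSigmoid);
//    net.FineTune(trainData, testData, labels, /*outputUnits=*/2,
//                 /*testDataBatchSize=*/testData.size(), /*learningRate=*/0.1,
//                 /*epochs=*/10);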
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Backward(const Tensor_t &input, const Matrix_t &groundTruth,
                                                 const Matrix_t &weights)
{
   // compute the gradient of the loss with respect to the output of the last layer
   Matrix_t last_actgrad = fLayers.back()->GetActivationGradientsAt(0);
   Matrix_t last_output = fLayers.back()->GetOutputAt(0);
   evaluateGradients<Architecture_t>(last_actgrad, this->GetLossFunction(), groundTruth, last_output, weights);

   // propagate the gradients backwards through the hidden layers
   for (size_t i = fLayers.size() - 1; i > 0; i--) {
      auto &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
      auto &activations_backward = fLayers[i - 1]->GetOutput();
      fLayers[i]->Backward(activation_gradient_backward, activations_backward);
   }

   // the first layer does not need to propagate activation gradients further back
   Tensor_t dummy;
   fLayers[0]->Backward(dummy, input);
}
#ifdef USE_PARALLEL_DEEPNET

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                        std::vector<TTensorBatch<Architecture_t>> &batches,
                                                        bool applyDropout) -> void
{
   size_t depth = this->GetDepth();

   // forward the first layer of every worker net on its own batch
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
   }

   // then forward layer i of every worker net
   for (size_t i = 1; i < depth; i++) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
      }
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                         std::vector<TTensorBatch<Architecture_t>> &batches,
                                                         Scalar_t learningRate) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // evaluate the loss gradients of the last layer of every worker net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // backpropagate the error through layers depth-1 .. 1 of every worker net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
                                         nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // first layer of every worker net
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   // apply every worker's gradients to the master net and copy the updated
   // weights and biases back to the workers
   for (size_t i = 0; i < nets.size(); i++) {
      for (size_t j = 0; j < depth; j++) {
         Layer_t *masterLayer = this->GetLayerAt(j);
         Layer_t *layer = nets[i].GetLayerAt(j);

         masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
         layer->CopyWeights(masterLayer->GetWeights());

         masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}
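// In other words, ParallelBackward implements a simple synchronous data-parallel
// update: each worker net computes gradients on its own batch, the master net
// applies each worker's gradients in turn with the given learning rate, and the
// workers then resynchronize by copying back the master's weights and biases.
// The momentum and Nestorov variants below follow the same structure, but first
// accumulate the worker gradients into the master's gradient buffers and only
// then apply a single update.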
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // evaluate the loss gradients of the last layer of every worker net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // backpropagate through layers depth-1 .. 1 and accumulate the worker
   // gradients into the master net
   for (size_t i = depth - 1; i > 0; i--) {
      Layer_t *masterLayer = this->GetLayerAt(i);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
   }

   std::vector<Matrix_t> dummy;

   // first layer of every worker net
   Layer_t *masterFirstLayer = this->GetLayerAt(0);
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);

      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);

      masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
      masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
   }

   masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
   masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);

   // apply the accumulated gradients and resynchronize the workers
   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      masterLayer->Update(1.0);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::cout << "Parallel Backward Nestorov" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // evaluate the loss gradients of the last layer of every worker net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // backpropagate the error through layers depth-1 .. 1 of every worker net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // first layer of every worker net
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);
      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         // workers are set to the master weights shifted by the master's current gradients
         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());

         layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
         layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
      }

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);

      masterLayer->Update(1.0);
   }
}

#endif // USE_PARALLEL_DEEPNET
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Update(Scalar_t learningRate)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Update(learningRate);
   }
}
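// Illustrative single training step built from the functions defined in this file
// (hypothetical names; `net`, `inputTensor`, `targets` and `eventWeights` are
// assumed to be prepared by the caller with the layouts expected by the backend):
//
//    net.Forward(inputTensor, /*applyDropout=*/true);
//    net.Backward(inputTensor, targets, eventWeights);
//    net.Update(/*learningRate=*/0.01);
//    auto loss = net.Loss(targets, eventWeights); // includes the regularization term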
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool includeRegularization) const -> Scalar_t
{
   // evaluate the loss from the activations stored in the last layer
   auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);

   if (includeRegularization) {
      loss += RegularizationTerm();
   }

   return loss;
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool inTraining, bool includeRegularization) -> Scalar_t
{
   Forward(input, inTraining);
   return Loss(groundTruth, weights, includeRegularization);
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::RegularizationTerm() const -> Scalar_t
{
   Scalar_t reg = 0.0;
   for (size_t i = 0; i < fLayers.size(); i++) {
      for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
         reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
      }
   }
   return this->GetWeightDecay() * reg;
}
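// The term computed above is lambda * sum over all weight matrices W of R(W),
// where lambda is the weight decay factor and R is the configured regularization
// (e.g. a sum of |w| for L1 or of w^2 for L2). With ERegularization::kNone the
// regularization<Architecture_t>() calls return zero and the term vanishes.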
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, EOutputFunction f) const
{
   // apply the output function to the activations stored in the last layer
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f)
{
   Forward(input, false);
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Print() const
{
   std::cout << "DEEP NEURAL NETWORK: Depth = " << this->GetDepth();
   std::cout << " Input = ( " << this->GetInputDepth();
   std::cout << ", " << this->GetInputHeight();
   std::cout << ", " << this->GetInputWidth() << " )";
   std::cout << " Batch size = " << this->GetBatchSize();
   std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;

   for (size_t i = 0; i < fLayers.size(); i++) {
      std::cout << "\tLayer " << i << "\t";
      fLayers[i]->Print();
   }
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::SetDropoutProbabilities(const std::vector<Double_t> &probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i]->SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i]->SetDropoutProbability(1.0);
      }
   }
}

#endif // TMVA_DNN_DEEPNET