#ifndef TMVA_DNN_DEEPNET
#define TMVA_DNN_DEEPNET
#include <cmath>
#include <iostream>
#include <vector>

#include "TError.h"

#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/DenseLayer.h"
#include "TMVA/DNN/ReshapeLayer.h"
#include "TMVA/DNN/BatchNormLayer.h"
#include "TMVA/DNN/CNN/ConvLayer.h"
#include "TMVA/DNN/CNN/MaxPoolLayer.h"
#include "TMVA/DNN/RNN/RNNLayer.h"
#include "TMVA/DNN/RNN/LSTMLayer.h"
#include "TMVA/DNN/RNN/GRULayer.h"

#include "TMVA/DNN/DAE/CompressionLayer.h"
#include "TMVA/DNN/DAE/CorruptionLayer.h"
#include "TMVA/DNN/DAE/ReconstructionLayer.h"
#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
namespace TMVA {
namespace DNN {

/** \class TDeepNet
    Generic Deep Neural Network class.
    \tparam Architecture_t The low-level backend to be used.
    \tparam Layer_t The base type of the layers held by the net. */
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
class TDeepNet {
public:
   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

private:
   inline bool isInteger(Scalar_t x) const { return x == std::floor(x); }
   size_t calculateDimension(int imgDim, int fltDim, int padding, int stride);

   std::vector<Layer_t *> fLayers; ///< The layers consisting the DeepNet
   /* ... further data members (batch and input dimensions, loss function fJ,
      initialization fI, regularization fR, weight decay fWeightDecay,
      training flag fIsTraining) ... */

public:
   /// Constructor.
   TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
            size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
            ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);

   /// Function for adding Convolution layer in the Deep Neural Network, with a given depth, filter dimensions,
   /// strides, padding, activation function and dropout probability.
   TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
                                            size_t strideCols, size_t paddingHeight, size_t paddingWidth,
                                            EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   /// Function for adding Max Pooling layer in the Deep Neural Network, with a given frame height and width,
   /// strides and dropout probability.
   TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
                                                  size_t strideCols, Scalar_t dropoutProbability = 1.0);
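   // Usage sketch (illustrative, not part of the original header): assemble a small
   // CNN. TCpu<double> is one concrete Architecture_t backend; the numeric layer
   // parameters below are arbitrary example values.
   //
   //   using Net_t = TMVA::DNN::TDeepNet<TMVA::DNN::TCpu<double>>;
   //   Net_t net(64, 1, 28, 28, 64, 1, 28 * 28, ELossFunction::kCrossEntropy);
   //   net.AddConvLayer(8, 3, 3, 1, 1, 1, 1, EActivationFunction::kRelu); // 8 filters, 3x3, stride 1, pad 1
   //   net.AddMaxPoolLayer(2, 2, 2, 2); // 2x2 frames with stride 2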
   /// Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
   TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false, bool returnSequence = false,
                                                    EActivationFunction f = EActivationFunction::kTanh);

   /// Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
   TBasicLSTMLayer<Architecture_t> *AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                      bool rememberState = false, bool returnSequence = false);

   /// Function for adding GRU Layer in the Deep Neural Network, with given parameters.
   TBasicGRULayer<Architecture_t> *AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false, bool returnSequence = false,
                                                    bool resetGateAfter = false);
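   // Usage sketch (illustrative): recurrent layers expect the input laid out as
   // (batchSize, timeSteps, inputSize); the state size of 8 below is an arbitrary
   // example value.
   //
   //   TDeepNet<TCpu<double>> rnet(32, 1, 10, 5, 32, 10, 5, ELossFunction::kMeanSquaredError);
   //   rnet.AddBasicLSTMLayer(8, 5, 10, /*rememberState=*/false, /*returnSequence=*/false);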
   /// Function for adding Corruption layer in the Deep Neural Network, with given visible and hidden units,
   /// dropout probability and corruption level.
   TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                        Scalar_t dropoutProbability, Scalar_t corruptionLevel);

   /// Function for adding Corruption layer in the Deep Neural Network, when the layer is already created.
   void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);

   /// Function for adding Compression layer in the Deep Neural Network, with given visible and hidden units.
   TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                          Scalar_t dropoutProbability, EActivationFunction f,
                                                          std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);

   /// Function for adding Compression layer in the Deep Neural Network, when the layer is already created.
   void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);
   /// Function for adding Reconstruction layer in the Deep Neural Network, with given visible and hidden units.
   TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                                Scalar_t learningRate, EActivationFunction f,
                                                                std::vector<Matrix_t> weights,
                                                                std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
                                                                Scalar_t dropoutProbability);

   /// Function for adding Reconstruction layer in the Deep Neural Network, when the layer is already created.
   void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);

   /// Function for adding Logistic Regression layer in the Deep Neural Network, with given input and output units.
   TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
                                                                        size_t testDataBatchSize,
                                                                        Scalar_t learningRate);

   /// Function for adding Logistic Regression layer in the Deep Neural Network, when the layer is already created.
   void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
   /// Function for training a deep autoencoder network, layer by layer (greedy layer-wise pre-training).
   void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
                 bool applyDropout = false);

   /// Function for fine-tuning a pre-trained network with a supervised output layer.
   void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
                 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
#ifdef USE_PARALLEL_DEEPNET
   /* ... declarations of the parallel training helpers implemented at the end of
      this file: ParallelForward, ParallelBackward, ParallelBackwardMomentum and
      ParallelBackwardNestorov ... */
#endif // USE_PARALLEL_DEEPNET

   /// Function for evaluating the loss, based on the propagation of the given input.
   Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights, bool inTraining = false,
                 bool includeRegularization = true);

   /* ... remaining interface: Initialize, Forward, Backward, Update, Prediction,
      Print, getters and setters ... */
};
//______________________________________________________________________________
/// Default constructor.
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet()
   : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
     fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
     fIsTraining(true), fWeightDecay(0.0)
{
}
//______________________________________________________________________________
/// Constructor.
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                            size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
                                            EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
     fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fJ(J), fI(I), fR(R),
     fIsTraining(isTraining), fWeightDecay(weightDecay)
{
}
//______________________________________________________________________________
/// Copy-constructor.
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(const TDeepNet &deepNet)
   : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
     fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
     fBatchWidth(deepNet.fBatchWidth), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
     fIsTraining(deepNet.fIsTraining), fWeightDecay(deepNet.fWeightDecay)
{
}
//______________________________________________________________________________
/// Destructor: the net owns its layers and releases them here.
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::~TDeepNet()
{
   for (auto layer : fLayers)
      delete layer;
   fLayers.clear();
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
size_t TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride)
{
   Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1;
   if (!isInteger(dimension) || dimension <= 0) {
      int iLayer = (int)fLayers.size();
      Fatal("calculateDimension",
            "Not compatible hyper parameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d",
            iLayer, imgDim, fltDim, padding, stride);
   }

   return (size_t)dimension;
}
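// Worked example of the formula above: imgDim = 28, fltDim = 3, padding = 1 and
// stride = 1 give (28 - 3 + 2) / 1 + 1 = 28, a size-preserving convolution. With
// stride = 2 the quotient 27 / 2 = 13.5 is not an integer, so calculateDimension
// aborts via Fatal() instead of silently truncating the output size.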
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TConvLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddConvLayer(size_t depth, size_t filterHeight,
                                                                            size_t filterWidth, size_t strideRows,
                                                                            size_t strideCols, size_t paddingHeight,
                                                                            size_t paddingWidth, EActivationFunction f,
                                                                            Scalar_t dropoutProbability)
{
   // Infer the input dimensions: from the network itself for the first layer,
   // otherwise from the output shape of the last layer added so far.
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth, inputHeight, inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   // New Conv Layer
   TConvLayer<Architecture_t> *convLayer = new TConvLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
      strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);

   fLayers.push_back(convLayer);
   return convLayer;
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddConvLayer(TConvLayer<Architecture_t> *convLayer)
{
   fLayers.push_back(convLayer);
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TMaxPoolLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(size_t frameHeight, size_t frameWidth,
                                                                                  size_t strideRows, size_t strideCols,
                                                                                  Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth, inputHeight, inputWidth;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   TMaxPoolLayer<Architecture_t> *maxPoolLayer = new TMaxPoolLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
      strideRows, strideCols, dropoutProbability);

   fLayers.push_back(maxPoolLayer);
   return maxPoolLayer;
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(CNN::TMaxPoolLayer<Architecture_t> *maxPoolLayer)
{
   fLayers.push_back(maxPoolLayer);
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps, bool rememberState,
                                                                                    bool returnSequence,
                                                                                    EActivationFunction f)
{
   // Check that input size and time steps are consistent with the input layout.
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicRNNLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu",
            inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicRNNLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu or %zu",
            timeSteps, inputHeight, inputDepth);
   }

   TBasicRNNLayer<Architecture_t> *basicRNNLayer =
      new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState,
                                         returnSequence, f, fIsTraining, this->GetInitialization());
   fLayers.push_back(basicRNNLayer);
   return basicRNNLayer;
}
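// Note on the layout checks above: the previous layer reports height = timeSteps
// and width = inputSize, so e.g. requesting AddBasicRNNLayer(8, /*inputSize=*/4,
// /*timeSteps=*/10) on top of an input of width 5 triggers the
// "Inconsistent input size" error (the values here are hypothetical).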
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer)
{
   fLayers.push_back(basicRNNLayer);
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TBasicLSTMLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicLSTMLayer(size_t stateSize, size_t inputSize,
                                                                                      size_t timeSteps,
                                                                                      bool rememberState,
                                                                                      bool returnSequence)
{
   // Check that input size and time steps are consistent with the input layout.
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicLSTMLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu",
            inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicLSTMLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu",
            timeSteps, inputHeight);
   }

   TBasicLSTMLayer<Architecture_t> *basicLSTMLayer = new TBasicLSTMLayer<Architecture_t>(
      this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence, fIsTraining,
      this->GetInitialization());
   fLayers.push_back(basicLSTMLayer);
   return basicLSTMLayer;
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicLSTMLayer(TBasicLSTMLayer<Architecture_t> *basicLSTMLayer)
{
   fLayers.push_back(basicLSTMLayer);
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TBasicGRULayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicGRULayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps, bool rememberState,
                                                                                    bool returnSequence,
                                                                                    bool resetGateAfter)
{
   // Check that input size and time steps are consistent with the input layout.
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicGRULayer", "Inconsistent input size with input layout - it should be %zu instead of %zu",
            inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicGRULayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu",
            timeSteps, inputHeight);
   }

   TBasicGRULayer<Architecture_t> *basicGRULayer = new TBasicGRULayer<Architecture_t>(
      this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter,
      fIsTraining, this->GetInitialization());
   fLayers.push_back(basicGRULayer);
   return basicGRULayer;
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicGRULayer(TBasicGRULayer<Architecture_t> *basicGRULayer)
{
   fLayers.push_back(basicGRULayer);
}

// DAE functions.
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
                                                                                        size_t hiddenUnits,
                                                                                        Scalar_t dropoutProbability,
                                                                                        Scalar_t corruptionLevel)
{
   size_t batchSize = this->GetBatchSize();

   TCorruptionLayer<Architecture_t> *corruptionLayer =
      new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
   fLayers.push_back(corruptionLayer);
   return corruptionLayer;
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
{
   fLayers.push_back(corruptionLayer);
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
{
   size_t batchSize = this->GetBatchSize();

   TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
   fLayers.push_back(compressionLayer);
   return compressionLayer;
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
{
   fLayers.push_back(compressionLayer);
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
   std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();

   TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
   fLayers.push_back(reconstructionLayer);
   return reconstructionLayer;
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   TReconstructionLayer<Architecture_t> *reconstructionLayer)
{
   fLayers.push_back(reconstructionLayer);
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
{
   size_t batchSize = this->GetBatchSize();

   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
      new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
   fLayers.push_back(logisticRegressionLayer);
   return logisticRegressionLayer;
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
{
   fLayers.push_back(logisticRegressionLayer);
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TDenseLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(size_t width, EActivationFunction f,
                                                                              Scalar_t dropoutProbability)
{
   // The input width is the network input width for the first layer,
   // otherwise the output width of the previously added layer.
   size_t inputWidth;
   if (fLayers.size() == 0) {
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputWidth = lastLayer->GetWidth();
   }

   TDenseLayer<Architecture_t> *denseLayer = new TDenseLayer<Architecture_t>(
      this->GetBatchSize(), inputWidth, width, this->GetInitialization(), dropoutProbability, f,
      this->GetRegularization(), this->GetWeightDecay());
   fLayers.push_back(denseLayer);
   return denseLayer;
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer)
{
   fLayers.push_back(denseLayer);
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TReshapeLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(size_t depth, size_t height,
                                                                                  size_t width, bool flattening)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth, inputHeight, inputWidth;
   size_t outputNSlices;
   size_t outputNRows;
   size_t outputNCols;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   if (flattening) {
      outputNSlices = 1;
      outputNRows = this->GetBatchSize();
      outputNCols = depth * height * width;
      size_t inputNCols = inputDepth * inputHeight * inputWidth;
      if (outputNCols != 0 && outputNCols != inputNCols) {
         Info("AddReshapeLayer",
              "Dimensions not compatible - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu "
              "- Force flattening output to be %zu",
              inputDepth, inputHeight, inputWidth, depth, height, width, inputNCols);
      }
      outputNCols = inputNCols;
      depth = 1;
      height = 1;
      width = outputNCols;
   } else {
      outputNSlices = this->GetBatchSize();
      outputNRows = depth;
      outputNCols = height * width;
   }

   TReshapeLayer<Architecture_t> *reshapeLayer =
      new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
                                        outputNSlices, outputNRows, outputNCols, flattening);
   fLayers.push_back(reshapeLayer);
   return reshapeLayer;
}
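// Usage sketch (illustrative): with flattening = true the depth/height/width
// arguments are overridden by the product of the input dimensions (see above),
// so zeros can be passed when flattening a convolutional stack before a dense layer:
//
//   net.AddReshapeLayer(0, 0, 0, /*flattening=*/true);
//   net.AddDenseLayer(10, EActivationFunction::kIdentity);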
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
TBatchNormLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBatchNormLayer(Scalar_t momentum,
                                                                                      Scalar_t epsilon)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth = 0;
   size_t inputHeight = 0;
   size_t inputWidth = 0;
   // Shape of the layer output tensor.
   std::vector<size_t> shape = {1, 1, 1};
   if (fLayers.size() == 0) {
      // First layer: assume a dense-layer-like input layout.
      inputWidth = this->GetInputWidth();
      shape[0] = batchSize;
      shape[1] = inputWidth;
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      shape = lastLayer->GetOutput().GetShape();
      // Collapse any extra tensor dimensions into the third one.
      if (shape.size() > 3) {
         for (size_t i = 3; i < shape.size(); ++i)
            shape[2] *= shape[i];
      }
   }
   /* ... construction of the TBatchNormLayer from (batchSize, inputDepth, inputHeight,
      inputWidth, shape, momentum, epsilon) and fLayers.push_back ... */
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer)
{
   fLayers.push_back(reshapeLayer);
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Initialize() -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Initialize();
   }
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ResetTraining() -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->ResetTraining();
   }
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Forward(Tensor_t &input, bool applyDropout) -> void
{
   fLayers.front()->Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
   }
}
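// Usage sketch (illustrative): propagate one batch, then read the activations of
// the last layer; the tensor must match the (batchDepth, batchHeight, batchWidth)
// layout the net was constructed with.
//
//   net.Forward(batchInput, /*applyDropout=*/false);
//   const auto &output = net.GetLayers().back()->GetOutput();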
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
                                                 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                                                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
                                                 EActivationFunction f, bool applyDropout) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   // One autoencoder per requested hidden layer.
   size_t numOfHiddenLayers = numHiddenUnitsPerLayer.size();
   size_t visibleUnits = (size_t)input[0].GetNrows();

   AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(input, applyDropout);

   AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
                       fLayers.back()->GetBiases());
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // pass the corrupted input

   AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
                          fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
   fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                            input);

   size_t weightsSize = fLayers.back()->GetWeights().size();
   size_t biasesSize = fLayers.back()->GetBiases().size();
   for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
      // Train the same autoencoder for the remaining epochs.
      for (size_t j = 0; j < weightsSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
      }
      for (size_t j = 0; j < biasesSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
      }
      fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                            fLayers[fLayers.size() - 3]->GetOutput(), input);
   }
   fLayers.back()->Print();

   for (size_t i = 1; i < numOfHiddenLayers; i++) {
      AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);

      AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
                          fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

      AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
                             fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
                             dropoutProbability);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                               fLayers[fLayers.size() - 5]->GetOutput());

      size_t _weightsSize = fLayers.back()->GetWeights().size();
      size_t _biasesSize = fLayers.back()->GetBiases().size();
      for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
         for (size_t j = 0; j < _weightsSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
         }
         for (size_t j = 0; j < _biasesSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
         }
         fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                               fLayers[fLayers.size() - 3]->GetOutput(),
                                               fLayers[fLayers.size() - 5]->GetOutput());
      }
      fLayers.back()->Print();
   }
}
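// Summary of the pre-training scheme implemented above: each hidden layer is
// trained as a denoising autoencoder built from a triple of layers - a corruption
// layer that randomly masks the input, a compression (encoder) layer initialized
// with its weights, and a reconstruction (decoder) layer. The epoch loop copies
// the decoder's refined weights back into the encoder and repeats the
// forward/backward passes, so only the current autoencoder is optimized while
// earlier layers stay fixed.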
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
                                                 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
                                                 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   if (fLayers.size() == 0) { // only a Logistic Regression Layer
      size_t inputUnits = input[0].GetNrows();

      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, input, inp2);
      }
      fLayers.back()->Forward(input, false);
      fLayers.back()->Print();
   } else { // used after any other layer
      size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
      }
      fLayers.back()->Forward(testInput, false);
      fLayers.back()->Print();
   }
}
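// Usage sketch (illustrative): pre-train two hidden layers as stacked denoising
// autoencoders, then attach and fine-tune a logistic-regression output layer.
// All numeric values are arbitrary example choices.
//
//   std::vector<Matrix_t> X, Xtest, Y; // filled by the caller
//   net.PreTrain(X, {128, 64}, /*learningRate=*/0.1, /*corruptionLevel=*/0.3,
//                /*dropoutProbability=*/1.0, /*epochs=*/5, EActivationFunction::kSigmoid);
//   net.FineTune(X, Xtest, Y, /*outputUnits=*/10, /*testDataBatchSize=*/100,
//                /*learningRate=*/0.1, /*epochs=*/10);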
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Backward(const Tensor_t &input, const Matrix_t &groundTruth,
                                                 const Matrix_t &weights) -> void
{
   // Evaluate the gradients of the last layer.
   Matrix_t last_actgrad = fLayers.back()->GetActivationGradientsAt(0);
   Matrix_t last_output = fLayers.back()->GetOutputAt(0);
   evaluateGradients<Architecture_t>(last_actgrad, this->GetLossFunction(), groundTruth,
                                     last_output, weights);

   for (size_t i = fLayers.size() - 1; i > 0; i--) {
      auto &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
      auto &activations_backward = fLayers[i - 1]->GetOutput();
      fLayers[i]->Backward(activation_gradient_backward, activations_backward);
   }

   // A dummy tensor is passed for the first layer, whose activation gradients
   // are not computed.
   Tensor_t dummy;
   fLayers[0]->Backward(dummy, input);
}
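// A typical supervised training step combines the two passes above (sketch):
//
//   net.Forward(batchInput, /*applyDropout=*/true);      // populate activations
//   net.Backward(batchInput, batchLabels, batchWeights); // backpropagate gradients
//   net.Update(learningRate);                            // apply the gradient step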
#ifdef USE_PARALLEL_DEEPNET

//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                        std::vector<TTensorBatch<Architecture_t>> &batches,
                                                        bool applyDropout) -> void
{
   size_t depth = this->GetDepth();

   // The first layer of each deep net reads its input from the corresponding batch.
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
   }

   // Propagate the remaining layers in lockstep across all nets.
   for (size_t i = 1; i < depth; i++) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
      }
   }
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                         std::vector<TTensorBatch<Architecture_t>> &batches,
                                                         Scalar_t learningRate) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net.
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error through the i'th layer of all nets.
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
                                         nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net.
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   // Update the master net with the worker gradients and copy its weights back.
   for (size_t i = 0; i < nets.size(); i++) {
      for (size_t j = 0; j < depth; j++) {
         Layer_t *masterLayer = this->GetLayerAt(j);
         Layer_t *layer = nets[i].GetLayerAt(j);

         masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
         layer->CopyWeights(masterLayer->GetWeights());

         masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net.
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate through the i'th layer of all nets, accumulating the
   // gradients in the corresponding master layer.
   for (size_t i = depth - 1; i > 0; i--) {
      Layer_t *masterLayer = this->GetLayerAt(i);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net.
   Layer_t *masterFirstLayer = this->GetLayerAt(0);
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);

      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);

      masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
      masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
   }

   masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
   masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);

   // Apply the accumulated step and synchronize the worker nets.
   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      masterLayer->Update(1.0);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}
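// For reference, the classical momentum update this accumulation is meant to
// realize per weight matrix W, with velocity v and averaged worker gradient g:
//   v <- momentum * v + learningRate * g
//   W <- W - v
// Here the worker gradients enter with weight learningRate / momentum, the
// accumulated master gradient is rescaled by (1 - momentum), and the final
// masterLayer->Update(1.0) applies the step.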
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::cout << "Parallel Backward Nestorov" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net.
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error through the i'th layer of all nets.
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net.
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);
      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());

         layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
         layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
      }

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);

      masterLayer->Update(1.0);
   }
}

#endif // USE_PARALLEL_DEEPNET
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Update(Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Update(learningRate);
   }
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool includeRegularization) const -> Scalar_t
{
   // The output of the last layer is assumed to be a matrix.
   auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);

   includeRegularization &= (this->GetRegularization() != ERegularization::kNone);
   if (includeRegularization) {
      loss += this->GetWeightDecay() * RegularizationTerm();
   }

   return loss;
}

//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(Tensor_t &input, const Matrix_t &groundTruth,
                                             const Matrix_t &weights, bool inTraining, bool includeRegularization)
   -> Scalar_t
{
   Forward(input, inTraining);
   return Loss(groundTruth, weights, includeRegularization);
}
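// Usage sketch (illustrative): evaluate the weighted loss of one batch, including
// the regularization term scaled by the weight decay.
//
//   Scalar_t trainLoss = net.Loss(batchInput, batchLabels, batchWeights,
//                                 /*inTraining=*/true, /*includeRegularization=*/true);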
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::RegularizationTerm() const -> Scalar_t
{
   Scalar_t reg = 0.0;
   for (size_t i = 0; i < fLayers.size(); i++) {
      for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
         reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
      }
   }
   return reg;
}
//______________________________________________________________________________
/// Prediction based on activations stored in the last layer.
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, EOutputFunction f) const -> void
{
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}

//______________________________________________________________________________
/// Prediction for the given inputs, based on what the network learned.
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f) -> void
{
   Forward(input, false);
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Print() const -> void
{
   std::cout << "DEEP NEURAL NETWORK:  Depth = " << this->GetDepth();
   std::cout << "  Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;

   for (size_t i = 0; i < fLayers.size(); i++) {
      std::cout << "\tLayer " << i << "\t";
      fLayers[i]->Print();
   }
}
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::SetDropoutProbabilities(const std::vector<Double_t> &probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i]->SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i]->SetDropoutProbability(1.0);
      }
   }
}

} // namespace DNN
} // namespace TMVA

#endif // TMVA_DNN_DEEPNET