#ifndef TMVA_DNN_DEEPNET
#define TMVA_DNN_DEEPNET

#include "TMVA/DNN/DAE/CompressionLayer.h"
#include "TMVA/DNN/DAE/CorruptionLayer.h"
#include "TMVA/DNN/DAE/ReconstructionLayer.h"
#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
/** \class TDeepNet
    Generic Deep Neural Network class. The network is a stack of layers (stored in fLayers)
    that is built up with the Add...Layer methods and trained via the forward and backward passes.
*/
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
class TDeepNet {
public:
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;
   /*! Constructor */
   TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
            size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
            ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);

   /*! Function for adding Convolution layer in the Deep Neural Network,
    *  with a given depth, filter size, stride and padding. */
   TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
                                            size_t strideCols, size_t paddingHeight, size_t paddingWidth,
                                            EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   /*! Function for adding Max Pooling layer in the Deep Neural Network,
    *  with a given filter size and stride. */
   TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
                                                  size_t strideCols, Scalar_t dropoutProbability = 1.0);

   /*! Function for adding Recurrent Layer in the Deep Neural Network, with given parameters. */
   TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false);
   /*! Function for adding Corruption layer in the Deep Neural Network,
    *  with a given number of visible and hidden units. */
   TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                        Scalar_t dropoutProbability, Scalar_t corruptionLevel);

   /*! Function for adding Corruption layer, when the layer is already created. */
   void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);

   /*! Function for adding Compression layer in the Deep Neural Network,
    *  taking the weights and biases of the previously added layer. */
   TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                          Scalar_t dropoutProbability, EActivationFunction f,
                                                          std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);

   /*! Function for adding Compression layer, when the layer is already created. */
   void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);

   /*! Function for adding Reconstruction layer in the Deep Neural Network. */
   TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                                Scalar_t learningRate, EActivationFunction f,
                                                                std::vector<Matrix_t> weights,
                                                                std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
                                                                Scalar_t dropoutProbability);

   /*! Function for adding Reconstruction layer, when the layer is already created. */
   void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);

   /*! Function for adding Logistic Regression layer in the Deep Neural Network. */
   TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
                                                                        size_t testDataBatchSize,
                                                                        Scalar_t learningRate);

   /*! Function for adding Logistic Regression layer, when the layer is already created. */
   void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
   /*! Function for pre-training the network with stacked denoising-autoencoder blocks. */
   void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
                 bool applyDropout = false);

   /*! Function for fine-tuning the pre-trained network with a logistic regression output layer. */
   void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
                 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);

   /*! Function that executes the entire forward pass in the network. */
   void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);

   /*! Function for evaluating the loss, based on the propagation of the given input. */
   Scalar_t Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth, const Matrix_t &weights,
                 bool applyDropout = false, bool includeRegularization = true);
};
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet()
   : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
     fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
     fIsTraining(true), fWeightDecay(0.0)
{
}
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                            size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
                                            EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
     fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J),
     fI(I), fR(R), fWeightDecay(weightDecay)
{
}
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(const TDeepNet &deepNet)
   : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
     fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
     fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI),
     fR(deepNet.fR), fWeightDecay(deepNet.fWeightDecay)
{
}
template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::~TDeepNet()
{
}
template <typename Architecture_t, typename Layer_t>
size_t TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride)
{
   // Compute in floating point; a plain integer division would hide non-integer results
   // from the isInteger() check below.
   Scalar_t dimension = (static_cast<Scalar_t>(imgDim - fltDim + 2 * padding) / stride) + 1;
   if (!isInteger(dimension) || dimension <= 0) {
      int iLayer = fLayers.size();
      Fatal("calculateDimension",
            "Incompatible hyperparameters for layer %d - (imageDim, filterDim, padding, stride): %d, %d, %d, %d",
            iLayer, imgDim, fltDim, padding, stride);
   }

   return (size_t)dimension;
}
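// Worked example of the output-size formula used above, size = (imgDim - fltDim + 2*padding)/stride + 1:
// a 32x32 input with a 5x5 filter, padding 2 and stride 1 gives (32 - 5 + 4)/1 + 1 = 32, so the spatial
// size is preserved; with stride 2 the same setup gives (32 - 5 + 4)/2 + 1 = 16.5, which is rejected by
// the isInteger() check.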
template <typename Architecture_t, typename Layer_t>
TConvLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddConvLayer(size_t depth, size_t filterHeight,
                                                                            size_t filterWidth, size_t strideRows,
                                                                            size_t strideCols, size_t paddingHeight,
                                                                            size_t paddingWidth, EActivationFunction f,
                                                                            Scalar_t dropoutProbability)
{
   // All variables defining a convolutional layer
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   // The input of the first layer comes from the network, otherwise from the previous layer.
   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   // Create the convolutional layer and register it in the network.
   TConvLayer<Architecture_t> *convLayer = new TConvLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
      strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);

   fLayers.push_back(convLayer);
   return convLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddConvLayer(TConvLayer<Architecture_t> *convLayer)
{
   fLayers.push_back(convLayer);
}
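// Illustrative usage sketch (not part of the header): how a network might be assembled with the
// AddConvLayer overload above. The CPU backend TCpu<double>, the batch layout and all numeric values
// are assumptions for the example only.
//
//   TDeepNet<TCpu<double>> net(/*batchSize*/ 64, /*inputDepth*/ 1, /*inputHeight*/ 32, /*inputWidth*/ 32,
//                              /*batchDepth*/ 64, /*batchHeight*/ 1, /*batchWidth*/ 32 * 32,
//                              ELossFunction::kCrossEntropy);
//   // 12 feature maps, 3x3 filters, stride 1, zero-padding 1, ReLU activation:
//   net.AddConvLayer(12, 3, 3, 1, 1, 1, 1, EActivationFunction::kRelu);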
template <typename Architecture_t, typename Layer_t>
TMaxPoolLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(size_t frameHeight, size_t frameWidth,
                                                                                  size_t strideRows, size_t strideCols,
                                                                                  Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   TMaxPoolLayer<Architecture_t> *maxPoolLayer = new TMaxPoolLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
      strideRows, strideCols, dropoutProbability);

   fLayers.push_back(maxPoolLayer);
   return maxPoolLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(TMaxPoolLayer<Architecture_t> *maxPoolLayer)
{
   fLayers.push_back(maxPoolLayer);
}
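// Continuing the sketch above (assumed values): a 2x2 max-pooling window with stride 2 would follow the
// convolution as
//   net.AddMaxPoolLayer(2, 2, 2, 2);
// halving the spatial size of each of the 12 feature maps from 32x32 to 16x16.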
template <typename Architecture_t, typename Layer_t>
TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps, bool rememberState)
{
   // The input of a recurrent layer is expected to be laid out as timeSteps x inputSize per event.
   size_t inputHeight, inputWidth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicRNNLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu",
            inputSize, inputWidth);
   }
   if (timeSteps != inputHeight) {
      Error("AddBasicRNNLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu",
            timeSteps, inputHeight);
   }

   TBasicRNNLayer<Architecture_t> *basicRNNLayer =
      new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState);
   fLayers.push_back(basicRNNLayer);
   return basicRNNLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer)
{
   fLayers.push_back(basicRNNLayer);
}
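// Illustrative sketch (assumed numbers): for sequences of 10 time steps with 8 features each and a hidden
// state of size 16, the input layout per event must be 10x8 and the layer would be added as
//   net.AddBasicRNNLayer(/*stateSize*/ 16, /*inputSize*/ 8, /*timeSteps*/ 10);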
template <typename Architecture_t, typename Layer_t>
TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
                                                                                        size_t hiddenUnits,
                                                                                        Scalar_t dropoutProbability,
                                                                                        Scalar_t corruptionLevel)
{
   size_t batchSize = this->GetBatchSize();

   TCorruptionLayer<Architecture_t> *corruptionLayer =
      new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
   fLayers.push_back(corruptionLayer);
   return corruptionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
{
   fLayers.push_back(corruptionLayer);
}
template <typename Architecture_t, typename Layer_t>
TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
{
   size_t batchSize = this->GetBatchSize();

   TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
   fLayers.push_back(compressionLayer);
   return compressionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
{
   fLayers.push_back(compressionLayer);
}
template <typename Architecture_t, typename Layer_t>
TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
   std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();

   TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
   fLayers.push_back(reconstructionLayer);
   return reconstructionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer)
{
   fLayers.push_back(reconstructionLayer);
}
template <typename Architecture_t, typename Layer_t>
TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
{
   size_t batchSize = this->GetBatchSize();

   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
      new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
   fLayers.push_back(logisticRegressionLayer);
   return logisticRegressionLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
{
   fLayers.push_back(logisticRegressionLayer);
}
template <typename Architecture_t, typename Layer_t>
TDenseLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(size_t width, EActivationFunction f,
                                                                              Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputWidth = lastLayer->GetWidth();
   }

   TDenseLayer<Architecture_t> *denseLayer =
      new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
   fLayers.push_back(denseLayer);
   return denseLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer)
{
   fLayers.push_back(denseLayer);
}
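// Continuing the CNN sketch (assumed sizes): a fully connected classification head with 10 outputs could be
// added, after flattening with AddReshapeLayer below, as
//   net.AddDenseLayer(10, EActivationFunction::kIdentity);
// leaving the softmax to the output function applied later in Prediction().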
template <typename Architecture_t, typename Layer_t>
TReshapeLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(size_t depth, size_t height,
                                                                                  size_t width, bool flattening)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   size_t outputNSlices;
   size_t outputNRows;
   size_t outputNCols;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   if (flattening) {
      outputNSlices = 1;
      outputNRows = this->GetBatchSize();
      outputNCols = depth * height * width;
      size_t inputNCols = inputDepth * inputHeight * inputWidth;
      if (outputNCols != 0 && outputNCols != inputNCols) {
         Info("AddReshapeLayer",
              "Dimensions not compatible - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu "
              "- forcing flattened output to be %zu",
              inputDepth, inputHeight, inputWidth, depth, height, width, inputNCols);
      }
      outputNCols = inputNCols;
      depth = 1;
      height = 1;
      width = outputNCols;
   } else {
      outputNSlices = this->GetBatchSize();
      outputNRows = depth;
      outputNCols = height * width;
   }

   TReshapeLayer<Architecture_t> *reshapeLayer =
      new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
                                        outputNSlices, outputNRows, outputNCols, flattening);
   fLayers.push_back(reshapeLayer);
   return reshapeLayer;
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer)
{
   fLayers.push_back(reshapeLayer);
}
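// Continuing the CNN sketch (assumed sizes): before the dense head the pooled feature maps are flattened.
// With flattening == true the output width is forced to the product of the input dimensions, so
//   net.AddReshapeLayer(1, 1, 12 * 16 * 16, true);
// turns the 12 x 16 x 16 activations into a single row of 3072 values per event.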
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Initialize() -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Initialize();
   }
}
template <typename Architecture>
auto debugTensor(const std::vector<typename Architecture::Matrix_t> &A, const std::string name = "tensor") -> void
{
   std::cout << name << "\n";
   for (size_t l = 0; l < A.size(); ++l) {
      for (size_t i = 0; i < A[l].GetNrows(); ++i) {
         for (size_t j = 0; j < A[l].GetNcols(); ++j) {
            std::cout << A[l](i, j) << " ";
         }
         std::cout << "\n";
      }
      std::cout << "********\n";
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
{
   fLayers.front()->Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
   }
}
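// Note on the forward pass above: each layer consumes the output tensor of the previous layer, so after
// Forward() the last layer holds the network activations. Predictions can then be formed from those stored
// activations, e.g. (illustrative, assuming a softmax output):
//   net.Prediction(predictions, EOutputFunction::kSoftmax);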
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                        std::vector<TTensorBatch<Architecture_t>> &batches,
                                                        bool applyDropout) -> void
{
   size_t depth = this->GetDepth();

   // The first layer of each deep net is fed with its own batch.
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
   }

   // The remaining layers of each net are propagated in lock-step.
   for (size_t i = 1; i < depth; i++) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
      }
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
                                                 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                                                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
                                                 EActivationFunction f, bool applyDropout) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   // numHiddenUnitsPerLayer is a std::vector, so use size(); the sizeof(...) / sizeof(...[0]) idiom
   // only counts elements of plain arrays and gave the wrong layer count here.
   size_t numOfHiddenLayers = numHiddenUnitsPerLayer.size();
   size_t visibleUnits = (size_t)input[0].GetNrows();

   // First denoising-autoencoder block: corruption, compression and reconstruction layers.
   AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(input, applyDropout);

   AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
                       fLayers.back()->GetBiases());
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // corrupted input

   AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
                          fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
                           applyDropout); // compressed input
   fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                            input);

   // Three layers have been added; train them greedily for the requested number of epochs.
   size_t weightsSize = fLayers.back()->GetWeights().size();
   size_t biasesSize = fLayers.back()->GetBiases().size();
   for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
      for (size_t j = 0; j < weightsSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
      }
      for (size_t j = 0; j < biasesSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
      }
      fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                            fLayers[fLayers.size() - 3]->GetOutput(), input);
   }
   fLayers.back()->Print();

   // Subsequent blocks are stacked on the output of the previous compression layer.
   for (size_t i = 1; i < numOfHiddenLayers; i++) {
      AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(),
                              applyDropout); // compressed input

      AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
                          fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

      AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
                             fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
                             dropoutProbability);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
                              applyDropout); // compressed input
      fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                               fLayers[fLayers.size() - 5]->GetOutput());

      size_t _weightsSize = fLayers.back()->GetWeights().size();
      size_t _biasesSize = fLayers.back()->GetBiases().size();
      for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
         for (size_t j = 0; j < _weightsSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
         }
         for (size_t j = 0; j < _biasesSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
         }
         fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                               fLayers[fLayers.size() - 3]->GetOutput(),
                                               fLayers[fLayers.size() - 5]->GetOutput());
      }
      fLayers.back()->Print();
   }
}
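// Summary of the pre-training scheme above: for every entry of numHiddenUnitsPerLayer a triplet of
// corruption, compression and reconstruction layers is appended and trained greedily for the requested
// number of epochs, with Architecture_t::Copy keeping the compression layer's weights in sync with the
// reconstruction layer being trained. Each new block is stacked on the output of the previous compression layer.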
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
                                                 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
                                                 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   if (fLayers.size() == 0) { // only a logistic regression layer
      size_t inputUnits = input[0].GetNrows();

      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, input, inp2);
      }
      fLayers.back()->Forward(input, false);
      fLayers.back()->Print();
   } else { // fine-tune on top of the pre-trained layers
      size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
      }
      fLayers.back()->Forward(testInput, false);
      fLayers.back()->Print();
   }
}
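// Note on FineTune(): only the appended logistic regression layer is trained here; the pre-trained layers
// are left unchanged and simply provide the features (their stored output) that the regression layer is fitted on.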
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
                                                 const Matrix_t &weights) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   // Evaluate the gradients of the last layer with respect to the loss function.
   evaluateGradients<Architecture_t>(fLayers.back()->GetActivationGradientsAt(0), this->GetLossFunction(), groundTruth,
                                     fLayers.back()->GetOutputAt(0), weights);
   for (size_t i = fLayers.size() - 1; i > 0; i--) {
      std::vector<Matrix_t> &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
      std::vector<Matrix_t> &activations_backward = fLayers[i - 1]->GetOutput();
      fLayers[i]->Backward(activation_gradient_backward, activations_backward, inp1, inp2);
   }

   // The first layer receives no activation gradients to propagate further back.
   std::vector<Matrix_t> dummy;
   fLayers[0]->Backward(dummy, input, inp1, inp2);
}
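// Gradient flow in Backward(): the loss gradients are written into the activation gradients of the last
// layer; each layer's Backward() then fills the activation gradients of the layer before it while
// accumulating its own weight and bias gradients, and the first layer only needs the original input since
// no gradient with respect to the data itself is required.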
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                         std::vector<TTensorBatch<Architecture_t>> &batches,
                                                         Scalar_t learningRate) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net.
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in each deep net.
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
                                         nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net.
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   // The master net updates its weights with each worker's gradients and the workers copy them back.
   for (size_t i = 0; i < nets.size(); i++) {
      for (size_t j = 0; j < depth; j++) {
         Layer_t *masterLayer = this->GetLayerAt(j);
         Layer_t *layer = nets[i].GetLayerAt(j);

         masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
         layer->CopyWeights(masterLayer->GetWeights());

         masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net.
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in each deep net and accumulate the gradients in the master net.
   for (size_t i = depth - 1; i > 0; i--) {
      Layer_t *masterLayer = this->GetLayerAt(i);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net.
   Layer_t *masterFirstLayer = this->GetLayerAt(0);
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);

      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);

      masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
      masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
   }

   masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
   masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);

   // Apply the accumulated gradients in the master net and copy the new weights to the workers.
   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      masterLayer->Update(1.0);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}
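// In ParallelBackwardMomentum() above the master net accumulates every worker's gradients scaled by
// learningRate / momentum, rescales its own accumulated gradients by (1 - momentum), applies them with
// Update(1.0), and the workers then copy back the master's weights and biases before the next batch.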
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::cout << "Parallel Backward Nestorov" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net.
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in each deep net.
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net.
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);
      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);

      // The workers take the master's weights and apply its current gradients (the look-ahead step).
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());

         layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
         layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
      }

      // The worker gradients are then folded back into the master's accumulated gradients.
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);

      masterLayer->Update(1.0);
   }
}
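// Compared with the momentum variant, this Nestorov flavour first synchronises the workers with the
// master's weights and lets them apply the master's current gradients (the look-ahead step) before the
// freshly computed worker gradients are combined into the master's update.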
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Update(Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Update(learningRate);
   }
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool includeRegularization) const -> Scalar_t
{
   auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);

   // Add the regularization term if requested.
   if (includeRegularization) {
      loss += RegularizationTerm();
   }

   return loss;
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
                                             const Matrix_t &weights, bool applyDropout, bool includeRegularization)
   -> Scalar_t
{
   Forward(input, applyDropout);
   return Loss(groundTruth, weights, includeRegularization);
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::RegularizationTerm() const -> Scalar_t
{
   Scalar_t reg = 0.0;
   for (size_t i = 0; i < fLayers.size(); i++) {
      for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
         reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
      }
   }
   return this->GetWeightDecay() * reg;
}
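// The term computed above is weightDecay * sum over all layers and weight matrices of R(W), where R is the
// L1 or L2 penalty selected by GetRegularization(); it is what Loss() adds when includeRegularization is true.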
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, EOutputFunction f) const -> void
{
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, std::vector<Matrix_t> input,
                                                   EOutputFunction f) -> void
{
   Forward(input, false);
   // Evaluate the output function on the activations of the last layer.
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}
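// Illustrative sketch (assumed sizes): after training, class probabilities for a batch could be obtained as
//   Matrix_t predictions(net.GetBatchSize(), /*outputUnits*/ 10);
//   net.Prediction(predictions, batchInput, EOutputFunction::kSoftmax);
// where batchInput is the std::vector<Matrix_t> holding the batch in the network's input layout.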
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Print() const -> void
{
   std::cout << "DEEP NEURAL NETWORK:  Depth = " << this->GetDepth();
   std::cout << "  Input = ( " << this->GetInputDepth();
   std::cout << ", " << this->GetInputHeight();
   std::cout << ", " << this->GetInputWidth() << " )";
   std::cout << "  Batch size = " << this->GetBatchSize();
   std::cout << "  Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;

   for (size_t i = 0; i < fLayers.size(); i++) {
      std::cout << "\tLayer " << i << "\t";
      fLayers[i]->Print();
   }
}
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::SetDropoutProbabilities(const std::vector<Double_t> &probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i]->SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i]->SetDropoutProbability(1.0);
      }
   }
}

#endif // TMVA_DNN_DEEPNET