#ifndef TMVA_DNN_DEEPNET
#define TMVA_DNN_DEEPNET

#include "TMVA/DNN/DAE/CompressionLayer.h"
#include "TMVA/DNN/DAE/CorruptionLayer.h"
#include "TMVA/DNN/DAE/ReconstructionLayer.h"
#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"

/** \class TDeepNet
    Generic Deep Neural Network class.
*/
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
class TDeepNet {
public:
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

   /// Constructor.
   TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
            size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
            ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);

   /// Function for adding a Convolution layer with a given depth, filter size, stride and padding.
   TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
                                            size_t strideCols, size_t paddingHeight, size_t paddingWidth,
                                            EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   /// Function for adding a Max Pooling layer with a given frame size and stride.
   TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
                                                  size_t strideCols, Scalar_t dropoutProbability = 1.0);

   /// Function for adding a vanilla Recurrent layer with the given state size, input size and time steps.
   TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false);

   /// Function for adding a Corruption layer with the given number of visible and hidden units;
   /// the input is corrupted according to corruptionLevel and dropoutProbability.
   TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                        Scalar_t dropoutProbability, Scalar_t corruptionLevel);

   /// Function for adding a Corruption layer when the layer is already created.
   void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);

   /// Function for adding a Compression layer that compresses its input using the given weights and biases.
   TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                          Scalar_t dropoutProbability, EActivationFunction f,
                                                          std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);

   /// Function for adding a Compression layer when the layer is already created.
   void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);

   /// Function for adding a Reconstruction layer that reconstructs the input units.
   TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                                Scalar_t learningRate, EActivationFunction f,
                                                                std::vector<Matrix_t> weights,
                                                                std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
                                                                Scalar_t dropoutProbability);

   /// Function for adding a Reconstruction layer when the layer is already created.
   void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);

   /// Function for adding a Logistic Regression layer with the given number of input and output units;
   /// it classifies the output units.
   TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
                                                                        size_t testDataBatchSize,
                                                                        Scalar_t learningRate);

   /// Function for adding a Logistic Regression layer when the layer is already created.
   void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);

   /// Function for pre-training a deep autoencoder network: each hidden layer is trained
   /// greedily as a denoising autoencoder (corruption, compression, reconstruction).
   void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
                 bool applyDropout = false);

   /// Function for fine-tuning the pre-trained network with a logistic regression output layer.
   void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
                 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);

   /// Function that executes the entire forward pass in the network.
   void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);

   /// Function for evaluating the loss, based on the propagation of the given input.
   Scalar_t Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth, const Matrix_t &weights,
                 bool applyDropout = false, bool includeRegularization = true);

   // (further member declarations elided in this excerpt)
};

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet()
   : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
     fIsTraining(true), fWeightDecay(0.0)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                            size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
                                            EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
     fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J), fI(I),
     fR(R), fWeightDecay(weightDecay)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(const TDeepNet &deepNet)
   : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
     fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
     fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
     fWeightDecay(deepNet.fWeightDecay)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::~TDeepNet()
{
}

template <typename Architecture_t, typename Layer_t>
size_t TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride)
{
   // Use floating-point division so that the integer check below can catch
   // incompatible (imageDim, filterDim, padding, stride) combinations.
   Scalar_t dimension = (static_cast<Scalar_t>(imgDim - fltDim + 2 * padding) / stride) + 1;
   if (!isInteger(dimension) || dimension <= 0) {
      int iLayer = fLayers.size();
      Fatal("calculateDimension",
            "Incompatible hyperparameters for layer %d - (imageDim, filterDim, padding, stride): %d, %d, %d, %d",
            iLayer, imgDim, fltDim, padding, stride);
   }

   return (size_t)dimension;
}

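// Worked example of the output-dimension formula used above (illustrative only):
// imgDim = 32, fltDim = 5, padding = 0, stride = 1 gives ((32 - 5 + 2*0) / 1) + 1 = 28,
// while imgDim = 32, fltDim = 5, padding = 0, stride = 3 gives (27 / 3) + 1 = 10.
// A combination such as imgDim = 32, fltDim = 6, padding = 0, stride = 3 yields
// 26/3 + 1, which is not an integer and therefore triggers the Fatal() call above.
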
template <typename Architecture_t, typename Layer_t>
TConvLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddConvLayer(size_t depth, size_t filterHeight,
                                                                            size_t filterWidth, size_t strideRows,
                                                                            size_t strideCols, size_t paddingHeight,
                                                                            size_t paddingWidth, EActivationFunction f,
                                                                            Scalar_t dropoutProbability)
{
   // All variables defining a convolutional layer
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   TConvLayer<Architecture_t> *convLayer = new TConvLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
      strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);

   fLayers.push_back(convLayer);
   return convLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddConvLayer(TConvLayer<Architecture_t> *convLayer)
{
   fLayers.push_back(convLayer);
}

template <typename Architecture_t, typename Layer_t>
TMaxPoolLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(size_t frameHeight, size_t frameWidth,
                                                                                  size_t strideRows, size_t strideCols,
                                                                                  Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   TMaxPoolLayer<Architecture_t> *maxPoolLayer = new TMaxPoolLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth, strideRows, strideCols,
      dropoutProbability);

   fLayers.push_back(maxPoolLayer);
   return maxPoolLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(CNN::TMaxPoolLayer<Architecture_t> *maxPoolLayer)
{
   fLayers.push_back(maxPoolLayer);
}

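// Illustrative usage sketch for the two adders above (not part of the original header):
// the enum values kRelu and kCrossEntropy are assumed to come from TMVA/DNN/Functions.h,
// and the batch geometry passed to the constructor is a placeholder whose exact layout
// depends on how the caller arranges the input tensor.
//
//   TDeepNet<Architecture_t> net(32, 1, 28, 28, 32, 1, 28 * 28, ELossFunction::kCrossEntropy);
//   net.AddConvLayer(/*depth*/ 12, /*filterH*/ 3, /*filterW*/ 3, /*strideRows*/ 1, /*strideCols*/ 1,
//                    /*padH*/ 1, /*padW*/ 1, EActivationFunction::kRelu);
//   net.AddMaxPoolLayer(/*frameH*/ 2, /*frameW*/ 2, /*strideRows*/ 2, /*strideCols*/ 2);
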
template <typename Architecture_t, typename Layer_t>
TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps,
                                                                                    bool rememberState)
{
   // Check that the input layout is consistent with a time-ordered sequence:
   // height = timeSteps, width = inputSize.
   size_t inputHeight, inputWidth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicRNNLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu",
            inputSize, inputWidth);
   }
   if (timeSteps != inputHeight) {
      Error("AddBasicRNNLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu",
            timeSteps, inputHeight);
   }

   TBasicRNNLayer<Architecture_t> *basicRNNLayer =
      new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState);
   fLayers.push_back(basicRNNLayer);
   return basicRNNLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer)
{
   fLayers.push_back(basicRNNLayer);
}

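// Sketch of the layout requirement enforced above (illustrative only): before adding a
// recurrent layer, the previous layer must present the data as timeSteps x inputSize,
// i.e. GetHeight() == timeSteps and GetWidth() == inputSize. The reshape call below is
// one hypothetical way of arranging that.
//
//   // 10 time steps with 8 features each, state vector of size 16
//   net.AddReshapeLayer(/*depth*/ 1, /*height*/ 10, /*width*/ 8, /*flattening*/ false);
//   net.AddBasicRNNLayer(/*stateSize*/ 16, /*inputSize*/ 8, /*timeSteps*/ 10, /*rememberState*/ false);
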
template <typename Architecture_t, typename Layer_t>
TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
                                                                                        size_t hiddenUnits,
                                                                                        Scalar_t dropoutProbability,
                                                                                        Scalar_t corruptionLevel)
{
   size_t batchSize = this->GetBatchSize();

   TCorruptionLayer<Architecture_t> *corruptionLayer =
      new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
   fLayers.push_back(corruptionLayer);
   return corruptionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
{
   fLayers.push_back(corruptionLayer);
}

template <typename Architecture_t, typename Layer_t>
TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
{
   size_t batchSize = this->GetBatchSize();

   TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
   fLayers.push_back(compressionLayer);
   return compressionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
{
   fLayers.push_back(compressionLayer);
}

template <typename Architecture_t, typename Layer_t>
TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
   std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();

   TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
   fLayers.push_back(reconstructionLayer);
   return reconstructionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   TReconstructionLayer<Architecture_t> *reconstructionLayer)
{
   fLayers.push_back(reconstructionLayer);
}

template <typename Architecture_t, typename Layer_t>
TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
{
   size_t batchSize = this->GetBatchSize();

   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
      new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
   fLayers.push_back(logisticRegressionLayer);
   return logisticRegressionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
{
   fLayers.push_back(logisticRegressionLayer);
}

template <typename Architecture_t, typename Layer_t>
TDenseLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(size_t width, EActivationFunction f,
                                                                              Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();
   if (fLayers.size() == 0) {
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputWidth = lastLayer->GetWidth();
   }
   TDenseLayer<Architecture_t> *denseLayer =
      new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
   fLayers.push_back(denseLayer);
   return denseLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer)
{
   fLayers.push_back(denseLayer);
}

template <typename Architecture_t, typename Layer_t>
TReshapeLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(size_t depth, size_t height,
                                                                                  size_t width, bool flattening)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   size_t outputNSlices;
   size_t outputNRows;
   size_t outputNCols;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   if (flattening) {
      outputNSlices = 1;
      outputNRows = this->GetBatchSize();
      outputNCols = depth * height * width;
      size_t inputNCols = inputDepth * inputHeight * inputWidth;
      if (outputNCols != 0 && outputNCols != inputNCols) {
         Info("AddReshapeLayer",
              "Dimensions not compatible - product of input %zu x %zu x %zu should equal output %zu x %zu x %zu - "
              "forcing the flattened output size to %zu",
              inputDepth, inputHeight, inputWidth, depth, height, width, inputNCols);
      }
      outputNCols = inputNCols;
      depth = 1;
      height = 1;
      width = outputNCols;
   } else {
      outputNSlices = this->GetBatchSize();
      outputNRows = depth;
      outputNCols = height * width;
   }

   TReshapeLayer<Architecture_t> *reshapeLayer =
      new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
                                        outputNSlices, outputNRows, outputNCols, flattening);

   fLayers.push_back(reshapeLayer);
   return reshapeLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer)
{
   fLayers.push_back(reshapeLayer);
}

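// A typical use of the reshape layer is flattening a convolutional stack before a dense
// layer; a sketch (the enum value kRelu is an assumption and the numbers are placeholders):
//
//   net.AddReshapeLayer(/*depth*/ 1, /*height*/ 1, /*width*/ 12 * 14 * 14, /*flattening*/ true);
//   net.AddDenseLayer(/*width*/ 64, EActivationFunction::kRelu);
//
// Note that when flattening, the implementation above falls back to the product
// inputDepth * inputHeight * inputWidth if the requested width does not match it.
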
template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::Initialize()
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Initialize();
   }
}

template <typename Architecture>
auto debugTensor(const std::vector<typename Architecture::Matrix_t> &A, const std::string name = "tensor") -> void
{
   std::cout << name << "\n";
   for (size_t l = 0; l < A.size(); ++l) {
      for (size_t i = 0; i < A[l].GetNrows(); ++i) {
         for (size_t j = 0; j < A[l].GetNcols(); ++j) {
            std::cout << A[l](i, j) << " ";
         }
         std::cout << "\n";
      }
      std::cout << "********\n";
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
{
   fLayers.front()->Forward(input, applyDropout);
   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                        std::vector<TTensorBatch<Architecture_t>> &batches,
                                                        bool applyDropout) -> void
{
   size_t depth = this->GetDepth();

   // The first layer of each deep net is fed with its corresponding batch
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
   }

   // The i'th layer of each deep net is fed with the output of its (i-1)'th layer
   for (size_t i = 1; i < depth; i++) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
                                                 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                                                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
                                                 EActivationFunction f, bool applyDropout) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   // numHiddenUnitsPerLayer is a std::vector, so use size(); the original
   // sizeof(...) / sizeof(...[0]) idiom only works for C arrays.
   size_t numOfHiddenLayers = numHiddenUnitsPerLayer.size();
   size_t visibleUnits = (size_t)input[0].GetNrows();

   // First autoencoder: corruption, compression and reconstruction layers on the raw input
   AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(input, applyDropout);

   AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
                       fLayers.back()->GetBiases());
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

   AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
                          fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
   fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                            input);

   size_t weightsSize = fLayers.back()->GetWeights().size();
   size_t biasesSize = fLayers.back()->GetBiases().size();
   for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
      // Training of the first autoencoder: copy the updated parameters back and iterate
      for (size_t j = 0; j < weightsSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
      }
      for (size_t j = 0; j < biasesSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
      }
      fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                            fLayers[fLayers.size() - 3]->GetOutput(), input);
   }
   fLayers.back()->Print();

   for (size_t i = 1; i < numOfHiddenLayers; i++) {
      // Stacked autoencoders: each one is trained on the hidden representation of the previous one
      AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);

      AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
                          fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

      AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
                             fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
                             dropoutProbability);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                               fLayers[fLayers.size() - 5]->GetOutput());

      size_t _weightsSize = fLayers.back()->GetWeights().size();
      size_t _biasesSize = fLayers.back()->GetBiases().size();
      for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
         // Training of the i'th autoencoder
         for (size_t j = 0; j < _weightsSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
         }
         for (size_t j = 0; j < _biasesSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
         }
         fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                               fLayers[fLayers.size() - 3]->GetOutput(),
                                               fLayers[fLayers.size() - 5]->GetOutput());
      }
      fLayers.back()->Print();
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
                                                 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
                                                 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   if (fLayers.size() == 0) {
      // No pre-trained stack: train a logistic regression layer directly on the input
      size_t inputUnits = input[0].GetNrows();

      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, input, inp2);
      }
      fLayers.back()->Forward(input, false);
      fLayers.back()->Print();
   } else {
      // Fine-tune on top of the pre-trained stack
      size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
      }
      fLayers.back()->Forward(testInput, false);
      fLayers.back()->Print();
   }
}

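// Hedged sketch of driving the two DAE methods above; the activation value kSigmoid and
// all numeric settings are assumptions, and the input vectors must be filled by the caller:
//
//   std::vector<Matrix_t> trainX, testX, labels;
//   std::vector<size_t> hiddenUnits = {64, 32};   // two stacked autoencoders
//   net.PreTrain(trainX, hiddenUnits, /*learningRate*/ 0.1, /*corruptionLevel*/ 0.3,
//                /*dropoutProbability*/ 1.0, /*epochs*/ 10, EActivationFunction::kSigmoid);
//   net.FineTune(trainX, testX, labels, /*outputUnits*/ 2, /*testDataBatchSize*/ 100,
//                /*learningRate*/ 0.1, /*epochs*/ 10);
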
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
                                                 const Matrix_t &weights) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   // Evaluate the gradients of the loss with respect to the output of the last layer
   evaluateGradients<Architecture_t>(fLayers.back()->GetActivationGradientsAt(0), this->GetLossFunction(), groundTruth,
                                     fLayers.back()->GetOutputAt(0), weights);
   for (size_t i = fLayers.size() - 1; i > 0; i--) {
      std::vector<Matrix_t> &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
      std::vector<Matrix_t> &activations_backward = fLayers[i - 1]->GetOutput();
      fLayers[i]->Backward(activation_gradient_backward, activations_backward, inp1, inp2);
   }

   // Pass an empty dummy tensor for the activation gradients, which are not computed for the first layer
   std::vector<Matrix_t> dummy;
   fLayers[0]->Backward(dummy, input, inp1, inp2);
}

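// A minimal single-step training sketch built from Forward, Loss, Backward and Update
// (hypothetical caller code; tensors and the learning rate are placeholders):
//
//   net.Forward(inputTensor, /*applyDropout*/ true);
//   Scalar_t loss = net.Loss(groundTruth, eventWeights, /*includeRegularization*/ true);
//   net.Backward(inputTensor, groundTruth, eventWeights);
//   net.Update(/*learningRate*/ 0.01);
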
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                         std::vector<TTensorBatch<Architecture_t>> &batches,
                                                         Scalar_t learningRate) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in the i'th layer of each deep net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
                                         nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   // The master net accumulates the worker gradients and the updated parameters are copied back
   for (size_t i = 0; i < nets.size(); i++) {
      for (size_t j = 0; j < depth; j++) {
         Layer_t *masterLayer = this->GetLayerAt(j);
         Layer_t *layer = nets[i].GetLayerAt(j);

         masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
         layer->CopyWeights(masterLayer->GetWeights());

         masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in the i'th layer of each deep net and accumulate the gradients
   for (size_t i = depth - 1; i > 0; i--) {
      Layer_t *masterLayer = this->GetLayerAt(i);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net
   Layer_t *masterFirstLayer = this->GetLayerAt(0);
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);

      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);

      masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
      masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
   }

   masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
   masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);

   // Apply the accumulated gradients on the master net and copy the parameters back to the workers
   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      masterLayer->Update(1.0);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::cout << "Parallel Backward Nestorov" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in the i'th layer of each deep net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);
      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());

         layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
         layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
      }

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);

      masterLayer->Update(1.0);
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Update(Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Update(learningRate);
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool includeRegularization) const -> Scalar_t
{
   // Evaluate the loss of the prediction stored in the last layer
   auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);

   // Add the regularization term, if requested
   if (includeRegularization) {
      loss += RegularizationTerm();
   }
   return loss;
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
                                             const Matrix_t &weights, bool applyDropout, bool includeRegularization)
   -> Scalar_t
{
   Forward(input, applyDropout);
   return Loss(groundTruth, weights, includeRegularization);
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::RegularizationTerm() const -> Scalar_t
{
   Scalar_t reg = 0.0;
   for (size_t i = 0; i < fLayers.size(); i++) {
      for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
         reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
      }
   }
   return this->GetWeightDecay() * reg;
}

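// Schematically, for ERegularization::kL2 the quantity returned above is
//   weightDecay * (sum over layers and weight matrices of the sum of w^2)
// (and the sum of |w| for kL1), with the per-matrix sums delegated to the
// architecture's regularization<Architecture_t>() helper.
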
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, EOutputFunction f) const -> void
{
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, std::vector<Matrix_t> input,
                                                   EOutputFunction f) -> void
{
   Forward(input, false);
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}

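// Sketch of obtaining predictions after training (hypothetical caller code; kSigmoid is
// an assumed EOutputFunction value and Matrix_t is assumed constructible from (rows, cols)):
//
//   Matrix_t predictions(net.GetBatchSize(), net.GetOutputWidth());
//   net.Prediction(predictions, inputTensor, EOutputFunction::kSigmoid);
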
template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Print() const -> void
{
   std::cout << "DEEP NEURAL NETWORK: Depth = " << this->GetDepth();
   std::cout << " Input = ( " << this->GetInputDepth();
   std::cout << ", " << this->GetInputHeight();
   std::cout << ", " << this->GetInputWidth() << " )";
   std::cout << " Batch size = " << this->GetBatchSize();
   std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;

   // Print the details of each layer
   for (size_t i = 0; i < fLayers.size(); i++) {
      std::cout << "\tLayer " << i << "\t";
      fLayers[i]->Print();
   }
}