TMVA::MethodDNN::MethodDNN(const TString& jobName, const TString& methodTitle,
                           DataSetInfo& theData, const TString& theOption)
   : MethodBase(jobName, Types::kDNN, methodTitle, theData, theOption),
     fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
     fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
     fTrainingSettings(), fResume(false), fSettings()
{
}

TMVA::MethodDNN::MethodDNN(DataSetInfo& theData, const TString& theWeightFile)
   : MethodBase(Types::kDNN, theData, theWeightFile),
     fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
     fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
     fTrainingSettings(), fResume(false), fSettings()
{
}
void TMVA::MethodDNN::DeclareOptions()
{
   DeclareOptionRef(fLayoutString, "Layout",
                    "Layout of the network.");

   DeclareOptionRef(fValidationSize, "ValidationSize",
                    "Part of the training data to use for "
                    "validation. Specify as 0.2 or 20% to use a "
                    "fifth of the data set as validation set. "
                    "Specify as 100 to use exactly 100 events.");

   DeclareOptionRef(fErrorStrategy, "ErrorStrategy",
                    "Loss function: Mean squared error (regression)"
                    " or cross entropy (binary classification).");

   DeclareOptionRef(fWeightInitializationString, "WeightInitialization",
                    "Weight initialization strategy");

   DeclareOptionRef(fArchitectureString = "CPU", "Architecture",
                    "Which architecture to perform the training on.");

   DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-1,"
                                              "ConvergenceSteps=50,"
                                              "DropRepetitions=5|LearningRate=1e-4,"
                                              "ConvergenceSteps=50,"
                                              "DropConfig=0.0+0.5+0.5,"
                                              "Multithreading=True",
                    "TrainingStrategy",
                    "Defines the training strategies.");
}
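// Illustrative only: the option names declared above are combined into a single
// booking string on the user side. The Factory call and the concrete option values
// below are an example and are not taken from this file; the layout string follows
// the format documented in GetHelpMessage().
//
//    factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN",
//                        "Layout=TANH|(N+30)*2,TANH|(N+30),LINEAR:"
//                        "ErrorStrategy=CROSSENTROPY:Architecture=CPU:"
//                        "TrainingStrategy=LearningRate=1e-1,ConvergenceSteps=50|"
//                        "LearningRate=1e-4,ConvergenceSteps=50");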
////////////////////////////////////////////////////////////////////////////////
/// Parse the layout string, e.g. "TANH|(N+30)*2,TANH|(N+30),LINEAR", into pairs
/// of (number of nodes, activation function), one entry per layer.

TMVA::MethodDNN::LayoutVector_t TMVA::MethodDNN::ParseLayoutString(TString layoutString)
{
   LayoutVector_t layout;
   const TString layerDelimiter(",");
   const TString subDelimiter("|");

   const size_t inputSize = GetNvar();

   TObjArray* layerStrings = layoutString.Tokenize(layerDelimiter);
   TIter nextLayer(layerStrings);
   TObjString* layerString = (TObjString*) nextLayer();

   for (; layerString != nullptr; layerString = (TObjString*) nextLayer()) {
      int numNodes = 0;
      EActivationFunction activationFunction = EActivationFunction::kTanh;

      TObjArray* subStrings = layerString->GetString().Tokenize(subDelimiter);
      TIter nextToken(subStrings);
      TObjString* token = (TObjString*) nextToken();
      int idxToken = 0;

      for (; token != nullptr; token = (TObjString*) nextToken()) {
         switch (idxToken) {
         case 0: { // activation function
            TString strActFnc(token->GetString());
            if (strActFnc == "RELU") {
               activationFunction = EActivationFunction::kRelu;
            } else if (strActFnc == "TANH") {
               activationFunction = EActivationFunction::kTanh;
            } else if (strActFnc == "SYMMRELU") {
               activationFunction = EActivationFunction::kSymmRelu;
            } else if (strActFnc == "SOFTSIGN") {
               activationFunction = EActivationFunction::kSoftSign;
            } else if (strActFnc == "SIGMOID") {
               activationFunction = EActivationFunction::kSigmoid;
            } else if (strActFnc == "LINEAR") {
               activationFunction = EActivationFunction::kIdentity;
            } else if (strActFnc == "GAUSS") {
               activationFunction = EActivationFunction::kGauss;
            }
         } break;
         case 1: { // number of nodes; "N"/"n" stands for the number of input variables
            TString strNumNodes(token->GetString());
            TString strN("x");
            strNumNodes.ReplaceAll("N", strN);
            strNumNodes.ReplaceAll("n", strN);
            TFormula fml("tmp", strNumNodes);
            numNodes = fml.Eval(inputSize);
         } break;
         }
         ++idxToken;
      }
      layout.push_back(std::make_pair(numNodes, activationFunction));
   }
   return layout;
}
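// Illustrative example of the parsing above: with 10 input variables, the layout
// string "TANH|(N+30)*2,TANH|(N+30),LINEAR" from the option documentation yields
// roughly { {80, kTanh}, {40, kTanh}, {-, kIdentity} }, because "N" is replaced by
// the number of input variables before the node-count expression is evaluated. The
// node count of the last (output-layer) entry is not chosen here; ProcessOptions()
// sets the output width from the analysis type.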
////////////////////////////////////////////////////////////////////////////////
/// Parse a string of the form "key1=value1,key2=value2|key1=value1,..." into a
/// vector of key-value maps, one map per "blockDelim"-separated block.

TMVA::MethodDNN::KeyValueVector_t TMVA::MethodDNN::ParseKeyValueString(TString parseString,
                                                                       TString blockDelim,
                                                                       TString tokenDelim)
{
   KeyValueVector_t blockKeyValues;
   const TString keyValueDelim("=");

   TObjArray* blockStrings = parseString.Tokenize(blockDelim);
   TIter nextBlock(blockStrings);
   TObjString* blockString = (TObjString*) nextBlock();

   for (; blockString != nullptr; blockString = (TObjString*) nextBlock()) {
      blockKeyValues.push_back(std::map<TString, TString>());
      std::map<TString, TString>& currentBlock = blockKeyValues.back();

      TObjArray* subStrings = blockString->GetString().Tokenize(tokenDelim);
      TIter nextToken(subStrings);
      TObjString* token = (TObjString*) nextToken();

      for (; token != nullptr; token = (TObjString*) nextToken()) {
         TString strKeyValue(token->GetString());
         int delimPos = strKeyValue.First(keyValueDelim.Data());
         if (delimPos <= 0) continue;

         TString strKey = TString(strKeyValue(0, delimPos));
         strKey.ToUpper();
         TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));

         currentBlock.insert(std::make_pair(strKey, strValue));
      }
   }
   return blockKeyValues;
}
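// Illustrative example of the parsing above: with blockDelim = "|" and
// tokenDelim = ",", the string
//    "LearningRate=1e-1,BatchSize=30|LearningRate=1e-4,BatchSize=20"
// yields two maps,
//    { {"LEARNINGRATE","1e-1"}, {"BATCHSIZE","30"} } and
//    { {"LEARNINGRATE","1e-4"}, {"BATCHSIZE","20"} }.
// Keys are upper-cased so that later lookups are case-insensitive.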
////////////////////////////////////////////////////////////////////////////////
/// Fetch the raw string stored for \p key; return "" if the key is missing.

TString fetchValue(const std::map<TString, TString>& keyValueMap, TString key)
{
   key.ToUpper();
   std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
   if (it == keyValueMap.end()) {
      return TString("");
   }
   return it->second;
}

////////////////////////////////////////////////////////////////////////////////
/// Typed access with a default value that is returned when the key is missing.

template <typename T>
T fetchValue(const std::map<TString, TString>& keyValueMap,
             TString key, T defaultValue);

template <>
int fetchValue(const std::map<TString, TString>& keyValueMap,
               TString key, int defaultValue)
{
   TString value(fetchValue(keyValueMap, key));
   if (value == "") return defaultValue;
   return value.Atoi();
}

template <>
double fetchValue(const std::map<TString, TString>& keyValueMap,
                  TString key, double defaultValue)
{
   TString value(fetchValue(keyValueMap, key));
   if (value == "") return defaultValue;
   return value.Atof();
}

template <>
TString fetchValue(const std::map<TString, TString>& keyValueMap,
                   TString key, TString defaultValue)
{
   TString value(fetchValue(keyValueMap, key));
   if (value == "") return defaultValue;
   return value;
}

template <>
bool fetchValue(const std::map<TString, TString>& keyValueMap,
                TString key, bool defaultValue)
{
   TString value(fetchValue(keyValueMap, key));
   if (value == "") return defaultValue;
   value.ToUpper();
   if (value == "TRUE" || value == "T" || value == "1") {
      return true;
   }
   return false;
}

template <>
std::vector<double> fetchValue(const std::map<TString, TString>& keyValueMap,
                               TString key, std::vector<double> defaultValue)
{
   TString parseString(fetchValue(keyValueMap, key));
   if (parseString == "") {
      return defaultValue;
   }

   std::vector<double> values;
   const TString tokenDelim("+");
   TObjArray* tokenStrings = parseString.Tokenize(tokenDelim);
   TIter nextToken(tokenStrings);
   TObjString* tokenString = (TObjString*) nextToken();
   for (; tokenString != NULL; tokenString = (TObjString*) nextToken()) {
      std::stringstream sstr;
      double currentValue;
      sstr << tokenString->GetString().Data();
      sstr >> currentValue;
      values.push_back(currentValue);
   }
   return values;
}
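// Usage sketch (illustrative default values): given one of the blocks produced by
// ParseKeyValueString() above,
//    double lr = fetchValue(block, "LearningRate", 1e-5);
//    bool   mt = fetchValue(block, "Multithreading", true);  // "True"/"T"/"1" -> true
//    std::vector<double> drop = fetchValue(block, "DropConfig", std::vector<double>());
// each call falls back to the supplied default when the key is not present.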
void TMVA::MethodDNN::ProcessOptions()
{
   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kINFO << "Will ignore negative events in training!" << Endl;
   }

   if (fArchitectureString == "STANDARD") {
      Log() << kERROR << "The STANDARD architecture has been deprecated. "
                         "Please use Architecture=CPU or Architecture=GPU. "
                         "See the TMVA Users' Guide for instructions if you "
                         "encounter problems."
            << Endl;
      Log() << kFATAL << "The STANDARD architecture has been deprecated. "
                         "Please use Architecture=CPU or Architecture=GPU. "
                         "See the TMVA Users' Guide for instructions if you "
                         "encounter problems."
            << Endl;
   }

   if (fArchitectureString == "OPENCL") {
      Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
                         "Please use Architecture=CPU or Architecture=GPU for the "
                         "time being. See the TMVA Users' Guide for instructions "
                         "if you encounter problems."
            << Endl;
      Log() << kFATAL << "The OPENCL architecture has not been implemented yet. "
                         "Please use Architecture=CPU or Architecture=GPU for the "
                         "time being. See the TMVA Users' Guide for instructions "
                         "if you encounter problems."
            << Endl;
   }

   if (fArchitectureString == "GPU") {
#ifndef DNNCUDA // Included only if DNNCUDA flag is _not_ set.
      Log() << kERROR << "CUDA backend not enabled. Please make sure "
                         "you have CUDA installed and it was successfully "
                         "detected by CMAKE."
            << Endl;
      Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                         "you have CUDA installed and it was successfully "
                         "detected by CMAKE."
            << Endl;
#endif // DNNCUDA
   }

   if (fArchitectureString == "CPU") {
#ifndef DNNCPU // Included only if DNNCPU flag is _not_ set.
      Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
                         "you have a BLAS implementation and it was successfully "
                         "detected by CMake, and that the imt CMake flag is set."
            << Endl;
      Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
                         "you have a BLAS implementation and it was successfully "
                         "detected by CMake, and that the imt CMake flag is set."
            << Endl;
#endif // DNNCPU
   }

   fLayout = TMVA::MethodDNN::ParseLayoutString(fLayoutString);
   size_t inputSize  = GetNVariables();
   size_t outputSize = 1;
   // ... (for regression outputSize becomes the number of targets, for multiclass
   //      the number of classes)
   fNet.SetBatchSize(1);
   fNet.SetInputWidth(inputSize);

   auto itLayout    = std::begin(fLayout);
   auto itLayoutEnd = std::end(fLayout) - 1;
   for (; itLayout != itLayoutEnd; ++itLayout) {
      fNet.AddLayer((*itLayout).first, (*itLayout).second);
   }
   fNet.AddLayer(outputSize, EActivationFunction::kIdentity);

   // Loss function and output function.
   fOutputFunction = EOutputFunction::kSigmoid;
   if (fAnalysisType == Types::kClassification) {
      if (fErrorStrategy == "SUMOFSQUARES") {
         fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
      }
      if (fErrorStrategy == "CROSSENTROPY") {
         fNet.SetLossFunction(ELossFunction::kCrossEntropy);
      }
      fOutputFunction = EOutputFunction::kSigmoid;
   } else if (fAnalysisType == Types::kRegression) {
      if (fErrorStrategy != "SUMOFSQUARES") {
         Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
               << "neural net error function. Setting error function to "
               << "SUMOFSQUARES now." << Endl;
      }
      fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
      fOutputFunction = EOutputFunction::kIdentity;
   } else if (fAnalysisType == Types::kMulticlass) {
      if (fErrorStrategy == "SUMOFSQUARES") {
         fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
      }
      if (fErrorStrategy == "CROSSENTROPY") {
         fNet.SetLossFunction(ELossFunction::kCrossEntropy);
      }
      if (fErrorStrategy == "MUTUALEXCLUSIVE") {
         fNet.SetLossFunction(ELossFunction::kSoftmaxCrossEntropy);
      }
      fOutputFunction = EOutputFunction::kSoftmax;
   }

   // Weight initialization.
   if (fWeightInitializationString == "XAVIER") {
      fWeightInitialization = EInitialization::kGauss;
   } else if (fWeightInitializationString == "XAVIERUNIFORM") {
      fWeightInitialization = EInitialization::kUniform;
   } else {
      fWeightInitialization = EInitialization::kGauss;
   }
   // Training strategies: evaluate the validation-size option early so that an
   // invalid value is reported here, then parse the "|"-separated strategy blocks.
   GetNumValidationSamples();

   KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString,
                                                            TString("|"),
                                                            TString(","));
   for (auto& block : strategyKeyValues) {
      TTrainingSettings settings;
      // ... (convergence steps, batch size, test interval, learning rate, momentum,
      //      weight decay and multithreading are read from the block via fetchValue)
      settings.dropoutProbabilities = fetchValue(block, "DropConfig",
                                                 std::vector<Double_t>());

      TString regularization = fetchValue(block, "Regularization", TString("NONE"));
      if (regularization == "L1") {
         settings.regularization = ERegularization::kL1;
      } else if (regularization == "L2") {
         settings.regularization = ERegularization::kL2;
      }

      fTrainingSettings.push_back(settings);
   }
}
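// Illustrative: the default TrainingStrategy string declared in DeclareOptions()
// contains two "|"-separated blocks, so this loop fills fTrainingSettings with two
// entries. A value such as DropConfig=0.0+0.5+0.5 is turned into the vector
// {0.0, 0.5, 0.5} by the std::vector<double> overload of fetchValue() above.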
////////////////////////////////////////////////////////////////////////////////
/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
/// 100 (an absolute number of events).

UInt_t TMVA::MethodDNN::GetNumValidationSamples()
{
   Int_t nValidationSamples = 0;
   UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();

   // Parsing and validation of the option string.
   if (fValidationSize.EndsWith("%")) {
      // Relative specification, e.g. "20%".
      TString intValStr = TString(fValidationSize.Strip(TString::kTrailing, '%'));
      if (intValStr.IsFloat()) {
         Double_t valSizeAsDouble = fValidationSize.Atof() / 100.0;
         nValidationSamples = trainingSetSize * valSizeAsDouble;
      } else {
         Log() << kFATAL << "Cannot parse number \"" << fValidationSize
               << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
      }
   } else if (fValidationSize.IsFloat()) {
      Double_t valSizeAsDouble = fValidationSize.Atof();
      if (valSizeAsDouble < 1.0) {
         // Relative specification, e.g. "0.2".
         nValidationSamples = trainingSetSize * valSizeAsDouble;
      } else {
         // Absolute specification, e.g. "100" or "100.0".
         nValidationSamples = valSizeAsDouble;
      }
   } else {
      Log() << kFATAL << "Cannot parse number \"" << fValidationSize
            << "\". Expected string like \"0.2\" or \"100\"." << Endl;
   }

   // Sanity checks on the resulting value.
   if (nValidationSamples < 0) {
      Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is negative." << Endl;
   }
   if (nValidationSamples == 0) {
      Log() << kFATAL << "Validation size \"" << fValidationSize << "\" is zero." << Endl;
   }
   if (nValidationSamples >= (Int_t) trainingSetSize) {
      Log() << kFATAL << "Validation size \"" << fValidationSize
            << "\" is larger than or equal in size to training set (size=\""
            << trainingSetSize << "\")." << Endl;
   }

   return nValidationSamples;
}
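// Worked example (illustrative numbers): with a training set of 10000 events,
//    ValidationSize=20%  -> 10000 * 0.20 = 2000 validation events,
//    ValidationSize=0.2  -> the same 2000 events,
//    ValidationSize=100  -> exactly 100 events.
// Zero, negative values, and values greater than or equal to the training set size
// are rejected with a fatal error above.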
////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodDNN::Train()
{
   if (fInteractive) {
      std::vector<TString> titles = {"Error on training set", "Error on test set"};
      fInteractive->Init(titles);
   }

   if (fArchitectureString == "GPU") {
      TrainGpu();
      return;
   } else if (fArchitectureString == "OpenCL") {
      Log() << kFATAL << "OpenCL backend not yet supported." << Endl;
      return;
   } else if (fArchitectureString == "CPU") {
      TrainCpu();
      return;
   }

   Log() << kINFO << "Using Standard Implementation." << Endl;

   std::vector<Pattern> trainPattern;
   std::vector<Pattern> testPattern;

   size_t nValidationSamples = GetNumValidationSamples();
   size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;

   const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
   const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
   const std::vector<TMVA::Event *> eventCollectionTesting{allData.begin() + nTrainingSamples, allData.end()};

   for (auto &event : eventCollectionTraining) {
      const std::vector<Float_t>& values = event->GetValues();
      if (fAnalysisType == Types::kClassification) {
         double outputValue = event->GetClass() == 0 ? 0.9 : 0.1;
         trainPattern.push_back(Pattern(values.begin(), values.end(),
                                        outputValue,
                                        event->GetWeight()));
         trainPattern.back().addInput(1.0);
      } else if (fAnalysisType == Types::kMulticlass) {
         std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
         oneHot[event->GetClass()] = 1.0;
         trainPattern.push_back(Pattern(values.begin(), values.end(),
                                        oneHot.cbegin(), oneHot.cend(),
                                        event->GetWeight()));
         trainPattern.back().addInput(1.0);
      } else {
         const std::vector<Float_t>& targets = event->GetTargets();
         trainPattern.push_back(Pattern(values.begin(), values.end(),
                                        targets.begin(), targets.end(),
                                        event->GetWeight()));
         trainPattern.back().addInput(1.0);
      }
   }

   for (auto &event : eventCollectionTesting) {
      const std::vector<Float_t>& values = event->GetValues();
      if (fAnalysisType == Types::kClassification) {
         double outputValue = event->GetClass() == 0 ? 0.9 : 0.1;
         testPattern.push_back(Pattern(values.begin(), values.end(),
                                       outputValue,
                                       event->GetWeight()));
         testPattern.back().addInput(1.0);
      } else if (fAnalysisType == Types::kMulticlass) {
         std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
         oneHot[event->GetClass()] = 1.0;
         testPattern.push_back(Pattern(values.begin(), values.end(),
                                       oneHot.cbegin(), oneHot.cend(),
                                       event->GetWeight()));
         testPattern.back().addInput(1.0);
      } else {
         const std::vector<Float_t>& targets = event->GetTargets();
         testPattern.push_back(Pattern(values.begin(), values.end(),
                                       targets.begin(), targets.end(),
                                       event->GetWeight()));
         testPattern.back().addInput(1.0);
      }
   }
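   // Sketch of what a single training pattern holds (classification case, assuming
   // three input variables x1..x3): inputs (x1, x2, x3, 1.0), where addInput(1.0)
   // appends the constant bias input, a target of 0.9 for class 0 or 0.1 otherwise,
   // and the event weight. The multiclass case stores a one-hot target vector and
   // the regression case stores the event's target values instead.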
   // Build the reference ("standard") implementation of the network and train it.
   Net net;
   std::vector<double> weights;

   // Map the layers of fNet onto the reference implementation: activation
   // functions, and for the last layer the output mode.
   for (size_t i = 0; i < fNet.GetDepth(); i++) {
      EActivationFunction af = fNet.GetLayer(i).GetActivationFunction();
      EnumFunction g = EnumFunction::LINEAR;
      switch (af) {
      case EActivationFunction::kIdentity: g = EnumFunction::LINEAR;   break;
      case EActivationFunction::kRelu:     g = EnumFunction::RELU;     break;
      case EActivationFunction::kSigmoid:  g = EnumFunction::SIGMOID;  break;
      case EActivationFunction::kTanh:     g = EnumFunction::TANH;     break;
      case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
      case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
      case EActivationFunction::kGauss:    g = EnumFunction::GAUSS;    break;
      }
      if (i < fNet.GetDepth() - 1) {
         net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g));
      } else {
         ModeOutputValues h = ModeOutputValues::DIRECT;
         switch (fOutputFunction) {
         case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT;  break;
         case EOutputFunction::kSigmoid:  h = ModeOutputValues::SIGMOID; break;
         case EOutputFunction::kSoftmax:  h = ModeOutputValues::SOFTMAX; break;
         }
         net.addLayer(Layer(fNet.GetLayer(i).GetWidth(), g, h));
      }
   }

   // Error function.
   switch (fNet.GetLossFunction()) {
   case ELossFunction::kMeanSquaredError:
      net.setErrorFunction(ModeErrorFunction::SUMOFSQUARES);
      break;
   case ELossFunction::kCrossEntropy:
      net.setErrorFunction(ModeErrorFunction::CROSSENTROPY);
      break;
   case ELossFunction::kSoftmaxCrossEntropy:
      net.setErrorFunction(ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE);
      break;
   }

   // Weight initialization.
   switch (fWeightInitialization) {
   case EInitialization::kGauss:
      net.initializeWeights(WeightInitializationStrategy::XAVIER,
                            std::back_inserter(weights));
      break;
   case EInitialization::kUniform:
      net.initializeWeights(WeightInitializationStrategy::XAVIERUNIFORM,
                            std::back_inserter(weights));
      break;
   default:
      net.initializeWeights(WeightInitializationStrategy::XAVIER,
                            std::back_inserter(weights));
      break;
   }

   // One training run per stacked training strategy.
   size_t idxSetting = 0;
   for (auto s : fTrainingSettings) {
      EnumRegularization r = EnumRegularization::NONE;
      switch (s.regularization) {
      case ERegularization::kL1: r = EnumRegularization::L1; break;
      case ERegularization::kL2: r = EnumRegularization::L2; break;
      default:                   break;
      }

      Settings* settings = new Settings(TString(), s.convergenceSteps, s.batchSize,
                                        s.testInterval, s.weightDecay, r,
                                        MinimizerType::fSteepest, s.learningRate,
                                        s.momentum, 1, s.multithreading);
      std::shared_ptr<Settings> ptrSettings(settings);
      ptrSettings->setMonitoring(0);
      Log() << kINFO
            << "Training with learning rate = " << ptrSettings->learningRate()
            << ", momentum = " << ptrSettings->momentum()
            << ", repetitions = " << ptrSettings->repetitions()
            << Endl;

      ptrSettings->setProgressLimits(idxSetting       * 100.0 / fSettings.size(),
                                     (idxSetting + 1) * 100.0 / fSettings.size());

      const std::vector<double>& dropConfig = ptrSettings->dropFractions();
      if (!dropConfig.empty()) {
         Log() << kINFO << "Drop configuration" << Endl
               << "    drop repetitions = " << ptrSettings->dropRepetitions() << Endl;
      }
      int idx = 0;
      for (auto f : dropConfig) {
         Log() << kINFO << "    Layer " << idx << " = " << f << Endl;
         ++idx;
      }

      Steepest minimizer(ptrSettings->learningRate(),
                         ptrSettings->momentum(),
                         ptrSettings->repetitions());
      net.train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());
      ++idxSetting;
   }
   // Copy the trained weights from the reference implementation back into fNet.
   size_t weightIndex = 0;
   for (size_t l = 0; l < fNet.GetDepth(); l++) {
      auto & layerWeights = fNet.GetLayer(l).GetWeights();
      for (Int_t j = 0; j < layerWeights.GetNcols(); j++) {
         for (Int_t i = 0; i < layerWeights.GetNrows(); i++) {
            layerWeights(i, j) = weights[weightIndex];
            ++weightIndex;
         }
      }
      // Only the first layer's biases are taken from the weight vector; the
      // biases of the remaining layers are set to zero.
      auto & layerBiases = fNet.GetLayer(l).GetBiases();
      if (l == 0) {
         for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
            layerBiases(i, 0) = weights[weightIndex];
            ++weightIndex;
         }
      } else {
         for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
            layerBiases(i, 0) = 0.0;
         }
      }
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Training on the GPU (CUDA backend).

void TMVA::MethodDNN::TrainGpu()
{
#ifdef DNNCUDA // Included only if DNNCUDA flag is set.
   Log() << kINFO << "Start of neural network training on GPU." << Endl << Endl;

   size_t nValidationSamples = GetNumValidationSamples();
   size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
   size_t nTestSamples = nValidationSamples;

   Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
   Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;

   size_t trainingPhase = 1;
   fNet.Initialize(fWeightInitialization);
   for (TTrainingSettings & settings : fTrainingSettings) {

      TNet<TCuda<>> net(settings.batchSize, fNet);
      net.SetWeightDecay(settings.weightDecay);
      net.SetRegularization(settings.regularization);

      // Convert the drop-out fractions to the convention used by the backend
      // (probability of keeping a node rather than dropping it).
      std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
      for (auto & p : dropoutVector) {
         p = 1.0 - p;
      }
      net.SetDropoutProbabilities(dropoutVector);

      net.InitializeGradients();
      auto testNet = net.CreateClone(settings.batchSize);

      Log() << kINFO << "Training phase " << trainingPhase << " of "
            << fTrainingSettings.size() << ":" << Endl;
      trainingPhase++;

      using DataLoader_t = TDataLoader<TMVAInput_t, TCuda<>>;

      // Split the training data into training and validation parts.
      const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
      const std::vector<Event *> trainingInputData =
         std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
      const std::vector<Event *> testInputData =
         std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());

      if (trainingInputData.size() != nTrainingSamples) {
         Log() << kFATAL << "Inconsistent training sample size" << Endl;
      }
      if (testInputData.size() != nTestSamples) {
         Log() << kFATAL << "Inconsistent test sample size" << Endl;
      }

      size_t nThreads = 1;
      TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
      TMVAInput_t testTuple     = std::tie(testInputData, DataInfo());
      DataLoader_t trainingData(trainingTuple, nTrainingSamples,
                                net.GetBatchSize(), net.GetInputWidth(),
                                net.GetOutputWidth(), nThreads);
      DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
                            net.GetInputWidth(), net.GetOutputWidth(), nThreads);
      DNN::TGradientDescent<TCuda<>> minimizer(settings.learningRate,
                                               settings.convergenceSteps,
                                               settings.testInterval);

      std::vector<TNet<TCuda<>>>   nets{};
      std::vector<TBatch<TCuda<>>> batches{};
      nets.reserve(nThreads);
      for (size_t i = 0; i < nThreads; i++) {
         nets.push_back(net);
         for (size_t j = 0; j < net.GetDepth(); j++) {
            auto &masterLayer = net.GetLayer(j);
            auto &layer = nets.back().GetLayer(j);
            TCuda<>::Copy(layer.GetWeights(), masterLayer.GetWeights());
            TCuda<>::Copy(layer.GetBiases(),  masterLayer.GetBiases());
         }
      }

      bool   converged      = false;
      size_t stepCount      = 0;
      size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();

      std::chrono::time_point<std::chrono::system_clock> start, end;
      start = std::chrono::system_clock::now();

      Log() << std::setw(10) << "Epoch" << " | "
            << std::setw(12) << "Train Err."
            << std::setw(12) << "Test Err."
            << std::setw(12) << "GFLOP/s"
            << std::setw(12) << "Conv. Steps" << Endl;
      std::string separator(62, '-');
      Log() << separator << Endl;

      while (!converged) {
         stepCount++;

         // Perform one epoch of minimization steps.
         trainingData.Shuffle();
         for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
            batches.clear();
            batches.reserve(nThreads);
            for (size_t j = 0; j < nThreads; j++) {
               batches.push_back(trainingData.GetBatch());
            }
            if (settings.momentum > 0.0) {
               minimizer.StepMomentum(net, nets, batches, settings.momentum);
            } else {
               minimizer.Step(net, nets, batches);
            }
         }

         if ((stepCount % minimizer.GetTestInterval()) == 0) {
            // Compute the test error.
            Double_t testError = 0.0;
            for (auto batch : testData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               testError += testNet.Loss(inputMatrix, outputMatrix);
            }
            testError /= (Double_t) (nTestSamples / settings.batchSize);

            end = std::chrono::system_clock::now();

            // Compute the training error.
            Double_t trainingError = 0.0;
            for (auto batch : trainingData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               trainingError += net.Loss(inputMatrix, outputMatrix);
            }
            trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);

            // Throughput over the last test interval.
            std::chrono::duration<double> elapsed_seconds = end - start;
            double seconds = elapsed_seconds.count();
            double nFlops  = (double) (settings.testInterval * batchesInEpoch);
            // ... (scaled by the per-batch floating-point operation count of the net)

            converged = minimizer.HasConverged(testError);
            start = std::chrono::system_clock::now();

            if (fInteractive) {
               fInteractive->AddPoint(stepCount, trainingError, testError);
               fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
                                 / minimizer.GetConvergenceSteps();
            } else {
               Log() << std::setw(10) << stepCount << " | "
                     << std::setw(12) << trainingError
                     << std::setw(12) << testError
                     << std::setw(12) << nFlops / seconds
                     << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
            }
         }
      }
      // ... (after convergence the trained weights are copied back into fNet)
   }

#else // DNNCUDA flag not set.
   Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                      "you have CUDA installed and it was successfully "
                      "detected by CMAKE." << Endl;
#endif // DNNCUDA
}
////////////////////////////////////////////////////////////////////////////////
/// Training on the multi-core CPU backend.

void TMVA::MethodDNN::TrainCpu()
{
#ifdef DNNCPU // Included only if DNNCPU flag is set.
   Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;

   size_t nValidationSamples = GetNumValidationSamples();
   size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;
   size_t nTestSamples = nValidationSamples;

   Log() << kDEBUG << "Using " << nValidationSamples << " validation samples." << Endl;
   Log() << kDEBUG << "Using " << nTestSamples << " test samples." << Endl;

   fNet.Initialize(fWeightInitialization);

   size_t trainingPhase = 1;
   for (TTrainingSettings & settings : fTrainingSettings) {

      Log() << "Training phase " << trainingPhase << " of "
            << fTrainingSettings.size() << ":" << Endl;
      trainingPhase++;

      TNet<TCpu<>> net(settings.batchSize, fNet);
      net.SetWeightDecay(settings.weightDecay);
      net.SetRegularization(settings.regularization);

      // Convert the drop-out fractions to the convention used by the backend
      // (probability of keeping a node rather than dropping it).
      std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
      for (auto & p : dropoutVector) {
         p = 1.0 - p;
      }
      net.SetDropoutProbabilities(dropoutVector);

      net.InitializeGradients();
      auto testNet = net.CreateClone(settings.batchSize);

      using DataLoader_t = TDataLoader<TMVAInput_t, TCpu<>>;

      // Split the training data into training and validation parts.
      const std::vector<Event *> &allData = GetEventCollection(Types::kTraining);
      const std::vector<Event *> trainingInputData =
         std::vector<Event *>(allData.begin(), allData.begin() + nTrainingSamples);
      const std::vector<Event *> testInputData =
         std::vector<Event *>(allData.begin() + nTrainingSamples, allData.end());

      if (trainingInputData.size() != nTrainingSamples) {
         Log() << kFATAL << "Inconsistent training sample size" << Endl;
      }
      if (testInputData.size() != nTestSamples) {
         Log() << kFATAL << "Inconsistent test sample size" << Endl;
      }

      size_t nThreads = 1;
      TMVAInput_t trainingTuple = std::tie(trainingInputData, DataInfo());
      TMVAInput_t testTuple     = std::tie(testInputData, DataInfo());
      DataLoader_t trainingData(trainingTuple, nTrainingSamples,
                                net.GetBatchSize(), net.GetInputWidth(),
                                net.GetOutputWidth(), nThreads);
      DataLoader_t testData(testTuple, nTestSamples, testNet.GetBatchSize(),
                            net.GetInputWidth(), net.GetOutputWidth(), nThreads);
      DNN::TGradientDescent<TCpu<>> minimizer(settings.learningRate,
                                              settings.convergenceSteps,
                                              settings.testInterval);

      std::vector<TNet<TCpu<>>>   nets{};
      std::vector<TBatch<TCpu<>>> batches{};
      nets.reserve(nThreads);
      for (size_t i = 0; i < nThreads; i++) {
         nets.push_back(net);
         for (size_t j = 0; j < net.GetDepth(); j++) {
            auto &masterLayer = net.GetLayer(j);
            auto &layer = nets.back().GetLayer(j);
            TCpu<>::Copy(layer.GetWeights(), masterLayer.GetWeights());
            TCpu<>::Copy(layer.GetBiases(),  masterLayer.GetBiases());
         }
      }

      bool   converged      = false;
      size_t stepCount      = 0;
      size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();

      std::chrono::time_point<std::chrono::system_clock> start, end;
      start = std::chrono::system_clock::now();

      Log() << std::setw(10) << "Epoch" << " | "
            << std::setw(12) << "Train Err."
            << std::setw(12) << "Test Err."
            << std::setw(12) << "GFLOP/s"
            << std::setw(12) << "Conv. Steps" << Endl;
      std::string separator(62, '-');
      Log() << separator << Endl;

      while (!converged) {
         stepCount++;

         // Perform one epoch of minimization steps.
         trainingData.Shuffle();
         for (size_t i = 0; i < batchesInEpoch; i += nThreads) {
            batches.clear();
            batches.reserve(nThreads);
            for (size_t j = 0; j < nThreads; j++) {
               batches.push_back(trainingData.GetBatch());
            }
            if (settings.momentum > 0.0) {
               minimizer.StepMomentum(net, nets, batches, settings.momentum);
            } else {
               minimizer.Step(net, nets, batches);
            }
         }

         if ((stepCount % minimizer.GetTestInterval()) == 0) {
            // Compute the test error (event weights are included on the CPU backend).
            Double_t testError = 0.0;
            for (auto batch : testData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weightMatrix = batch.GetWeights();
               testError += testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
            }
            testError /= (Double_t) (nTestSamples / settings.batchSize);

            end = std::chrono::system_clock::now();

            // Compute the training error.
            Double_t trainingError = 0.0;
            for (auto batch : trainingData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weightMatrix = batch.GetWeights();
               trainingError += net.Loss(inputMatrix, outputMatrix, weightMatrix);
            }
            trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);

            if (fInteractive) {
               fInteractive->AddPoint(stepCount, trainingError, testError);
               fIPyCurrentIter = 100 * (double) minimizer.GetConvergenceCount()
                                 / (double) settings.convergenceSteps;
            }

            // Throughput over the last test interval.
            std::chrono::duration<double> elapsed_seconds = end - start;
            double seconds = elapsed_seconds.count();
            double nFlops  = (double) (settings.testInterval * batchesInEpoch);
            // ... (scaled by the per-batch floating-point operation count of the net)

            converged = minimizer.HasConverged(testError);
            start = std::chrono::system_clock::now();

            if (!fInteractive) {
               Log() << std::setw(10) << stepCount << " | "
                     << std::setw(12) << trainingError
                     << std::setw(12) << testError
                     << std::setw(12) << nFlops / seconds
                     << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
            }
         }
      }

      // Copy the trained weights of this phase back into fNet.
      for (size_t l = 0; l < net.GetDepth(); l++) {
         auto & layer = fNet.GetLayer(l);
         // ... (assign the weight and bias matrices of the trained net to this layer)
      }
   }

#else // DNNCPU flag not set.
   Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
                      "you have a BLAS implementation and it was successfully "
                      "detected by CMake, and that the imt CMake flag is set." << Endl;
#endif // DNNCPU
}
Double_t TMVA::MethodDNN::GetMvaValue(Double_t* /*errLower*/, Double_t* /*errUpper*/)
{
   size_t nVariables = GetEvent()->GetNVariables();
   Matrix_t X(1, nVariables);
   Matrix_t YHat(1, 1);

   const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];
   }
   fNet.Prediction(YHat, X, fOutputFunction);
   return YHat(0, 0);
}

const std::vector<Float_t>& TMVA::MethodDNN::GetRegressionValues()
{
   const Event* ev = GetEvent();
   size_t nVariables = ev->GetNVariables();
   Matrix_t X(1, nVariables);
   const std::vector<Float_t>& inputValues = ev->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];
   }

   size_t nTargets = std::max(1u, ev->GetNTargets());
   Matrix_t YHat(1, nTargets);
   std::vector<Float_t> output(nTargets);
   auto net = fNet.CreateClone(1);
   net.Prediction(YHat, X, fOutputFunction);
   for (size_t i = 0; i < nTargets; i++)
      output[i] = YHat(0, i);

   if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();

   // Transform the network output back to the original target range.
   Event* evT = new Event(*ev);
   for (size_t i = 0; i < nTargets; ++i) {
      evT->SetTarget(i, output[i]);
   }
   const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
   for (size_t i = 0; i < nTargets; ++i) {
      fRegressionReturnVal->push_back(evT2->GetTarget(i));
   }
   delete evT;
   return *fRegressionReturnVal;
}

const std::vector<Float_t>& TMVA::MethodDNN::GetMulticlassValues()
{
   size_t nVariables = GetEvent()->GetNVariables();
   Matrix_t X(1, nVariables);
   Matrix_t YHat(1, DataInfo().GetNClasses());
   if (fMulticlassReturnVal == NULL) {
      fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
   }

   const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];
   }
   fNet.Prediction(YHat, X, fOutputFunction);
   for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
      (*fMulticlassReturnVal)[i] = YHat(0, i);
   }
   return *fMulticlassReturnVal;
}
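// Application side (illustrative, not part of this file): once trained, the method
// is evaluated through the usual TMVA::Reader interface, e.g.
//    TMVA::Reader reader;
//    reader.AddVariable("var1", &var1);           // one call per input variable
//    reader.BookMVA("DNN", "dataset/weights/TMVAClassification_DNN.weights.xml");
//    Double_t mva = reader.EvaluateMVA("DNN");    // calls GetMvaValue() above
// The weight file path is an example; it depends on the Factory configuration.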
void TMVA::MethodDNN::AddWeightsXMLTo(void* parent) const
{
   void* nn = gTools().xmlengine().NewChild(parent, 0, "Weights");
   Int_t inputWidth = fNet.GetInputWidth();
   Int_t depth      = fNet.GetDepth();
   char lossFunction = static_cast<char>(fNet.GetLossFunction());
   gTools().xmlengine().NewAttr(nn, 0, "InputWidth",
                                gTools().StringFromInt(inputWidth));
   gTools().xmlengine().NewAttr(nn, 0, "Depth", gTools().StringFromInt(depth));
   gTools().xmlengine().NewAttr(nn, 0, "LossFunction", TString(lossFunction));
   gTools().xmlengine().NewAttr(nn, 0, "OutputFunction",
                                TString(static_cast<char>(fOutputFunction)));

   for (Int_t i = 0; i < depth; i++) {
      const auto& layer = fNet.GetLayer(i);
      auto layerxml = gTools().xmlengine().NewChild(nn, 0, "Layer");
      int activationFunction = static_cast<int>(layer.GetActivationFunction());
      gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
                                   TString::Itoa(activationFunction, 10));
      WriteMatrixXML(layerxml, "Weights", layer.GetWeights());
      WriteMatrixXML(layerxml, "Biases",  layer.GetBiases());
   }
}

void TMVA::MethodDNN::ReadWeightsFromXML(void* rootXML)
{
   fNet.Clear();
   fNet.SetBatchSize(1);

   size_t inputWidth, depth;
   char lossFunctionChar;
   char outputFunctionChar;
   // ... (geometry, loss function and output function are read back from the
   //      XML attributes written by AddWeightsXMLTo)

   fNet.SetInputWidth(inputWidth);
   fNet.SetLossFunction(static_cast<ELossFunction>(lossFunctionChar));
   fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);

   size_t previousWidth = inputWidth;
   for (size_t i = 0; i < depth; i++) {
      // ... (the activation function and layer width are read from the layer node)
      fNet.AddLayer(width, f);

      TMatrixT<Double_t> weights(width, previousWidth);
      TMatrixT<Double_t> biases(width, 1);
      ReadMatrixXML(layerXML, "Weights", weights);
      ReadMatrixXML(layerXML, "Biases",  biases);
      fNet.GetLayer(i).GetWeights() = weights;
      fNet.GetLayer(i).GetBiases()  = biases;

      previousWidth = width;
   }
}
void TMVA::MethodDNN::GetHelpMessage() const
{
   TString col    = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
   TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");

   Log() << Endl;
   Log() << col << "--- Short description:" << colres << Endl;
   Log() << Endl;
   Log() << "The DNN neural network is a feedforward" << Endl;
   Log() << "multilayer perceptron implementation. The DNN has a user-" << Endl;
   Log() << "defined hidden layer architecture, where the number of input (output)" << Endl;
   Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
   Log() << "signal and one background, regression or multiclass). " << Endl;
   Log() << Endl;
   Log() << col << "--- Performance optimisation:" << colres << Endl;
   Log() << Endl;
   const char* txt =
      "The DNN supports various options to improve performance in terms of training speed and \n"
      "reduction of overfitting: \n"
      "\n"
      "      - different training settings can be stacked. Such that the initial training \n"
      "        is done with a large learning rate and a large drop out fraction whilst \n"
      "        in a later stage learning rate and drop out can be reduced. \n"
      "      - drop out \n"
      "        [recommended: \n"
      "         initial training stage: 0.0 for the first layer, 0.5 for later layers. \n"
      "         later training stage: 0.1 or 0.0 for all layers \n"
      "         final training stage: 0.0] \n"
      "        Drop out is a technique where at each training cycle a fraction of arbitrary \n"
      "        nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n"
      "      - L1 and L2 regularization are available \n"
      "      - Minibatches \n"
      "        [recommended 10 - 150] \n"
      "        Arbitrary mini-batch sizes can be chosen. \n"
      "      - Multithreading \n"
      "        [recommended: True] \n"
      "        Multithreading can be turned on. The minibatches are distributed to the available \n"
      "        cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n"
      "\n"
      "      \"Layout\": \n"
      "          - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n"
      "          - meaning: \n"
      "              . two hidden layers (separated by \",\") \n"
      "              . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n"
      "              . the activation function for the output layer is LINEAR \n"
      "              . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n"
      "              . the second hidden layer has N+30 nodes, where N is the number of input neurons \n"
      "              . the number of nodes in the output layer is determined by the number of output nodes \n"
      "                and can therefore not be chosen freely. \n"
      "\n"
      "      \"ErrorStrategy\": \n"
      "          - SUMOFSQUARES \n"
      "            The error of the neural net is determined by a sum-of-squares error function. \n"
      "            For regression, this is the only possible choice. \n"
      "          - CROSSENTROPY \n"
      "            The error of the neural net is determined by a cross entropy function. The \n"
      "            output values are automatically (internally) transformed into probabilities \n"
      "            using a sigmoid function. \n"
      "            For signal/background classification this is the default choice. \n"
      "            For multiclass using cross entropy more than one or no output classes \n"
      "            can be equally true or false (e.g. Event 0: A and B are true, Event 1: \n"
      "            A and C is true, Event 2: C is true, ...) \n"
      "          - MUTUALEXCLUSIVE \n"
      "            In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n"
      "\n"
      "      \"WeightInitialization\" \n"
      "          - XAVIER \n"
      "            \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n"
      "            such that the variance of the values of the nodes is preserved for each layer. \n"
      "          - XAVIERUNIFORM \n"
      "            The same as XAVIER, but with uniformly distributed weights instead of gaussian weights \n"
      "          - LAYERSIZE \n"
      "            Random values scaled by the layer size \n"
      "\n"
      "      \"TrainingStrategy\" \n"
      "          - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n"
      "          - explanation: two stacked training settings separated by \"|\" \n"
      "            . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n"
      "            . second training setting: \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n"
      "            . LearningRate: \n"
      "              - recommended for classification: 0.1 initially, 1e-4 later \n"
      "              - recommended for regression: 1e-4 and less \n"
      "            . Momentum: \n"
      "              preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n"
      "            . Repetitions: \n"
      "              train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n"
      "            . ConvergenceSteps: \n"
      "              Assume that convergence is reached after \"ConvergenceSteps\" cycles where no improvement \n"
      "              of the error on the test samples has been found. (Mind that only at each \"TestRepetitions\" \n"
      "              cycle the test samples are evaluated and thus the convergence is checked) \n"
      "            . BatchSize: \n"
      "              Size of the mini-batches. \n"
      "            . TestRepetitions: \n"
      "              Perform testing the neural net on the test samples each \"TestRepetitions\" cycle \n"
      "            . WeightDecay: \n"
      "              If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n"
      "            . Renormalize: \n"
      "              NONE, L1 (|w|) or L2 (w^2) \n"
      "            . DropConfig: \n"
      "              Drop a fraction of arbitrary nodes of each of the layers according to the values given \n"
      "              in the DropConfig. \n"
      "              [example: DropConfig=0.0+0.5+0.3 \n"
      "               meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n"
      "               in layer 2 \n"
      "               recommended: leave all the nodes turned on for the input layer (layer 0); \n"
      "               turn off half of the nodes in later layers for the initial training; leave all nodes \n"
      "               turned on (0.0) in later training stages] \n"
      "            . DropRepetitions: \n"
      "              Each \"DropRepetitions\" cycle the configuration of which nodes are dropped is changed \n"
      "              [recommended: 1] \n"
      "            . Multithreading: \n"
      "              turn on multithreading [recommended: True] \n";
   Log() << txt << Endl;
}