TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
{
   std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
   if (it == keyValueMap.end()) {
      return TString("");
   }
   return it->second;
}
template <typename T>
T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
template <>
int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
{
   TString value(fetchValueTmp(keyValueMap, key));
   return value == "" ? defaultValue : value.Atoi();
}

template <>
double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
{
   TString value(fetchValueTmp(keyValueMap, key));
   return value == "" ? defaultValue : value.Atof();
}

template <>
TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
{
   TString value(fetchValueTmp(keyValueMap, key));
   return value == "" ? defaultValue : value;
}
template <>
bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
{
   TString value(fetchValueTmp(keyValueMap, key));
   if (value == "") {
      return defaultValue;
   }
   value.ToUpper();
   if (value == "TRUE" || value == "T" || value == "1") {
      return true;
   }
   return false;
}
template <>
std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
                                  std::vector<double> defaultValue)
{
   TString parseString(fetchValueTmp(keyValueMap, key));
   if (parseString == "") {
      return defaultValue;
   }

   parseString.ToUpper();
   std::vector<double> values;

   // The individual values are separated by a '+', e.g. "0.0+0.5+0.5".
   const TString tokenDelim("+");
   TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
   TIter nextToken(tokenStrings);
   TObjString *tokenString = (TObjString *)nextToken();
   for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
      std::stringstream sstr;
      double currentValue;
      sstr << tokenString->GetString().Data();
      sstr >> currentValue;
      values.push_back(currentValue);
   }
   return values;
}
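The specializations above all follow the same pattern: look up the key, fall back to the supplied default when the key is absent, otherwise convert the stored string. A minimal sketch of the same idea in standard C++, assuming plain std::string keys and stream-based conversion (the helper name is illustrative and not part of MethodDL):

#include <map>
#include <sstream>
#include <string>

// Hypothetical generic helper: return defaultValue when key is missing,
// otherwise convert the stored string with operator>>.
template <typename T>
T fetchOrDefault(const std::map<std::string, std::string> &kv, const std::string &key, T defaultValue)
{
   auto it = kv.find(key);
   if (it == kv.end()) return defaultValue;
   std::istringstream in(it->second);
   T value{};
   in >> value;
   return in.fail() ? defaultValue : value;
}

// Usage: fetchOrDefault(block, "BatchSize", 30) or fetchOrDefault(block, "LearningRate", 1.e-3)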
170 " or cross entropy (binary classification).");
182 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
183 "Specify as 100 to use exactly 100 events. (Default: 20%)");
195 "ConvergenceSteps=50,"
199 "Regularization=None,"
206 "ConvergenceSteps=50,"
212 "DropConfig=0.0+0.5+0.5,"
214 "Multithreading=True",
215 "TrainingStrategy",
"Defines the training strategies.");
// MethodDL::ProcessOptions (fragments)
   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kINFO << "Will ignore negative events in training!" << Endl;
   }

   if (fArchitectureString == "STANDARD") {
      Log() << kERROR << "The STANDARD architecture has been deprecated. "
                         "Please use Architecture=CPU or Architecture=GPU. "
                         "See the TMVA Users' Guide for instructions if you "
                         "encounter problems." << Endl;
   }
   if (fArchitectureString == "OPENCL") {
      Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
                         "Please use Architecture=CPU or Architecture=GPU for the "
                         "time being. See the TMVA Users' Guide for instructions "
                         "if you encounter problems." << Endl;
   }

   if (fArchitectureString == "GPU") {
#ifndef R__HAS_TMVAGPU   // no GPU support compiled in: fall back to CPU or Standard
      Log() << kERROR << "CUDA backend not enabled. Please make sure "
                         "you have CUDA installed and it was successfully "
                         "detected by CMake by using -Dcuda=On." << Endl;
#ifdef R__HAS_TMVACPU
      Log() << kINFO << "Will now use the CPU architecture!" << Endl;
#else
      Log() << kINFO << "Will now use the Standard architecture!" << Endl;
#endif
#else
      Log() << kINFO << "Will now use the GPU architecture!" << Endl;
#endif
   }

   if (fArchitectureString == "CPU") {
#ifndef R__HAS_TMVACPU   // no multi-core CPU support compiled in: fall back to GPU or Standard
      Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
                         "you have a BLAS implementation and it was successfully "
                         "detected by CMake, and that the imt CMake flag is set." << Endl;
#ifdef R__HAS_TMVAGPU
      Log() << kINFO << "Will now use the GPU architecture!" << Endl;
#else
      Log() << kINFO << "Will now use the Standard architecture!" << Endl;
#endif
#else
      Log() << kINFO << "Will now use the CPU architecture!" << Endl;
#endif
   }

   if (fArchitectureString == "STANDARD") {
      Log() << kINFO << "Will use the deprecated STANDARD architecture!" << Endl;
   }

   // For regression, only the sum-of-squares error function is supported.
   if (fAnalysisType == Types::kRegression && fErrorStrategy != "SUMOFSQUARES") {
      Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
            << "neural net error function. Setting error function to "
            << "SUMOFSQUARES now." << Endl;
   }
   // Loop over the parsed training-strategy blocks and fill one TTrainingSettings per block.
   for (auto &block : strategyKeyValues) {
      // ...
      TString strMultithreading = fetchValueTmp(block, "Multithreading", TString("True"));
      if (strMultithreading.BeginsWith("T")) {
         // ...
      }
   }
// MethodDL::ParseInputLayout: the InputLayout string has the form "depth|height|width".
   const TString delim("|");
   size_t depth = 0, height = 0, width = 0;

   TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
   TIter nextInputDim(inputDimStrings);
   TObjString *inputDimString = (TObjString *)nextInputDim();

   for (int idxToken = 0; inputDimString != nullptr;
        inputDimString = (TObjString *)nextInputDim(), ++idxToken) {
      switch (idxToken) {
      case 0: { // input depth
         TString strDepth(inputDimString->GetString());
         depth = (size_t)strDepth.Atoi();
      } break;
      case 1: { // input height
         TString strHeight(inputDimString->GetString());
         height = (size_t)strHeight.Atoi();
      } break;
      case 2: { // input width
         TString strWidth(inputDimString->GetString());
         width = (size_t)strWidth.Atoi();
      } break;
      }
   }

   SetInputDepth(depth);
   SetInputHeight(height);
   SetInputWidth(width);
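As a concrete illustration of the "depth|height|width" convention, the following standalone sketch splits such a string with std::getline; the helper name and the example value are illustrative, not taken from MethodDL:

#include <array>
#include <sstream>
#include <string>

// Hypothetical helper: split "1|28|28" into {depth, height, width}.
std::array<std::size_t, 3> parseDimLayout(const std::string &layout)
{
   std::array<std::size_t, 3> dims{0, 0, 0};
   std::istringstream in(layout);
   std::string token;
   for (std::size_t i = 0; i < dims.size() && std::getline(in, token, '|'); ++i) {
      dims[i] = static_cast<std::size_t>(std::stoul(token));
   }
   return dims;
}

// Example: parseDimLayout("1|28|28") -> depth 1, height 28, width 28 (one 28x28 image plane).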
// MethodDL::ParseBatchLayout: the BatchLayout string has the form "batchDepth|batchHeight|batchWidth".
   const TString delim("|");

   size_t batchDepth = 0;
   size_t batchHeight = 0;
   size_t batchWidth = 0;

   TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
   TIter nextBatchDim(batchDimStrings);
   TObjString *batchDimString = (TObjString *)nextBatchDim();

   for (int idxToken = 0; batchDimString != nullptr;
        batchDimString = (TObjString *)nextBatchDim(), ++idxToken) {
      switch (idxToken) {
      case 0: { // batch depth
         TString strDepth(batchDimString->GetString());
         batchDepth = (size_t)strDepth.Atoi();
      } break;
      case 1: { // batch height
         TString strHeight(batchDimString->GetString());
         batchHeight = (size_t)strHeight.Atoi();
      } break;
      case 2: { // batch width
         TString strWidth(batchDimString->GetString());
         batchWidth = (size_t)strWidth.Atoi();
      } break;
      }
   }

   SetBatchDepth(batchDepth);
   SetBatchHeight(batchHeight);
   SetBatchWidth(batchWidth);
// MethodDL::CreateDeepNet: split the Layout string into individual layer specifications
// (layers separated by ',', fields within a layer separated by '|') and dispatch on the layer type.
template <typename Architecture_t, typename Layer_t>
void MethodDL::CreateDeepNet(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                             std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets)
{
   const TString layerDelimiter(",");
   const TString subDelimiter("|");

   TString layoutString = this->GetLayoutString();

   // Split the layers
   TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
   TIter nextLayer(layerStrings);
   TObjString *layerString = (TObjString *)nextLayer();

   for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
      // Split the fields of this layer
      TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
      TIter nextToken(subStrings);
      TObjString *token = (TObjString *)nextToken();

      // The first field is the layer type
      TString strLayerType = token->GetString();

      if (strLayerType == "DENSE") {
         ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "CONV") {
         ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "MAXPOOL") {
         ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "RESHAPE") {
         ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "RNN") {
         ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "LSTM") {
         Log() << kFATAL << "LSTM layer is not yet fully implemented" << Endl;
      }
   }
}
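The dispatch above means a whole network is described by a single Layout string. A hedged example follows; the sizes are invented, only the per-layer field order follows the Parse* functions in this file:

// Illustrative Layout string for a small CNN followed by a dense classifier head.
// CONV:    depth | filter height | filter width | stride rows | stride cols | pad height | pad width | activation
// MAXPOOL: frame height | frame width | stride rows | stride cols
// RESHAPE: FLAT flattens the tensor before the dense layers
// DENSE:   number of nodes | activation
const char *layout =
   "CONV|16|3|3|1|1|1|1|RELU,"
   "MAXPOOL|2|2|2|2,"
   "RESHAPE|FLAT,"
   "DENSE|64|TANH,"
   "DENSE|1|LINEAR";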
// MethodDL::ParseDenseLayer: a dense layer is specified as "DENSE|<number of nodes>|<activation>";
// the node count may be an arithmetic expression in N, the number of input variables.
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseDenseLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                               std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets,
                               TString layerString, TString delim)
{
   int width = 0;
   EActivationFunction activationFunction = EActivationFunction::kTanh;

   const size_t inputSize = GetNvar();

   // Split the layer fields
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      ++idxToken;
      if (idxToken == 1) continue; // the first token is the layer type ("DENSE")

      TString strActFnc(token->GetString());
      if (strActFnc == "RELU") {
         activationFunction = DNN::EActivationFunction::kRelu;
      } else if (strActFnc == "TANH") {
         activationFunction = DNN::EActivationFunction::kTanh;
      } else if (strActFnc == "SYMMRELU") {
         activationFunction = DNN::EActivationFunction::kSymmRelu;
      } else if (strActFnc == "SOFTSIGN") {
         activationFunction = DNN::EActivationFunction::kSoftSign;
      } else if (strActFnc == "SIGMOID") {
         activationFunction = DNN::EActivationFunction::kSigmoid;
      } else if (strActFnc == "LINEAR") {
         activationFunction = DNN::EActivationFunction::kIdentity;
      } else if (strActFnc == "GAUSS") {
         activationFunction = DNN::EActivationFunction::kGauss;
      } else if (width == 0) {
         // The token is the number of nodes; "N"/"n" stands for the number of input variables.
         TString strNumNodes = strActFnc;
         TString strN("x");
         strNumNodes.ReplaceAll("N", strN);
         strNumNodes.ReplaceAll("n", strN);
         // ... (the substituted expression is evaluated numerically to obtain `width`)
      }
   }
   // ... (the dense layer of the given width and activation is added to deepNet and,
   //      if fBuildNet is set, to fNet)
}
// MethodDL::ParseConvLayer: a convolutional layer is specified as
// "CONV|depth|fltHeight|fltWidth|strideRows|strideCols|padHeight|padWidth|activation".
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseConvLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                              std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets,
                              TString layerString, TString delim)
{
   int depth = 0, fltHeight = 0, fltWidth = 0;
   int strideRows = 0, strideCols = 0;
   int zeroPadHeight = 0, zeroPadWidth = 0;
   EActivationFunction activationFunction = EActivationFunction::kTanh;

   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken(), ++idxToken) {
      TString str(token->GetString());
      switch (idxToken) {
      case 1: depth         = str.Atoi(); break; // depth (number of feature maps)
      case 2: fltHeight     = str.Atoi(); break; // filter height
      case 3: fltWidth      = str.Atoi(); break; // filter width
      case 4: strideRows    = str.Atoi(); break; // stride in rows
      case 5: strideCols    = str.Atoi(); break; // stride in columns
      case 6: zeroPadHeight = str.Atoi(); break; // zero padding height
      case 7: zeroPadWidth  = str.Atoi(); break; // zero padding width
      case 8:                                    // activation function
         if      (str == "RELU")     activationFunction = DNN::EActivationFunction::kRelu;
         else if (str == "TANH")     activationFunction = DNN::EActivationFunction::kTanh;
         else if (str == "SYMMRELU") activationFunction = DNN::EActivationFunction::kSymmRelu;
         else if (str == "SOFTSIGN") activationFunction = DNN::EActivationFunction::kSoftSign;
         else if (str == "SIGMOID")  activationFunction = DNN::EActivationFunction::kSigmoid;
         else if (str == "LINEAR")   activationFunction = DNN::EActivationFunction::kIdentity;
         else if (str == "GAUSS")    activationFunction = DNN::EActivationFunction::kGauss;
         break;
      }
   }

   deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                        zeroPadHeight, zeroPadWidth, activationFunction);

   if (fBuildNet)
      fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                         zeroPadHeight, zeroPadWidth, activationFunction);
}
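For readers tuning the filter, stride and padding fields, the standard convolution output-size arithmetic (not specific to TMVA) is worth keeping in mind:

// Standard convolution output-size arithmetic:
// given input height H, filter height F, zero padding P and stride S,
//   outHeight = (H + 2*P - F) / S + 1
#include <cstdio>

int main()
{
   const int H = 28, F = 3, P = 1, S = 1;
   std::printf("outHeight = %d\n", (H + 2 * P - F) / S + 1); // (28 + 2 - 3) / 1 + 1 = 28
   return 0;
}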
// MethodDL::ParseMaxPoolLayer: a max-pooling layer is specified as
// "MAXPOOL|frameHeight|frameWidth|strideRows|strideCols".
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                                 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets,
                                 TString layerString, TString delim)
{
   int filterHeight = 0, filterWidth = 0;
   int strideRows = 0, strideCols = 0;

   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken(), ++idxToken) {
      TString str(token->GetString());
      switch (idxToken) {
      case 1: filterHeight = str.Atoi(); break; // pooling frame height
      case 2: filterWidth  = str.Atoi(); break; // pooling frame width
      case 3: strideRows   = str.Atoi(); break; // stride in rows
      case 4: strideCols   = str.Atoi(); break; // stride in columns
      }
   }

   deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);

   if (fBuildNet)
      fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
}
// MethodDL::ParseReshapeLayer: a reshape layer is specified as
// "RESHAPE|depth|height|width" or "RESHAPE|FLAT" to flatten the tensor.
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseReshapeLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                                 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets,
                                 TString layerString, TString delim)
{
   int depth = 0, height = 0, width = 0;
   bool flattening = false;

   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken(), ++idxToken) {
      if (token->GetString() == "FLAT") idxToken = 4;
      TString str(token->GetString());
      switch (idxToken) {
      case 1: depth  = str.Atoi(); break;
      case 2: height = str.Atoi(); break;
      case 3: width  = str.Atoi(); break;
      case 4: {                        // flattening flag
         TString flat(str);
         if (flat == "FLAT") {
            flattening = true;
         }
      } break;
      }
   }
   // ...
   deepNet.AddReshapeLayer(depth, height, width, flattening);
   if (fBuildNet)
      fNet->AddReshapeLayer(depth, height, width, flattening);
}
// MethodDL::ParseRnnLayer: a recurrent layer is specified as
// "RNN|stateSize|inputSize|timeSteps|rememberState".
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseRnnLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                             std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets,
                             TString layerString, TString delim)
{
   int stateSize = 0, inputSize = 0, timeSteps = 0;
   bool rememberState = false;

   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken(), ++idxToken) {
      TString str(token->GetString());
      switch (idxToken) {
      case 1: stateSize     = str.Atoi(); break;        // size of the hidden state
      case 2: inputSize     = str.Atoi(); break;        // size of the input at each time step
      case 3: timeSteps     = str.Atoi(); break;        // number of time steps
      case 4: rememberState = (bool)str.Atoi(); break;  // keep the state between batches
      }
   }

   deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);

   if (fBuildNet)
      fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
}
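Putting that field order to use, a hedged example of a recurrent layout; the sizes are invented and only the ordering of the fields follows the parser above:

// Illustrative recurrent layout: hidden state of 10, 8 input features per step,
// 5 time steps, state not carried across batches, followed by a flatten + dense head.
const char *rnnLayout =
   "RNN|10|8|5|0,"
   "RESHAPE|FLAT,"
   "DENSE|64|TANH,"
   "DENSE|1|LINEAR";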
// MethodDL::ParseLstmLayer (LSTM layers are not yet supported, see CreateDeepNet above).
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseLstmLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                              std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets,
                              TString layerString, TString delim)
{
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   for (; token != nullptr; token = (TObjString *)nextToken()) {
      // ...
   }
}
// Standard constructor.
MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
   : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputDepth(), fInputHeight(), fInputWidth(),
     fBatchDepth(), fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
     fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(), fErrorStrategy(),
     fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(), fResume(false), fBuildNet(true),
     fTrainingSettings()
{
}

// Constructor from a weight file.
MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
   : MethodBase(Types::kDL, theData, theWeightFile), fInputDepth(), fInputHeight(), fInputWidth(), fBatchDepth(),
     fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(),
     fInputLayoutString(), fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(),
     fWeightInitializationString(), fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()
{
}
// MethodDL::ParseKeyValueString: split a string of the form
// "key1=value1,key2=value2,...|key1=value1,..." into one key/value map per block.
MethodDL::KeyValueVector_t MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
{
   KeyValueVector_t blockKeyValues;
   parseString.ReplaceAll(" ", "");

   const TString keyValueDelim("=");

   // Split the blocks
   TObjArray *blockStrings = parseString.Tokenize(blockDelim);
   TIter nextBlock(blockStrings);
   TObjString *blockString = (TObjString *)nextBlock();

   for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
      blockKeyValues.push_back(std::map<TString, TString>());
      std::map<TString, TString> &currentBlock = blockKeyValues.back();

      // Split the key=value tokens within the block
      TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
      TIter nextToken(subStrings);
      TObjString *token = (TObjString *)nextToken();

      for (; token != nullptr; token = (TObjString *)nextToken()) {
         TString strKeyValue(token->GetString());
         int delimPos = strKeyValue.First(keyValueDelim.Data());
         if (delimPos <= 0) continue;

         TString strKey = TString(strKeyValue(0, delimPos));
         TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));

         currentBlock.insert(std::make_pair(strKey, strValue));
      }
   }
   return blockKeyValues;
}
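For readers less familiar with the ROOT string classes, a minimal standard-library sketch of the same two-level split (block delimiter '|', token delimiter ',', key/value separator '='); the names are illustrative:

#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical stand-alone version of the key/value block parser.
std::vector<std::map<std::string, std::string>> parseKeyValueBlocks(const std::string &input)
{
   std::vector<std::map<std::string, std::string>> blocks;
   std::istringstream blockStream(input);
   std::string block;
   while (std::getline(blockStream, block, '|')) {
      blocks.emplace_back();
      std::istringstream tokenStream(block);
      std::string token;
      while (std::getline(tokenStream, token, ',')) {
         auto pos = token.find('=');
         if (pos == std::string::npos || pos == 0) continue; // skip malformed tokens
         blocks.back()[token.substr(0, pos)] = token.substr(pos + 1);
      }
   }
   return blocks;
}

int main()
{
   auto blocks = parseKeyValueBlocks("LearningRate=1e-3,BatchSize=32|LearningRate=1e-4,BatchSize=32");
   std::cout << blocks.size() << " strategy blocks, first LearningRate = "
             << blocks[0]["LearningRate"] << "\n"; // prints: 2 strategy blocks, first LearningRate = 1e-3
   return 0;
}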
// MethodDL::GetNumValidationSamples: interpret the ValidationSize option, which may be given
// as a percentage ("20%"), a fraction ("0.2") or an absolute number of events ("100").
UInt_t MethodDL::GetNumValidationSamples()
{
   Int_t nValidationSamples = 0;
   UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();

   // Parsing
   // -------
   if (fNumValidationString.EndsWith("%")) {
      // Relative spec given as a percentage.
      TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));

      if (intValStr.IsFloat()) {
         Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
      } else {
         Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
               << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
      }
   } else if (fNumValidationString.IsFloat()) {
      Double_t valSizeAsDouble = fNumValidationString.Atof();

      if (valSizeAsDouble < 1.0) {
         // Relative spec given as a fraction of the training set.
         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
      } else {
         // Absolute spec given as a number of events.
         nValidationSamples = valSizeAsDouble;
      }
   } else {
      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
            << "\". Expected string like \"0.2\" or \"100\"." << Endl;
   }

   // Validation
   // ----------
   if (nValidationSamples < 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
   }
   if (nValidationSamples == 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
   }
   if (nValidationSamples >= (Int_t)trainingSetSize) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString
            << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
   }

   return nValidationSamples;
}
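A quick check of the three accepted spellings, assuming a training set of 1000 events. This is a plain C++ restatement of the rules above, not a call into MethodDL, and the helper name is made up:

#include <cstdio>
#include <string>

// Hypothetical restatement of the ValidationSize rules.
int validationEvents(const std::string &spec, int trainingSetSize)
{
   if (!spec.empty() && spec.back() == '%')
      return static_cast<int>(trainingSetSize * std::stod(spec.substr(0, spec.size() - 1)) / 100.0);
   double v = std::stod(spec);
   return v < 1.0 ? static_cast<int>(trainingSetSize * v) : static_cast<int>(v);
}

int main()
{
   std::printf("%d %d %d\n", validationEvents("20%", 1000),  // 200
                             validationEvents("0.2", 1000),  // 200
                             validationEvents("100", 1000)); // 100
   return 0;
}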
// MethodDL::TrainDeepNet: train the network for the architecture selected at run time.
template <typename Architecture_t>
void MethodDL::TrainDeepNet()
{
   using Scalar_t = typename Architecture_t::Scalar_t;
   // ...

   // Split the full training sample into the events used for training and those used for validation.
   const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
   const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};

   // One training phase per training-strategy block.
   size_t trainingPhase = 1;
   for (const TTrainingSettings &settings : GetTrainingSettings()) {

      size_t nThreads = 1;
      size_t batchSize = settings.batchSize;
      // Sanity checks on the batch layout: the batch size must appear either as the batch depth
      // or (for a flat layout with batchDepth == 1) as the batch height.
      if (batchDepth != batchSize && batchDepth > 1) {
         Error("Train",
               "Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",
               batchDepth, batchSize);
      }
      if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight) {
         Error("Train",
               "Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",
               batchHeight, batchSize);
      }

      // Check that the input layout is compatible with the batch layout.
      bool badLayout = false;
      // case where batchDepth == batchSize
      if (batchDepth == batchSize)
         badLayout = (inputDepth * inputHeight * inputWidth != batchHeight * batchWidth);
      // case of a flat layout (batchHeight == batchSize and batchDepth == 1)
      if (batchHeight == batchSize && batchDepth == 1)
         badLayout |= (inputDepth * inputHeight * inputWidth != batchWidth);
      if (badLayout) {
         Error("Train", "Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
               inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth);
      }
      // Both the training and the validation sample must contain at least one full batch.
      if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
         Log() << kFATAL << "Number of samples in the datasets are train: ("
               << nTrainingSamples << ") test: (" << nValidationSamples
               << "). One of these is smaller than the batch size of "
               << settings.batchSize << ". Please increase the batch"
               << " size to be at least the same size as the smallest"
               << " of them." << Endl;
      }
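As a worked example of the compatibility rule above: an input layout of 1|1|14 (14 flat variables) with batch size 32 can be written either as BatchLayout=32|1|14 (the batch size is the batch depth) or as 1|32|14 (flat layout). A hedged check in plain C++:

#include <cassert>
#include <cstddef>

int main()
{
   const std::size_t inputDepth = 1, inputHeight = 1, inputWidth = 14, batchSize = 32;

   // BatchLayout = 32|1|14 : batchDepth == batchSize, so depth*height*width must equal batchHeight*batchWidth.
   std::size_t bd = 32, bh = 1, bw = 14;
   assert(bd == batchSize && inputDepth * inputHeight * inputWidth == bh * bw);

   // BatchLayout = 1|32|14 : flat layout, so depth*height*width must equal batchWidth.
   bd = 1; bh = 32; bw = 14;
   assert(bh == batchSize && bd == 1 && inputDepth * inputHeight * inputWidth == bw);

   return 0;
}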
      // Create the architecture-specific network used for training and, in the first phase,
      // the implementation network fNet kept for the final evaluation (with batch size 1).
      DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R,
                        weightDecay);

      if (trainingPhase == 1) {
         fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
                                                                 batchHeight, batchWidth, J, I, R, weightDecay));
      }
      // One copy of the network per thread.
      std::vector<DeepNet_t> nets{};
      nets.reserve(nThreads);
      for (size_t i = 0; i < nThreads; i++) {
         // create copies of the master deep net
         nets.push_back(deepNet);
      }

      // After the first phase, start from the weights obtained in the previous phase.
      if (trainingPhase > 1) {
         for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
            const auto &nLayer = fNet->GetLayerAt(i);
            const auto &dLayer = deepNet.GetLayerAt(i);
            // copy weights and biases from the stored evaluation net into the training net
            Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights());
            Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases());
         }
      }
      int n1 = batchHeight;
      int n2 = batchWidth;
      // ...

      Log() << "***** Deep Learning Network *****" << Endl;
      if (Log().GetMinType() <= kINFO) {
         // ... (print the network architecture)
      }
      Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing"
            << Endl;

      // The data loaders stream the training and validation events as tensor batches
      // (trainingTuple / validationTuple wrap the event collections defined above).
      TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, deepNet.GetBatchSize(),
                                      deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
                                      deepNet.GetOutputWidth(), nThreads);

      TensorDataLoader_t validationData(validationTuple, nValidationSamples, deepNet.GetBatchSize(),
                                        deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
                                        deepNet.GetOutputWidth(), nThreads);
      // Compute the validation error of the untrained network as the initial "best" value.
      Double_t minValError = 0.0;
      for (auto batch : validationData) {
         auto inputTensor = batch.GetInput();
         auto outputMatrix = batch.GetOutput();
         auto weights = batch.GetWeights();
         // the loss is evaluated without the regularization term, which is added once at the end
         minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
      }
      Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
      minValError /= (Double_t)(nValidationSamples / settings.batchSize);
      minValError += regzTerm;
      // Create the optimizer selected in the training strategy (the constructor arguments,
      // e.g. learning rate and momentum, are taken from `settings`).
      std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
      switch (settings.optimizer) {
      case EOptimizer::kSGD:
         optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(/* ... */);
         break;
      case EOptimizer::kAdam:
         optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(/* ... */);
         break;
      case EOptimizer::kAdagrad:
         optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(/* ... */);
         break;
      case EOptimizer::kRMSProp:
         optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(/* ... */);
         break;
      case EOptimizer::kAdadelta:
         optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(/* ... */);
         break;
      }
      std::vector<TTensorBatch<Architecture_t>> batches{};

      bool converged = false;
      size_t convergenceCount = 0;
      size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();

      // start measuring
      std::chrono::time_point<std::chrono::system_clock> tstart, tend;
      tstart = std::chrono::system_clock::now();

      Log() << "Learning rate = " << settings.learningRate
            << " regularization " << (char)settings.regularization
            << " minimum error = " << minValError << Endl;

      // Header of the progress table printed during training.
      std::string separator(62, '-');
      Log() << separator << Endl;
      Log() << std::setw(10) << "Epoch"
            << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err."
            << std::setw(12) << "t(s)/epoch" << std::setw(12) << "t(s)/Loss"
            << std::setw(12) << "nEvents/s"
            << std::setw(12) << "Conv. Steps" << Endl;
      Log() << separator << Endl;
      size_t shuffleSeed = 0;
      // ...

      // Optional debug printout of the initial weights of the first layer.
      if (debug) {
         Log() << "Initial Deep Net Weights " << Endl;
         auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
         for (size_t l = 0; l < weights_tensor.size(); ++l)
            weights_tensor[l].Print();
         auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
         bias_tensor[0].Print();
      }
      while (!converged) {
         // Shuffle the training data at the start of every epoch.
         trainingData.Shuffle(rng);

         // One epoch: loop over all batches, doing a forward pass, a backward pass and an optimizer update.
         for (size_t i = 0; i < batchesInEpoch; ++i) {
            auto my_batch = trainingData.GetTensorBatch();
            deepNet.Forward(my_batch.GetInput(), true);
            deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
            // ... (optimizer update step)
         }

         // Every testInterval epochs, evaluate the loss on the validation sample.
         if ((optimizer->GetGlobalStep() % settings.testInterval) == 0) {
            std::chrono::time_point<std::chrono::system_clock> t1, t2;
            t1 = std::chrono::system_clock::now();

            // Compute the validation error.
            Double_t valError = 0.0;
            for (auto batch : validationData) {
               auto inputTensor = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weights = batch.GetWeights();
               valError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
            }
            // normalize by the number of batches and add the regularization term
            Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
            valError /= (Double_t)(nValidationSamples / settings.batchSize);
            valError += regTerm;

            t2 = std::chrono::system_clock::now();

            // Reset the convergence counter on improvement, otherwise accumulate the tested epochs.
            if (valError < minValError) {
               convergenceCount = 0;
            } else {
               convergenceCount += settings.testInterval;
            }
            // Whenever a new minimum of the validation error is found, copy the current weights
            // into fNet, the network kept for the final evaluation.
            if (valError < minValError) {
               Log() << " Minimum Test error found - save the configuration " << Endl;
               for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
                  const auto &nLayer = fNet->GetLayerAt(i);
                  const auto &dLayer = deepNet.GetLayerAt(i);
                  // ... (copy the weights and biases of dLayer into nLayer)
               }
               minValError = valError;
            } else if (minValError <= 0.) {
               // first validation pass: the initial error was not meaningful
               minValError = valError;
            }
            // Compute the error on the training sample as well (for the progress table).
            Double_t trainingError = 0.0;
            for (auto batch : trainingData) {
               auto inputTensor = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weights = batch.GetWeights();
               trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
            }
            trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
            trainingError += regTerm;

            // stop measuring
            tend = std::chrono::system_clock::now();

            // Timing information for the progress table.
            std::chrono::duration<double> elapsed_seconds = tend - tstart;
            std::chrono::duration<double> elapsed1 = t1 - tstart;
            // time spent evaluating the validation and training losses
            std::chrono::duration<double> elapsed_testing = tend - t1;

            double seconds = elapsed_seconds.count();
            // time per training event
            double eventTime = elapsed1.count() / (batchesInEpoch * settings.testInterval * settings.batchSize);

            // Training stops when the validation error has not improved for convergenceSteps
            // test intervals or when the maximum number of epochs is reached.
            converged =
               convergenceCount > settings.convergenceSteps || optimizer->GetGlobalStep() >= settings.maxEpochs;
            // One row of the progress table per test interval.
            Log() << std::setw(10) << optimizer->GetGlobalStep() << " | "
                  << std::setw(12) << trainingError
                  << std::setw(12) << valError
                  << std::setw(12) << seconds / settings.testInterval
                  << std::setw(12) << elapsed_testing.count()
                  << std::setw(12) << 1. / eventTime
                  << std::setw(12) << convergenceCount << Endl;

            // restart the timer for the next test interval
            tstart = std::chrono::system_clock::now();
         }

         if (converged && debug) {
            Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << optimizer->GetGlobalStep()
                  << Endl;
            auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
            auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
            for (size_t l = 0; l < weights_tensor.size(); ++l)
               weights_tensor[l].Print();
            bias_tensor[0].Print();
         }
      }

      trainingPhase++;
   }
}
// MethodDL::Train: dispatch the training to the backend selected via the Architecture option.
void MethodDL::Train()
{
   if (fInteractive) {
      Log() << kFATAL << "Not implemented yet" << Endl;
      return;
   }

   if (this->GetArchitectureString() == "GPU") {
#ifdef R__HAS_TMVAGPU
      Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
      TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
#else
      Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                         "you have CUDA installed and it was successfully "
                         "detected by CMake." << Endl;
#endif
   } else if (this->GetArchitectureString() == "OPENCL") {
      Log() << kFATAL << "OPENCL backend not yet supported." << Endl;
   } else if (this->GetArchitectureString() == "CPU") {
#ifdef R__HAS_TMVACPU
      Log() << kINFO << "Start of deep neural network training on CPU." << Endl << Endl;
      TrainDeepNet<DNN::TCpu<ScalarImpl_t>>();
#else
      Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
                         "you have a BLAS implementation and it was successfully "
                         "detected by CMake, and that the imt CMake flag is set." << Endl;
#endif
   } else if (this->GetArchitectureString() == "STANDARD") {
      Log() << kINFO << "Start of deep neural network training on the STANDARD architecture" << Endl << Endl;
      TrainDeepNet<DNN::TReference<ScalarImpl_t>>();
   } else {
      Log() << kFATAL << this->GetArchitectureString()
            << " is not a supported architecture for TMVA::MethodDL" << Endl;
   }
}
// MethodDL::GetMvaValue (fragments)
   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built" << Endl;
   }
   // ...

   // Fill the single-event input matrix of fXInput from the event variables (row-major order).
   int n1 = fXInput[0].GetNrows();
   int n2 = fXInput[0].GetNcols();

   int nVariables = GetEvent()->GetNVariables();
   if (n1 * n2 != nVariables) {
      Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
            << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2 << Endl;
   }

   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
   for (int j = 0; j < n1; ++j) {
      for (int k = 0; k < n2; k++) {
         fXInput[0](j, k) = inputValues[j * n2 + k];
      }
   }

   // Run the prediction and return the single output value.
   fNet->Prediction(*fYHat, fXInput, fOutputFunction);
   double mvaValue = (*fYHat)(0, 0);
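The nested loop above stores the flat event vector in row-major order. A minimal illustration with a 2 x 3 matrix (plain C++, names illustrative):

#include <cstdio>
#include <vector>

int main()
{
   const int n1 = 2, n2 = 3;                      // matrix has n1 rows and n2 columns
   std::vector<float> inputValues{0, 1, 2, 3, 4, 5};
   float X[n1][n2];

   for (int j = 0; j < n1; ++j)
      for (int k = 0; k < n2; ++k)
         X[j][k] = inputValues[j * n2 + k];       // element (j,k) comes from position j*n2+k

   std::printf("%g %g\n", X[0][2], X[1][0]);      // prints: 2 3
   return 0;
}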
#ifdef DEBUG_MVAVALUE
   // Debug printout: dump the input, the output of every layer and the first weight matrix of each layer.
   using Tensor_t = std::vector<MatrixImpl_t>;
   TMatrixF xInput(n1, n2, inputValues.data());
   xInput.Print();
   std::cout << "Output of DeepNet " << mvaValue << std::endl;
   auto &deepnet = *fNet;
   std::cout << "Loop on layers " << std::endl;
   for (int l = 0; l < deepnet.GetDepth(); ++l) {
      std::cout << "Layer " << l;
      const auto *layer = deepnet.GetLayerAt(l);
      const Tensor_t &layer_output = layer->GetOutput();
      std::cout << "DNN output " << layer_output.size() << std::endl;
      for (size_t i = 0; i < layer_output.size(); ++i) {
#ifdef R__HAS_TMVAGPU
         // on the GPU the device data must first be copied into a host-side TMatrixD
         TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols(), layer_output[i].GetRawDataPointer());
         m.Print();
#else
         // ... (print the matrix directly)
#endif
      }
      const Tensor_t &layer_weights = layer->GetWeights();
      std::cout << "DNN weights " << layer_weights.size() << std::endl;
      if (layer_weights.size() > 0) {
         size_t i = 0;
#ifdef R__HAS_TMVAGPU
         TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols(), layer_weights[i].GetRawDataPointer());
         m.Print();
#else
         // ... (print the matrix directly)
#endif
      }
   }
#endif
// MethodDL::PredictDeepNet: evaluate the stored network on events [firstEvt, lastEvt) in batches.
template <typename Architecture_t>
std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
{
   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built" << Endl;
   }

   using Matrix_t = typename Architecture_t::Matrix_t;
   // ...

   // Create an architecture-specific copy of fNet with the requested batch size.
   DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R,
                     weightDecay);
   std::vector<DeepNet_t> nets{};
   // ...
   for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
      const auto &nLayer = fNet->GetLayerAt(i);
      const auto &dLayer = deepNet.GetLayerAt(i);
      Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights());
      Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases());
   }

   size_t n1 = deepNet.GetBatchHeight();
   size_t n2 = deepNet.GetBatchWidth();
   size_t n0 = deepNet.GetBatchSize();
   // for a flat batch layout the batch size plays the role of the height
   if (batchDepth == 1 && batchHeight > 1) {
      n1 = deepNet.GetBatchSize();
      n0 = 1;
   }

   Long64_t nEvents = lastEvt - firstEvt;
   TensorDataLoader_t testData(testTuple, nEvents, batchSize, n0, n1, n2, deepNet.GetOutputWidth(), 1);

   // Matrix receiving the network output for one batch.
   Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth());

   if (logProgress)
      Log() << kINFO << "Evaluation of " << GetMethodName()
            << " sample (" << nEvents << " events)" << Endl;

   std::vector<double> mvaValues(nEvents);

   for (Long64_t ievt = firstEvt; ievt < lastEvt; ievt += batchSize) {

      Long64_t ievt_end = ievt + batchSize;
      if (ievt_end <= lastEvt) {

         if (ievt == firstEvt) {
            // Consistency check between the event variables and the network input layout (done once).
            size_t nVariables = GetEvent()->GetNVariables();

            if (n1 == batchSize && n0 == 1) {
               if (n2 != nVariables) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
                        << Endl;
               }
            } else {
               if (n1 * n2 != nVariables || n0 != batchSize) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1
                        << " x " << n2 << Endl;
               }
            }
         }

         auto batch = testData.GetTensorBatch();
         auto inputTensor = batch.GetInput();

         auto xInput = batch.GetInput();
         // ... (run the prediction on the whole batch, filling yHat)
         for (size_t i = 0; i < batchSize; ++i) {
            double value = yHat(i, 0);
            mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
         }
      } else {
         // The last, incomplete batch: evaluate the remaining events one by one.
         for (Long64_t i = ievt; i < lastEvt; ++i) {
            // ...
         }
      }
   }

   if (logProgress) {
      // ... (the elapsed time, measured with a TMVA Timer, is reported here)
      Log() << kINFO << "Elapsed time for evaluation of " << nEvents << " events: " << Endl;
   }

   return mvaValues;
}
// MethodDL::GetRegressionValues (fragments)
   size_t nVariables = GetEvent()->GetNVariables();
   // ... (prepare the single-row input matrix X and the output matrix YHat)
   std::vector<MatrixImpl_t> X_vec;
   const Event *ev = GetEvent();
   const std::vector<Float_t> &inputValues = ev->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];
   }
   X_vec.emplace_back(X);

   size_t nTargets = std::max(1u, ev->GetNTargets());
   std::vector<Float_t> output(nTargets);
   fNet->Prediction(YHat, X_vec, fOutputFunction);
   for (size_t i = 0; i < nTargets; i++)
      output[i] = YHat(0, i);

   if (fRegressionReturnVal == NULL) {
      fRegressionReturnVal = new std::vector<Float_t>();
   }
   fRegressionReturnVal->clear();

   // The predicted targets are transformed back with the inverse of the input transformation.
   for (size_t i = 0; i < nTargets; ++i) {
      evT->SetTarget(i, output[i]);
   }
   const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
   for (size_t i = 0; i < nTargets; ++i) {
      fRegressionReturnVal->push_back(evT2->GetTarget(i));
   }
   return *fRegressionReturnVal;
// MethodDL::GetMulticlassValues (fragments)
   size_t nVariables = GetEvent()->GetNVariables();
   // ... (prepare the single-row input matrix X and the output matrix YHat)
   std::vector<MatrixImpl_t> X_vec;

   if (fMulticlassReturnVal == NULL) {
      fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
   }

   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];
   }
   X_vec.emplace_back(X);

   fNet->Prediction(YHat, X_vec, fOutputFunction);
   for (size_t i = 0; i < (size_t)YHat.GetNcols(); i++) {
      (*fMulticlassReturnVal)[i] = YHat(0, i);
   }
   return *fMulticlassReturnVal;
// MethodDL::GetMvaValues: batch evaluation entry point, dispatching on the compiled backends.
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt - firstEvt;

   // never use a batch size larger than the number of events to evaluate
   if (size_t(nEvents) < batchSize) batchSize = nEvents;

#ifdef R__HAS_TMVAGPU
   Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
   return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
#endif
#ifdef R__HAS_TMVACPU
   Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
   return PredictDeepNet<DNN::TCpu<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
#endif
   Log() << kINFO << "Evaluate deep neural network on the STANDARD architecture using batches with size = "
         << batchSize << Endl << Endl;
   return PredictDeepNet<DNN::TReference<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
// MethodDL::AddWeightsXMLTo (fragments): serialize the network geometry, the enum settings
// and every layer into the "Weights" node of the TMVA weight file.
   void *nn = xmlEngine.NewChild(parent, 0, "Weights");

   Int_t depth = fNet->GetDepth();

   Int_t inputDepth = fNet->GetInputDepth();
   Int_t inputHeight = fNet->GetInputHeight();
   Int_t inputWidth = fNet->GetInputWidth();

   Int_t batchDepth = fNet->GetBatchDepth();
   Int_t batchHeight = fNet->GetBatchHeight();
   Int_t batchWidth = fNet->GetBatchWidth();

   // the enum class settings are stored as single characters
   char lossFunction = static_cast<char>(fNet->GetLossFunction());
   char initialization = static_cast<char>(fNet->GetInitialization());
   // ... (regularization and output function are converted in the same way)

   // ... (the geometry values above are written as attributes as well)
   xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
   xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
   xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
   xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));

   // Finally, write every layer of the network.
   for (Int_t i = 0; i < depth; i++)
// MethodDL::ReadWeightsFromXML (fragments): read the geometry and the settings back from the
// weight file, rebuild fNet layer by layer and finally read the per-layer weights.
   size_t inputDepth, inputHeight, inputWidth;
   // ... (read InputDepth/InputHeight/InputWidth from the XML attributes)
   size_t batchSize, batchDepth, batchHeight, batchWidth;
   // ... (read BatchSize/BatchDepth/BatchHeight/BatchWidth)

   char lossFunctionChar;
   char initializationChar;
   char regularizationChar;
   char outputFunctionChar;
   // ... (read LossFunction/Initialization/Regularization/OutputFunction)

   // Re-create the network with the stored geometry and settings.
   fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
                                                           batchHeight, batchWidth,
                                                           /* loss function, initialization, regularization,
                                                              weight decay as read above */));

   // Rebuild the layers one by one from their XML nodes.
   for (size_t i = 0; i < netDepth; i++) {
      // ... (layerXML is the i-th child node, layerName its node name)
      if (layerName == "DenseLayer") {
         // ... (read width and activation, then add the dense layer to fNet)
      } else if (layerName == "ConvLayer") {
         size_t depth = 0;
         size_t fltHeight, fltWidth = 0;
         size_t strideRows, strideCols = 0;
         size_t padHeight, padWidth = 0;
         // ... (read the attributes and the activation function)
         fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                            padHeight, padWidth, actFunction);
      } else if (layerName == "MaxPoolLayer") {
         size_t filterHeight, filterWidth = 0;
         size_t strideRows, strideCols = 0;
         // ...
         fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
      } else if (layerName == "ReshapeLayer") {
         size_t depth, height, width = 0;
         bool flattening = false;
         // ...
         fNet->AddReshapeLayer(depth, height, width, flattening);
      } else if (layerName == "RNNLayer") {
         size_t stateSize, inputSize, timeSteps = 0;
         int rememberState = 0;
         // ...
         fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
      }

      // the layer reads its own weights and biases
      fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
   }

   // ... (the evaluation input matrices are finally created with the stored batch layout)
   int n1 = batchHeight;
   int n2 = batchWidth;
#define REGISTER_METHOD(CLASS)
#define R(a, b, c, d, e, f, g, h, i)
char * Form(const char *fmt,...)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
Adadelta Optimizer class.
Generic Deep Neural Network class.
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
static void CopyDiffArch(TMatrixT< Scalar_t > &A, const AMatrix_t &B)
Stochastic Batch Gradient Descent Optimizer class.
Generic General Layer class.
void Initialize()
Initialize the weights and biases according to the given initialization method.
Class that contains all the data information.
Types::ETreeType GetCurrentType() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
void SetCurrentEvent(Long64_t ievt) const
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
UInt_t GetNVariables() const
accessor to the number of variables
UInt_t GetNTargets() const
accessor to the number of targets
std::vector< Float_t > & GetValues()
Float_t GetTarget(UInt_t itgt) const
Virtual base Class for all MVA method.
const char * GetName() const
Bool_t IgnoreEventsWithNegWeightsInTraining() const
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
const TString & GetMethodName() const
const Event * GetEvent() const
DataSetInfo & DataInfo() const
Types::EAnalysisType fAnalysisType
IPythonInteractive * fInteractive
void SetInputWidth(size_t inputWidth)
void GetHelpMessage() const
DNN::ELossFunction fLossFunction
The loss function.
virtual const std::vector< Float_t > & GetMulticlassValues()
TString fLayoutString
The string defining the layout of the deep net.
std::unique_ptr< MatrixImpl_t > fYHat
void Train()
Methods for training the deep learning network.
size_t GetBatchHeight() const
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class.
void ParseRnnLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate RNN layer.
TString fWeightInitializationString
The string defining the weight initialization method.
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate max-pooling layer.
size_t fRandomSeed
The random seed used to initialize the weights and to shuffle the batches (default is zero).
TString fArchitectureString
The string defining the architecture: CPU or GPU.
void Init()
default initializations
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
void TrainDeepNet()
Train the deep neural network using the defined architecture.
const std::vector< TTrainingSettings > & GetTrainingSettings() const
DNN::EOutputFunction GetOutputFunction() const
void ParseLstmLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate LSTM layer.
void SetInputDepth(size_t inputDepth)
Setters.
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate dense layer.
UInt_t GetNumValidationSamples()
Parse the validation string and return the number of events used for validation.
TString GetBatchLayoutString() const
size_t GetInputDepth() const
virtual const std::vector< Float_t > & GetRegressionValues()
std::unique_ptr< DeepNetImpl_t > fNet
TString GetInputLayoutString() const
void SetBatchHeight(size_t batchHeight)
std::vector< MatrixImpl_t > fXInput
size_t GetInputHeight() const
TString GetArchitectureString() const
void ParseBatchLayout()
Parse the batch layout.
void ReadWeightsFromStream(std::istream &)
void ReadWeightsFromXML(void *wghtnode)
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
std::vector< std::map< TString, TString > > KeyValueVector_t
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
DNN::EInitialization fWeightInitialization
The initialization method.
size_t GetBatchDepth() const
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
size_t GetInputWidth() const
DNN::ELossFunction GetLossFunction() const
TString fBatchLayoutString
The string defining the layout of the batch.
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
TString fTrainingStrategyString
The string defining the training strategy.
const Ranking * CreateRanking()
void SetInputHeight(size_t inputHeight)
void SetBatchDepth(size_t batchDepth)
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
void SetBatchWidth(size_t batchWidth)
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
DNN::EInitialization GetWeightInitialization() const
TString GetLayoutString() const
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
size_t GetBatchWidth() const
void AddWeightsXMLTo(void *parent) const
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
virtual ~MethodDL()
Virtual Destructor.
void ParseInputLayout()
Parse the input layout.
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling ProcessOptions(), all of the options are parsed; using the parsed options,...
TString fErrorStrategy
The string defining the error strategy for training.
void DeclareOptions()
The option handling methods.
TString fInputLayoutString
The string defining the layout of the input.
EMsgType GetMinType() const
Ranking for variables in method (implementation)
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Singleton class for Global types used by TMVA.
void Print(Option_t *name="") const
Print the matrix as a table of elements.
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Collectable string class.
const TString & GetString() const
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=0)
create new child element for parent node
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
EOptimizer
Enum representing the optimizer used for training.
EOutputFunction
Enum that represents output functions.
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
ERegularization
Enum representing the regularization type applied for a given layer.
EActivationFunction
Enum that represents layer activation functions.
ELossFunction
Enum that represents objective functions for the net, i.e.
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Abstract ClassifierFactory template that handles arbitrary types.
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
MsgLogger & Endl(MsgLogger &ml)
All of the options that can be specified in the training string.
DNN::EOptimizer optimizer
DNN::ERegularization regularization
std::vector< Double_t > dropoutProbabilities