78 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
79 if (it == keyValueMap.end()) {
132 if (value ==
"TRUE" || value ==
"T" || value ==
"1") {
142 std::vector<double> defaultValue)
145 if (parseString ==
"") {
150 std::vector<double> values;
154 TIter nextToken(tokenStrings);
156 for (; tokenString != NULL; tokenString = (
TObjString *)nextToken()) {
157 std::stringstream sstr;
160 sstr >> currentValue;
161 values.push_back(currentValue);
178 " or cross entropy (binary classification).");
194 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
195 "Specify as 100 to use exactly 100 events. (Default: 20%)");
207 "ConvergenceSteps=100,"
213 "Regularization=None,"
215 "TrainingStrategy",
"Defines the training strategies.");
223 Log() << kINFO <<
"Will ignore negative events in training!" <<
Endl;
227 Log() << kWARNING <<
"The STANDARD architecture is not supported anymore. "
228 "Please use Architecture=CPU or Architecture=CPU."
229 "See the TMVA Users' Guide for instructions if you "
230 "encounter problems."
232 Log() << kINFO <<
"We will use instead the CPU architecture" <<
Endl;
236 Log() << kERROR <<
"The OPENCL architecture has not been implemented yet. "
237 "Please use Architecture=CPU or Architecture=CPU for the "
238 "time being. See the TMVA Users' Guide for instructions "
239 "if you encounter problems."
242 Log() << kINFO <<
"We will try using the GPU-CUDA architecture if available" <<
Endl;
251 Log() << kINFO <<
"Will now use the GPU architecture !" <<
Endl;
253 Log() << kERROR <<
"CUDA backend not enabled. Please make sure "
254 "you have CUDA installed and it was successfully "
255 "detected by CMAKE by using -Dtmva-gpu=On "
258 Log() << kINFO <<
"Will now use instead the CPU architecture !" <<
Endl;
264 Log() << kINFO <<
"Will now use the CPU architecture with BLAS and IMT support !" <<
Endl;
266 Log() << kINFO <<
"Multi-core CPU backend not enabled. For better performances, make sure "
267 "you have a BLAS implementation and it was successfully "
268 "detected by CMake as well that the imt CMake flag is set."
270 Log() << kINFO <<
"Will use anyway the CPU architecture but with slower performance" <<
Endl;
290 Log() << kWARNING <<
"For regression only SUMOFSQUARES is a valid "
291 <<
" neural net error function. Setting error function to "
292 <<
" SUMOFSQUARES now." <<
Endl;
331 for (
auto &block : strategyKeyValues) {
354 if (optimizer ==
"SGD") {
356 }
else if (optimizer ==
"ADAM") {
358 }
else if (optimizer ==
"ADAGRAD") {
360 }
else if (optimizer ==
"RMSPROP") {
362 }
else if (optimizer ==
"ADADELTA") {
371 std::vector<TString> optimParamLabels = {
"_beta1",
"_beta2",
"_eps",
"_rho"};
373 std::map<TString, double> defaultValues = {
374 {
"ADADELTA_eps", 1.E-8}, {
"ADADELTA_rho", 0.95},
375 {
"ADAGRAD_eps", 1.E-8},
376 {
"ADAM_beta1", 0.9}, {
"ADAM_beta2", 0.999}, {
"ADAM_eps", 1.E-7},
377 {
"RMSPROP_eps", 1.E-7}, {
"RMSPROP_rho", 0.9},
379 for (
auto &pN : optimParamLabels) {
382 if (defaultValues.count(optimParamName) > 0) {
383 double defValue = defaultValues[optimParamName];
449 TIter nextInputDim(inputDimStrings);
455 std::vector<size_t> inputShape;
456 inputShape.reserve(inputLayoutString.
Length()/2 + 2);
457 inputShape.push_back(0);
458 for (; inputDimString !=
nullptr; inputDimString = (
TObjString *)nextInputDim()) {
463 inputShape.push_back(subDim);
468 if (inputShape.size() == 2) {
470 inputShape.insert(inputShape.begin() + 1, {1,1});
472 else if (inputShape.size() == 3) {
474 inputShape.insert(inputShape.begin() + 2, 1);
490 size_t batchDepth = 0;
491 size_t batchHeight = 0;
492 size_t batchWidth = 0;
496 TIter nextBatchDim(batchDimStrings);
500 for (; batchDimString !=
nullptr; batchDimString = (
TObjString *)nextBatchDim()) {
505 batchDepth = (size_t)strDepth.
Atoi();
510 batchHeight = (size_t)strHeight.
Atoi();
515 batchWidth = (size_t)strWidth.
Atoi();
528template <
typename Architecture_t,
typename Layer_t>
533 const TString layerDelimiter(
",");
534 const TString subDelimiter(
"|");
542 TIter nextLayer(layerStrings);
546 for (; layerString !=
nullptr; layerString = (
TObjString *)nextLayer()) {
550 TIter nextToken(subStrings);
557 if (strLayerType ==
"DENSE") {
559 }
else if (strLayerType ==
"CONV") {
561 }
else if (strLayerType ==
"MAXPOOL") {
563 }
else if (strLayerType ==
"RESHAPE") {
565 }
else if (strLayerType ==
"BNORM") {
567 }
else if (strLayerType ==
"RNN") {
569 }
else if (strLayerType ==
"LSTM") {
571 }
else if (strLayerType ==
"GRU") {
582template <
typename Architecture_t,
typename Layer_t>
593 const size_t inputSize =
GetNvar();
597 TIter nextToken(subStrings);
604 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
609 if (strActFnc ==
"DENSE")
continue;
611 if (strActFnc ==
"RELU") {
613 }
else if (strActFnc ==
"TANH") {
615 }
else if (strActFnc ==
"FTANH") {
617 }
else if (strActFnc ==
"SYMMRELU") {
619 }
else if (strActFnc ==
"SOFTSIGN") {
621 }
else if (strActFnc ==
"SIGMOID") {
623 }
else if (strActFnc ==
"LINEAR") {
625 }
else if (strActFnc ==
"GAUSS") {
627 }
else if (
width == 0) {
631 TString strNumNodes = strActFnc;
642 size_t outputSize = 1;
670template <
typename Architecture_t,
typename Layer_t>
680 int zeroPadHeight = 0;
681 int zeroPadWidth = 0;
686 TIter nextToken(subStrings);
690 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
695 depth = strDepth.
Atoi();
700 fltHeight = strFltHeight.
Atoi();
705 fltWidth = strFltWidth.
Atoi();
710 strideRows = strStrideRows.
Atoi();
715 strideCols = strStrideCols.
Atoi();
720 zeroPadHeight = strZeroPadHeight.
Atoi();
725 zeroPadWidth = strZeroPadWidth.
Atoi();
730 if (strActFnc ==
"RELU") {
732 }
else if (strActFnc ==
"TANH") {
734 }
else if (strActFnc ==
"SYMMRELU") {
736 }
else if (strActFnc ==
"SOFTSIGN") {
738 }
else if (strActFnc ==
"SIGMOID") {
740 }
else if (strActFnc ==
"LINEAR") {
742 }
else if (strActFnc ==
"GAUSS") {
752 zeroPadHeight, zeroPadWidth, activationFunction);
756 if (
fBuildNet)
fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
757 zeroPadHeight, zeroPadWidth, activationFunction);
769template <
typename Architecture_t,
typename Layer_t>
775 int filterHeight = 0;
782 TIter nextToken(subStrings);
786 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
791 filterHeight = strFrmHeight.
Atoi();
796 filterWidth = strFrmWidth.
Atoi();
801 strideRows = strStrideRows.
Atoi();
806 strideCols = strStrideCols.
Atoi();
814 deepNet.
AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
817 if (
fBuildNet)
fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
830template <
typename Architecture_t,
typename Layer_t>
838 bool flattening =
false;
842 TIter nextToken(subStrings);
846 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
847 if (token->
GetString() ==
"FLAT") idxToken=4;
851 depth = strDepth.
Atoi();
856 height = strHeight.
Atoi();
866 if (flat ==
"FLAT") {
891template <
typename Architecture_t,
typename Layer_t>
898 double momentum = -1;
903 TIter nextToken(subStrings);
907 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
932template <
typename Architecture_t,
typename Layer_t>
941 bool rememberState =
false;
942 bool returnSequence =
false;
943 bool resetGateAfter =
false;
947 TIter nextToken(subStrings);
951 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
956 stateSize = strstateSize.
Atoi();
962 inputSize = strinputSize.
Atoi();
968 timeSteps = strtimeSteps.
Atoi();
974 rememberState = (
bool) strrememberState.
Atoi();
994 auto * recurrentLayer = deepNet.
AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
995 recurrentLayer->Initialize();
997 if (
fBuildNet)
fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1000 auto *recurrentLayer = deepNet.
AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1001 recurrentLayer->Initialize();
1004 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1007 if (Architecture_t::IsCudnn()) resetGateAfter =
true;
1008 auto *recurrentLayer = deepNet.
AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1009 recurrentLayer->Initialize();
1012 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1015 Log() << kFATAL <<
"Invalid Recurrent layer type " <<
Endl;
1022 :
MethodBase(jobName,
Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
1023 fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
1024 fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
1025 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1026 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1035 :
MethodBase(
Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
1036 fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
1037 fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
1038 fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
1039 fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
1057 parseString.ReplaceAll(
" ",
"");
1059 const TString keyValueDelim(
"=");
1061 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1062 TIter nextBlock(blockStrings);
1065 for (; blockString !=
nullptr; blockString = (
TObjString *)nextBlock()) {
1066 blockKeyValues.push_back(std::map<TString, TString>());
1067 std::map<TString, TString> &currentBlock = blockKeyValues.back();
1070 TIter nextToken(subStrings);
1073 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
1075 int delimPos = strKeyValue.
First(keyValueDelim.
Data());
1076 if (delimPos <= 0)
continue;
1085 currentBlock.insert(std::make_pair(strKey, strValue));
1088 return blockKeyValues;
1112 Int_t nValidationSamples = 0;
1117 if (fNumValidationString.EndsWith(
"%")) {
1122 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1123 nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
1125 Log() << kFATAL <<
"Cannot parse number \"" << fNumValidationString
1126 <<
"\". Expected string like \"20%\" or \"20.0%\"." <<
Endl;
1128 }
else if (fNumValidationString.IsFloat()) {
1129 Double_t valSizeAsDouble = fNumValidationString.Atof();
1131 if (valSizeAsDouble < 1.0) {
1133 nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
1136 nValidationSamples = valSizeAsDouble;
1139 Log() <<
kFATAL <<
"Cannot parse number \"" << fNumValidationString <<
"\". Expected string like \"0.2\" or \"100\"."
1145 if (nValidationSamples < 0) {
1146 Log() <<
kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is negative." <<
Endl;
1149 if (nValidationSamples == 0) {
1150 Log() <<
kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is zero." <<
Endl;
1153 if (nValidationSamples >= (
Int_t)trainingSetSize) {
1154 Log() <<
kFATAL <<
"Validation size \"" << fNumValidationString
1155 <<
"\" is larger than or equal in size to training set (size=\"" << trainingSetSize <<
"\")." <<
Endl;
1158 return nValidationSamples;
1165template <
typename Architecture_t>
1169 using Scalar_t =
typename Architecture_t::Scalar_t;
1187 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1188 const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1190 size_t trainingPhase = 1;
1194 size_t nThreads = 1;
1198 size_t batchSize = settings.batchSize;
1219 if (batchDepth != batchSize && batchDepth > 1) {
1220 Error(
"Train",
"Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1223 if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1224 Error(
"Train",
"Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1230 bool badLayout =
false;
1232 if (batchDepth == batchSize)
1233 badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1235 if (batchHeight == batchSize && batchDepth == 1)
1236 badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1238 Error(
"Train",
"Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1239 inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1244 if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1245 Log() << kFATAL <<
"Number of samples in the datasets are train: ("
1246 << nTrainingSamples <<
") test: (" << nValidationSamples
1247 <<
"). One of these is smaller than the batch size of "
1248 << settings.batchSize <<
". Please increase the batch"
1249 <<
" size to be at least the same size as the smallest"
1250 <<
" of them." <<
Endl;
1253 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J,
I,
R,
weightDecay);
1257 if (trainingPhase == 1) {
1258 fNet = std::unique_ptr<DeepNetImpl_t>(
new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1266 std::vector<DeepNet_t> nets{};
1267 nets.reserve(nThreads);
1268 for (
size_t i = 0; i < nThreads; i++) {
1270 nets.push_back(deepNet);
1280 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1281 for (
auto & p : dropoutVector) {
1284 deepNet.SetDropoutProbabilities(dropoutVector);
1286 if (trainingPhase > 1) {
1288 for (
size_t i = 0; i < deepNet.GetDepth(); ++i) {
1289 deepNet.GetLayerAt(i)->CopyParameters(*
fNet->GetLayerAt(i));
1310 Log() <<
"***** Deep Learning Network *****" <<
Endl;
1311 if (
Log().GetMinType() <= kINFO)
1314 Log() <<
"Using " << nTrainingSamples <<
" events for training and " << nValidationSamples <<
" for testing" <<
Endl;
1318 TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1319 {inputDepth, inputHeight, inputWidth},
1320 {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1321 deepNet.GetOutputWidth(), nThreads);
1324 TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1325 {inputDepth, inputHeight, inputWidth},
1326 { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1327 deepNet.GetOutputWidth(), nThreads);
1336 Log() <<
"Compute initial loss on the validation data " <<
Endl;
1337 for (
auto batch : validationData) {
1338 auto inputTensor = batch.GetInput();
1339 auto outputMatrix = batch.GetOutput();
1340 auto weights = batch.GetWeights();
1344 minValError += deepNet.Loss(inputTensor, outputMatrix, weights,
false, includeRegularization);
1347 Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1348 minValError /= (
Double_t)(nValidationSamples / settings.batchSize);
1349 minValError += regzTerm;
1353 std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1358 case EOptimizer::kSGD:
1359 optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1363 case EOptimizer::kAdam: {
1364 optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1366 deepNet, settings.learningRate, settings.optimizerParams[
"ADAM_beta1"],
1367 settings.optimizerParams[
"ADAM_beta2"], settings.optimizerParams[
"ADAM_eps"]));
1371 case EOptimizer::kAdagrad:
1372 optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1374 settings.optimizerParams[
"ADAGRAD_eps"]));
1377 case EOptimizer::kRMSProp:
1378 optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1380 settings.optimizerParams[
"RMSPROP_rho"],
1381 settings.optimizerParams[
"RMSPROP_eps"]));
1384 case EOptimizer::kAdadelta:
1385 optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1387 settings.optimizerParams[
"ADADELTA_rho"],
1388 settings.optimizerParams[
"ADADELTA_eps"]));
1394 std::vector<TTensorBatch<Architecture_t>> batches{};
1396 bool converged =
false;
1397 size_t convergenceCount = 0;
1398 size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1401 std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1402 tstart = std::chrono::system_clock::now();
1405 auto optimParametersString = [&]() {
1407 for (
auto & element : settings.optimizerParams) {
1409 key.
ReplaceAll(settings.optimizerName +
"_",
"");
1410 double value = element.second;
1411 if (!optimParameters.
IsNull())
1412 optimParameters +=
",";
1414 optimParameters +=
" (";
1417 if (!optimParameters.
IsNull())
1418 optimParameters +=
")";
1419 return optimParameters;
1423 <<
" Optimizer " << settings.optimizerName
1424 << optimParametersString()
1425 <<
" Learning rate = " << settings.learningRate <<
" regularization " << (char)settings.regularization
1426 <<
" minimum error = " << minValError <<
Endl;
1428 std::string separator(62,
'-');
1430 Log() << std::setw(10) <<
"Epoch"
1431 <<
" | " << std::setw(12) <<
"Train Err." << std::setw(12) <<
"Val. Err." << std::setw(12)
1432 <<
"t(s)/epoch" << std::setw(12) <<
"t(s)/Loss" << std::setw(12) <<
"nEvents/s" << std::setw(12)
1433 <<
"Conv. Steps" <<
Endl;
1439 size_t shuffleSeed = 0;
1445 Log() <<
"Initial Deep Net Weights " <<
Endl;
1446 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1447 for (
size_t l = 0;
l < weights_tensor.size(); ++
l)
1448 weights_tensor[
l].
Print();
1449 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1450 bias_tensor[0].
Print();
1453 Log() <<
" Start epoch iteration ..." <<
Endl;
1454 bool debugFirstEpoch =
false;
1455 bool computeLossInTraining =
true;
1456 size_t nTrainEpochs = 0;
1457 while (!converged) {
1459 trainingData.Shuffle(rng);
1465 for (
size_t i = 0; i < batchesInEpoch; ++i ) {
1472 if (debugFirstEpoch) std::cout <<
"\n\n----- batch # " << i <<
"\n\n";
1474 auto my_batch = trainingData.GetTensorBatch();
1476 if (debugFirstEpoch)
1477 std::cout <<
"got batch data - doing forward \n";
1481 Architecture_t::PrintTensor(my_batch.GetInput(),
"input tensor",
true);
1482 typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1483 typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1484 Architecture_t::PrintTensor(tOut,
"label tensor",
true) ;
1485 Architecture_t::PrintTensor(tW,
"weight tensor",
true) ;
1488 deepNet.Forward(my_batch.GetInput(),
true);
1490 if (computeLossInTraining) {
1491 auto outputMatrix = my_batch.GetOutput();
1492 auto weights = my_batch.GetWeights();
1493 trainingError += deepNet.Loss(outputMatrix, weights,
false);
1496 if (debugFirstEpoch)
1497 std::cout <<
"- doing backward \n";
1500 size_t nlayers = deepNet.GetLayers().size();
1501 for (
size_t l = 0;
l < nlayers; ++
l) {
1502 if (deepNet.GetLayerAt(
l)->GetWeights().size() > 0)
1503 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetWeightsAt(0),
1506 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetOutput(),
1513 deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1515 if (debugFirstEpoch)
1516 std::cout <<
"- doing optimizer update \n";
1519 optimizer->IncrementGlobalStep();
1523 std::cout <<
"minmimizer step - momentum " << settings.momentum <<
" learning rate " << optimizer->GetLearningRate() << std::endl;
1524 for (
size_t l = 0;
l < nlayers; ++
l) {
1525 if (deepNet.GetLayerAt(
l)->GetWeights().size() > 0) {
1526 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetWeightsAt(0),
TString::Format(
"weights after step layer %d",
l).
Data());
1527 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetWeightGradientsAt(0),
"weight gradients");
1534 if (debugFirstEpoch) std::cout <<
"\n End batch loop - compute validation loss \n";
1536 debugFirstEpoch =
false;
1537 if ((nTrainEpochs % settings.testInterval) == 0) {
1539 std::chrono::time_point<std::chrono::system_clock>
t1,t2;
1541 t1 = std::chrono::system_clock::now();
1547 bool inTraining =
false;
1548 for (
auto batch : validationData) {
1549 auto inputTensor = batch.GetInput();
1550 auto outputMatrix = batch.GetOutput();
1551 auto weights = batch.GetWeights();
1553 valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1556 Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1557 valError /= (
Double_t)(nValidationSamples / settings.batchSize);
1558 valError += regTerm;
1563 t2 = std::chrono::system_clock::now();
1566 if (valError < minValError) {
1567 convergenceCount = 0;
1569 convergenceCount += settings.testInterval;
1573 if (valError < minValError ) {
1575 Log() << std::setw(10) << nTrainEpochs
1576 <<
" Minimum Test error found - save the configuration " <<
Endl;
1577 for (
size_t i = 0; i < deepNet.GetDepth(); ++i) {
1578 fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1587 minValError = valError;
1589 else if ( minValError <= 0. )
1590 minValError = valError;
1592 if (!computeLossInTraining) {
1593 trainingError = 0.0;
1595 for (
auto batch : trainingData) {
1596 auto inputTensor = batch.GetInput();
1597 auto outputMatrix = batch.GetOutput();
1598 auto weights = batch.GetWeights();
1599 trainingError += deepNet.Loss(inputTensor, outputMatrix, weights,
false,
false);
1603 trainingError /= (
Double_t)(nTrainingSamples / settings.batchSize);
1604 trainingError += regTerm;
1610 tend = std::chrono::system_clock::now();
1613 std::chrono::duration<double> elapsed_seconds = tend - tstart;
1614 std::chrono::duration<double> elapsed1 =
t1-tstart;
1617 std::chrono::duration<double> elapsed_testing = tend-
t1;
1619 double seconds = elapsed_seconds.count();
1622 double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1625 convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1628 Log() << std::setw(10) << nTrainEpochs <<
" | "
1629 << std::setw(12) << trainingError
1630 << std::setw(12) << valError
1631 << std::setw(12) << seconds / settings.testInterval
1632 << std::setw(12) << elapsed_testing.count()
1633 << std::setw(12) << 1. / eventTime
1634 << std::setw(12) << convergenceCount
1640 tstart = std::chrono::system_clock::now();
1644 if (converged && debug) {
1645 Log() <<
"Final Deep Net Weights for phase " << trainingPhase <<
" epoch " << nTrainEpochs
1647 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1648 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1649 for (
size_t l = 0;
l < weights_tensor.size(); ++
l)
1650 weights_tensor[
l].
Print();
1651 bias_tensor[0].
Print();
1664 Log() << kFATAL <<
"Not implemented yet" <<
Endl;
1670#ifdef R__HAS_TMVAGPU
1671 Log() << kINFO <<
"Start of deep neural network training on GPU." <<
Endl <<
Endl;
1673 TrainDeepNet<DNN::TCudnn<ScalarImpl_t> >();
1675 TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
1678 Log() << kFATAL <<
"CUDA backend not enabled. Please make sure "
1679 "you have CUDA installed and it was successfully "
1680 "detected by CMAKE."
1685#ifdef R__HAS_TMVACPU
1688 Log() << kINFO <<
"Start of deep neural network training on CPU using MT, nthreads = "
1691 Log() << kINFO <<
"Start of deep neural network training on single thread CPU (without ROOT-MT support) " <<
Endl
1694 TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1699 " is not a supported architecture for TMVA::MethodDL"
1715 if (!fNet || fNet->GetDepth() == 0) {
1716 Log() << kFATAL <<
"The network has not been trained and fNet is not built" <<
Endl;
1718 if (fNet->GetBatchSize() != 1) {
1719 Log() << kFATAL <<
"FillINputTensor::Network batch size must be equal to 1 when doing single event predicition" <<
Endl;
1723 const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
1724 size_t nVariables = GetEvent()->GetNVariables();
1727 if (fXInput.GetLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
1728 R__ASSERT(fXInput.GetShape().size() < 4);
1730 if (fXInput.GetShape().size() == 2) {
1731 nc = fXInput.GetShape()[0];
1733 ArchitectureImpl_t::PrintTensor(fXInput);
1734 Log() << kFATAL <<
"First tensor dimension should be equal to batch size, i.e. = 1" <<
Endl;
1736 nhw = fXInput.GetShape()[1];
1738 nc = fXInput.GetCSize();
1739 nhw = fXInput.GetWSize();
1741 if (nVariables != nc * nhw) {
1742 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1743 <<
" n-event variables " << nVariables <<
" expected input tensor " << nc <<
" x " << nhw <<
Endl;
1745 for (
size_t j = 0; j < nc; j++) {
1746 for (
size_t k = 0; k < nhw; k++) {
1748 fXInputBuffer[k * nc + j] = inputValues[j * nhw + k];
1753 assert(fXInput.GetShape().size() >= 4);
1754 size_t nc = fXInput.GetCSize();
1755 size_t nh = fXInput.GetHSize();
1756 size_t nw = fXInput.GetWSize();
1757 size_t n = nc * nh * nw;
1758 if (nVariables !=
n) {
1759 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1760 <<
" n-event variables " << nVariables <<
" expected input tensor " << nc <<
" x " << nh <<
" x " << nw
1763 for (
size_t j = 0; j <
n; j++) {
1765 fXInputBuffer[j] = inputValues[j];
1769 fXInput.GetDeviceBuffer().CopyFrom(fXInputBuffer);
1783 double mvaValue = (*fYHat)(0, 0);
1786#ifdef DEBUG_MVAVALUE
1787 using Tensor_t = std::vector<MatrixImpl_t>;
1788 TMatrixF xInput(n1,n2, inputValues.data() );
1791 std::cout <<
"Output of DeepNet " << mvaValue << std::endl;
1792 auto & deepnet = *
fNet;
1793 std::cout <<
"Loop on layers " << std::endl;
1794 for (
int l = 0;
l < deepnet.GetDepth(); ++
l) {
1795 std::cout <<
"Layer " <<
l;
1796 const auto * layer = deepnet.GetLayerAt(
l);
1797 const Tensor_t & layer_output = layer->GetOutput();
1799 std::cout <<
"DNN output " << layer_output.size() << std::endl;
1800 for (
size_t i = 0; i < layer_output.size(); ++i) {
1801#ifdef R__HAS_TMVAGPU
1805 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1809 const Tensor_t & layer_weights = layer->GetWeights();
1810 std::cout <<
"DNN weights " << layer_weights.size() << std::endl;
1811 if (layer_weights.size() > 0) {
1813#ifdef R__HAS_TMVAGPU
1817 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1829template <
typename Architecture_t>
1834 if (!
fNet ||
fNet->GetDepth() == 0) {
1835 Log() << kFATAL <<
"The network has not been trained and fNet is not built"
1853 using Matrix_t =
typename Architecture_t::Matrix_t;
1857 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J,
I,
R,
weightDecay);
1858 std::vector<DeepNet_t> nets{};
1863 for (
size_t i = 0; i < deepNet.GetDepth(); ++i) {
1864 deepNet.GetLayerAt(i)->CopyParameters(*
fNet->GetLayerAt(i));
1871 size_t n1 = deepNet.GetBatchHeight();
1872 size_t n2 = deepNet.GetBatchWidth();
1873 size_t n0 = deepNet.GetBatchSize();
1876 n1 = deepNet.GetBatchSize();
1880 Long64_t nEvents = lastEvt - firstEvt;
1882 TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1890 Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1899 <<
" sample (" << nEvents <<
" events)" <<
Endl;
1903 std::vector<double> mvaValues(nEvents);
1906 for (
Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1908 Long64_t ievt_end = ievt + batchSize;
1910 if (ievt_end <= lastEvt) {
1912 if (ievt == firstEvt) {
1916 if (n1 == batchSize && n0 == 1) {
1917 if (n2 != nVariables) {
1918 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1919 <<
" n-event variables " << nVariables <<
" expected input matrix " << n1 <<
" x " << n2
1923 if (n1*n2 != nVariables || n0 != batchSize) {
1924 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1925 <<
" n-event variables " << nVariables <<
" expected input tensor " << n0 <<
" x " << n1 <<
" x " << n2
1931 auto batch = testData.GetTensorBatch();
1932 auto inputTensor = batch.GetInput();
1934 auto xInput = batch.GetInput();
1937 for (
size_t i = 0; i < batchSize; ++i) {
1938 double value = yHat(i,0);
1939 mvaValues[ievt + i] = (
TMath::IsNaN(value)) ? -999. : value;
1944 for (
Long64_t i = ievt; i < lastEvt; ++i) {
1953 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
1969 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1971 size_t nTargets = DataInfo().GetNTargets();
1972 R__ASSERT(nTargets == fYHat->GetNcols());
1974 std::vector<Float_t>
output(nTargets);
1975 for (
size_t i = 0; i < nTargets; i++)
1976 output[i] = (*fYHat)(0, i);
1979 if (fRegressionReturnVal == NULL)
1980 fRegressionReturnVal =
new std::vector<Float_t>(nTargets);
1981 R__ASSERT(fRegressionReturnVal->size() == nTargets);
1985 for (
size_t i = 0; i < nTargets; ++i) {
1988 const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
1989 for (
size_t i = 0; i < nTargets; ++i) {
1990 (*fRegressionReturnVal)[i] = evT2->GetTarget(i);
1993 return *fRegressionReturnVal;
2003 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
2005 size_t nClasses = DataInfo().GetNClasses();
2006 R__ASSERT(nClasses == fYHat->GetNcols());
2008 if (fMulticlassReturnVal == NULL) {
2009 fMulticlassReturnVal =
new std::vector<Float_t>(nClasses);
2011 R__ASSERT(fMulticlassReturnVal->size() == nClasses);
2013 for (
size_t i = 0; i < nClasses; i++) {
2014 (*fMulticlassReturnVal)[i] = (*fYHat)(0, i);
2016 return *fMulticlassReturnVal;
2028 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
2029 if (firstEvt < 0) firstEvt = 0;
2030 nEvents = lastEvt-firstEvt;
2033 size_t defaultEvalBatchSize = (
fXInput.GetSize() > 1000) ? 100 : 1000;
2035 if (
size_t(nEvents) < batchSize ) batchSize = nEvents;
2039#ifdef R__HAS_TMVAGPU
2040 Log() << kINFO <<
"Evaluate deep neural network on GPU using batches with size = " << batchSize <<
Endl <<
Endl;
2042 return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2044 return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
2049 Log() << kINFO <<
"Evaluate deep neural network on CPU using batches with size = " << batchSize <<
Endl <<
Endl;
2050 return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
2057 void* nn = xmlEngine.
NewChild(parent, 0,
"Weights");
2065 Int_t inputDepth =
fNet->GetInputDepth();
2066 Int_t inputHeight =
fNet->GetInputHeight();
2067 Int_t inputWidth =
fNet->GetInputWidth();
2071 Int_t batchDepth =
fNet->GetBatchDepth();
2072 Int_t batchHeight =
fNet->GetBatchHeight();
2073 Int_t batchWidth =
fNet->GetBatchWidth();
2075 char lossFunction =
static_cast<char>(
fNet->GetLossFunction());
2076 char initialization =
static_cast<char>(
fNet->GetInitialization());
2097 xmlEngine.NewAttr(nn, 0,
"LossFunction",
TString(lossFunction));
2098 xmlEngine.NewAttr(nn, 0,
"Initialization",
TString(initialization));
2100 xmlEngine.NewAttr(nn, 0,
"OutputFunction",
TString(outputFunction));
2105 for (
Int_t i = 0; i < depth; i++)
2125 size_t inputDepth, inputHeight, inputWidth;
2130 size_t batchSize, batchDepth, batchHeight, batchWidth;
2138 char lossFunctionChar;
2140 char initializationChar;
2142 char regularizationChar;
2144 char outputFunctionChar;
2161 fNet = std::unique_ptr<DeepNetImpl_t>(
new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
2162 batchHeight, batchWidth,
2175 for (
size_t i = 0; i < netDepth; i++) {
2180 if (layerName ==
"DenseLayer") {
2196 else if (layerName ==
"ConvLayer") {
2201 size_t fltHeight, fltWidth = 0;
2202 size_t strideRows, strideCols = 0;
2203 size_t padHeight, padWidth = 0;
2217 fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2218 padHeight, padWidth, actFunction);
2223 else if (layerName ==
"MaxPoolLayer") {
2226 size_t filterHeight, filterWidth = 0;
2227 size_t strideRows, strideCols = 0;
2233 fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2236 else if (layerName ==
"ReshapeLayer") {
2239 size_t depth, height,
width = 0;
2246 fNet->AddReshapeLayer(depth, height,
width, flattening);
2250 else if (layerName ==
"RNNLayer") {
2253 size_t stateSize,inputSize, timeSteps = 0;
2254 int rememberState= 0;
2255 int returnSequence = 0;
2262 fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2266 else if (layerName ==
"LSTMLayer") {
2269 size_t stateSize,inputSize, timeSteps = 0;
2270 int rememberState, returnSequence = 0;
2277 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2281 else if (layerName ==
"GRULayer") {
2284 size_t stateSize,inputSize, timeSteps = 0;
2285 int rememberState, returnSequence, resetGateAfter = 0;
2295 "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");
2297 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
2300 else if (layerName ==
"BatchNormLayer") {
2302 fNet->AddBatchNormLayer(0., 0.0);
2305 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
#define REGISTER_METHOD(CLASS)
for example
include TDocParser_001 C image html pict1_TDocParser_001 png width
char * Form(const char *fmt,...)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
Adadelta Optimizer class.
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Generic Deep Neural Network class.
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Stochastic Batch Gradient Descent Optimizer class.
Generic General Layer class.
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Class that contains all the data information.
UInt_t GetNClasses() const
Types::ETreeType GetCurrentType() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
void SetCurrentEvent(Long64_t ievt) const
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
UInt_t GetNVariables() const
accessor to the number of variables
Virtual base Class for all MVA method.
const char * GetName() const
Bool_t IgnoreEventsWithNegWeightsInTraining() const
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
UInt_t GetNTargets() const
const TString & GetMethodName() const
const Event * GetEvent() const
DataSetInfo & DataInfo() const
UInt_t GetNVariables() const
Types::EAnalysisType fAnalysisType
TrainingHistory fTrainHistory
IPythonInteractive * fInteractive
temporary dataset used when evaluating on a different data (used by MethodCategory::GetMvaValues)
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
size_t fBatchHeight
The height of the batch used to train the deep net.
void GetHelpMessage() const
DNN::ELossFunction fLossFunction
The loss function.
std::vector< size_t > fInputShape
Contains the batch size (no.
TString fLayoutString
The string defining the layout of the deep net.
void SetInputDepth(int inputDepth)
Setters.
std::unique_ptr< MatrixImpl_t > fYHat
void Train()
Methods for training the deep learning network.
size_t GetBatchHeight() const
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class. Here we will evaluate...
TString fWeightInitializationString
The string defining the weight initialization method.
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate max pool layer.
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
virtual const std::vector< Float_t > & GetMulticlassValues()
TString fArchitectureString
The string defining the architecture: CPU or GPU.
void Init()
default initializations
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
void TrainDeepNet()
train of deep neural network using the defined architecture
const std::vector< TTrainingSettings > & GetTrainingSettings() const
DNN::EOutputFunction GetOutputFunction() const
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate dense layer.
UInt_t GetNumValidationSamples()
Parse the validation string and return the number of events used for validation.
TString GetBatchLayoutString() const
void SetInputWidth(int inputWidth)
HostBufferImpl_t fXInputBuffer
size_t fBatchWidth
The width of the batch used to train the deep net.
size_t GetInputDepth() const
std::unique_ptr< DeepNetImpl_t > fNet
TString GetInputLayoutString() const
void SetBatchHeight(size_t batchHeight)
size_t GetInputHeight() const
TString GetArchitectureString() const
void ParseBatchLayout()
Parse the input layout.
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate batch normalization layer.
void ReadWeightsFromStream(std::istream &)
void ReadWeightsFromXML(void *wghtnode)
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
std::vector< std::map< TString, TString > > KeyValueVector_t
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
DNN::EInitialization fWeightInitialization
The initialization method.
size_t GetBatchDepth() const
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate recurrent layer.
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
size_t GetInputWidth() const
void SetInputShape(std::vector< size_t > inputShape)
DNN::ELossFunction GetLossFunction() const
TString fBatchLayoutString
The string defining the layout of the batch.
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
virtual const std::vector< Float_t > & GetRegressionValues()
TString fTrainingStrategyString
The string defining the training strategy.
const Ranking * CreateRanking()
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
void SetBatchDepth(size_t batchDepth)
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
void SetBatchWidth(size_t batchWidth)
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
DNN::EInitialization GetWeightInitialization() const
void SetBatchSize(size_t batchSize)
TString GetLayoutString() const
size_t fBatchDepth
The depth of the batch used to train the deep net.
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
size_t GetBatchWidth() const
void AddWeightsXMLTo(void *parent) const
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
virtual ~MethodDL()
Virtual Destructor.
void ParseInputLayout()
Parse the input layout.
void FillInputTensor()
Get the input event tensor for evaluation. Internal function to fill the fXInput tensor with the corre...
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
void SetInputHeight(int inputHeight)
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling ProcessOptions(), all of the options are parsed, so using the parsed options,...
TString fErrorStrategy
The string defining the error strategy for training.
void DeclareOptions()
The option handling methods.
TString fInputLayoutString
The string defining the layout of the input.
EMsgType GetMinType() const
Ranking for variables in method (implementation)
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
void Print(Option_t *name="") const
Print the matrix as a table of elements.
virtual void Print(Option_t *option="") const
Print TNamed name and title.
Collectable string class.
const TString & GetString() const
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Int_t Atoi() const
Return integer value of string.
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Double_t Atof() const
Return floating-point value contained in string.
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Ssiz_t First(char c) const
Find first occurrence of a character c.
const char * Data() const
TString & ReplaceAll(const TString &s1, const TString &s2)
void ToUpper()
Change string to upper case.
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
EOptimizer
Enum representing the optimizer used for training.
EOutputFunction
Enum that represents output functions.
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
ERegularization
Enum representing the regularization type applied for a given layer.
EActivationFunction
Enum that represents layer activation functions.
ELossFunction
Enum that represents objective functions for the net, i.e.
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
create variable transformations
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
MsgLogger & Endl(MsgLogger &ml)
All of the options that can be specified in the training string.
std::map< TString, double > optimizerParams
DNN::EOptimizer optimizer
DNN::ERegularization regularization
std::vector< Double_t > dropoutProbabilities
static void output(int code)