std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
if (it == keyValueMap.end()) {
std::vector<double> defaultValue)
if (parseString == "") {
std::vector<double> values;
TIter nextToken(tokenStrings);
for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
std::stringstream sstr;
sstr >> currentValue;
values.push_back(currentValue);
178 " or cross entropy (binary classification).");
194 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
195 "Specify as 100 to use exactly 100 events. (Default: 20%)");
207 "ConvergenceSteps=100,"
213 "Regularization=None,"
215 "TrainingStrategy",
"Defines the training strategies.");
Log() << kINFO << "Will ignore negative events in training!" << Endl;
Log() << kWARNING << "The STANDARD architecture is not supported anymore. "
                     "Please use Architecture=CPU or Architecture=GPU. "
                     "See the TMVA Users' Guide for instructions if you "
                     "encounter problems."
Log() << kINFO << "We will use the CPU architecture instead" << Endl;
Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
                   "Please use Architecture=CPU or Architecture=GPU for the "
                   "time being. See the TMVA Users' Guide for instructions "
                   "if you encounter problems."
Log() << kINFO << "We will try using the GPU-CUDA architecture if available" << Endl;
Log() << kINFO << "Will now use the GPU architecture!" << Endl;
Log() << kERROR << "CUDA backend not enabled. Please make sure "
                   "you have CUDA installed and it was successfully "
                   "detected by CMake by using -Dtmva-gpu=On "
Log() << kINFO << "Will now use the CPU architecture instead!" << Endl;
Log() << kINFO << "Will now use the CPU architecture with BLAS and IMT support!" << Endl;
Log() << kINFO << "Multi-core CPU backend not enabled. For better performance, make sure "
                   "you have a BLAS implementation and that it was successfully "
                   "detected by CMake, and that the imt CMake flag is set."
Log() << kINFO << "Will use the CPU architecture anyway, but with slower performance" << Endl;
Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
      << " neural net error function. Setting error function to "
      << " SUMOFSQUARES now." << Endl;
for (auto &block : strategyKeyValues) {
if (optimizer == "SGD") {
} else if (optimizer == "ADAM") {
} else if (optimizer == "ADAGRAD") {
} else if (optimizer == "RMSPROP") {
} else if (optimizer == "ADADELTA") {
std::vector<TString> optimParamLabels = {"_beta1", "_beta2", "_eps", "_rho"};
std::map<TString, double> defaultValues = {
   {"ADADELTA_eps", 1.E-8}, {"ADADELTA_rho", 0.95},
   {"ADAGRAD_eps", 1.E-8},
   {"ADAM_beta1", 0.9}, {"ADAM_beta2", 0.999}, {"ADAM_eps", 1.E-7},
   {"RMSPROP_eps", 1.E-7}, {"RMSPROP_rho", 0.9},
for (auto &pN : optimParamLabels) {
if (defaultValues.count(optimParamName) > 0) {
double defValue = defaultValues[optimParamName];
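// Illustrative note (editorial): for each optimizer the parser builds keys of the form
// "<OPTIMIZER><label>" and falls back to the defaults above when the user did not set them,
// e.g. with Optimizer=ADAM the keys "ADAM_beta1" -> 0.9, "ADAM_beta2" -> 0.999 and
// "ADAM_eps" -> 1.E-7 apply unless overridden in the TrainingStrategy block, for instance
// "Optimizer=ADAM,ADAM_beta1=0.85,ADAM_eps=1.E-8" (hypothetical values).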
TIter nextInputDim(inputDimStrings);
std::vector<size_t> inputShape;
inputShape.reserve(inputLayoutString.Length()/2 + 2);
inputShape.push_back(0);
for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
inputShape.push_back(subDim);
if (inputShape.size() == 2) {
inputShape = {inputShape[0], 1, 1, inputShape[1]};
else if (inputShape.size() == 3) {
inputShape = {inputShape[0], inputShape[1], 1, inputShape[2]};
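// Illustrative example (assumed option syntax): "InputLayout=1|28|28" tokenizes into three
// dimensions and, together with the leading batch placeholder, already gives a 4D shape
// {batch, 1, 28, 28}; a plain "InputLayout=784" would be padded by the first branch above
// to {batch, 1, 1, 784}.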
size_t batchDepth = 0;
size_t batchHeight = 0;
size_t batchWidth = 0;
TIter nextBatchDim(batchDimStrings);
for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
batchDepth = (size_t)strDepth.Atoi();
batchHeight = (size_t)strHeight.Atoi();
batchWidth = (size_t)strWidth.Atoi();
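// Illustrative example (assumed option syntax): "BatchLayout=1|128|784" sets batchDepth = 1,
// batchHeight = 128 (the batch size) and batchWidth = 784 (the flattened event size), the
// layout a dense network would typically use.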
template <typename Architecture_t, typename Layer_t>
const TString layerDelimiter(",");
const TString subDelimiter("|");
TIter nextLayer(layerStrings);
for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
TIter nextToken(subStrings);
if (strLayerType == "DENSE") {
} else if (strLayerType == "CONV") {
} else if (strLayerType == "MAXPOOL") {
} else if (strLayerType == "RESHAPE") {
} else if (strLayerType == "BNORM") {
} else if (strLayerType == "RNN") {
} else if (strLayerType == "LSTM") {
} else if (strLayerType == "GRU") {
template <typename Architecture_t, typename Layer_t>
const size_t inputSize = GetNvar();
TIter nextToken(subStrings);
for (; token != nullptr; token = (TObjString *)nextToken()) {
if (strActFnc == "DENSE") continue;
if (strActFnc == "RELU") {
} else if (strActFnc == "TANH") {
} else if (strActFnc == "FTANH") {
} else if (strActFnc == "SYMMRELU") {
} else if (strActFnc == "SOFTSIGN") {
} else if (strActFnc == "SIGMOID") {
} else if (strActFnc == "LINEAR") {
} else if (strActFnc == "GAUSS") {
} else if (width == 0) {
TString strNumNodes = strActFnc;
size_t outputSize = 1;
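// Illustrative example (assumed descriptor syntax): "DENSE|128|RELU" requests a dense layer
// of width 128 with a ReLU activation; a token that is not a recognised activation name is
// treated as the node count via strNumNodes above.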
template <typename Architecture_t, typename Layer_t>
int zeroPadHeight = 0;
int zeroPadWidth = 0;
TIter nextToken(subStrings);
for (; token != nullptr; token = (TObjString *)nextToken()) {
depth = strDepth.Atoi();
fltHeight = strFltHeight.Atoi();
fltWidth = strFltWidth.Atoi();
strideRows = strStrideRows.Atoi();
strideCols = strStrideCols.Atoi();
zeroPadHeight = strZeroPadHeight.Atoi();
zeroPadWidth = strZeroPadWidth.Atoi();
if (strActFnc == "RELU") {
} else if (strActFnc == "TANH") {
} else if (strActFnc == "SYMMRELU") {
} else if (strActFnc == "SOFTSIGN") {
} else if (strActFnc == "SIGMOID") {
} else if (strActFnc == "LINEAR") {
} else if (strActFnc == "GAUSS") {
zeroPadHeight, zeroPadWidth, activationFunction);
if (fBuildNet)
   fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                      zeroPadHeight, zeroPadWidth, activationFunction);
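// Illustrative example (assumed descriptor syntax): "CONV|8|3|3|1|1|1|1|RELU" adds a
// convolutional layer with depth 8, 3x3 filters, stride 1x1 and zero padding 1x1, using a
// ReLU activation; the same layer is added both to the training deepNet and, when fBuildNet
// is set, to the stored evaluation network fNet.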
template <typename Architecture_t, typename Layer_t>
int filterHeight = 0;
TIter nextToken(subStrings);
for (; token != nullptr; token = (TObjString *)nextToken()) {
filterHeight = strFrmHeight.Atoi();
filterWidth = strFrmWidth.Atoi();
strideRows = strStrideRows.Atoi();
strideCols = strStrideCols.Atoi();
deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
if (fBuildNet)
   fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
template <typename Architecture_t, typename Layer_t>
bool flattening = false;
TIter nextToken(subStrings);
for (; token != nullptr; token = (TObjString *)nextToken()) {
if (token->GetString() == "FLAT") idxToken = 4;
depth = strDepth.Atoi();
if (flat == "FLAT") {
template <typename Architecture_t, typename Layer_t>
double momentum = -1;
double epsilon = 0.0001;
TIter nextToken(subStrings);
for (; token != nullptr; token = (TObjString *)nextToken()) {
template <typename Architecture_t, typename Layer_t>
bool rememberState = false;
bool returnSequence = false;
bool resetGateAfter = false;
TIter nextToken(subStrings);
for (; token != nullptr; token = (TObjString *)nextToken()) {
stateSize = strstateSize.Atoi();
inputSize = strinputSize.Atoi();
timeSteps = strtimeSteps.Atoi();
rememberState = (bool)strrememberState.Atoi();
auto *recurrentLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
recurrentLayer->Initialize();
if (fBuildNet)
   fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
auto *recurrentLayer = deepNet.AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
recurrentLayer->Initialize();
fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
if (Architecture_t::IsCudnn()) resetGateAfter = true;
auto *recurrentLayer = deepNet.AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
recurrentLayer->Initialize();
fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
Log() << kFATAL << "Invalid Recurrent layer type " << Endl;
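// Illustrative example (assumed descriptor syntax and token order): "LSTM|32|10|8|0|1" would
// request an LSTM layer with stateSize = 32, inputSize = 10, timeSteps = 8,
// rememberState = false and returnSequence = true; for GRU layers resetGateAfter is forced
// to true when the cuDNN backend is used, as shown above.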
: MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4, 0),
  fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
  fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
  fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
  fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
: MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4, 0), fBatchHeight(),
  fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
  fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
  fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
  fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
parseString.ReplaceAll(" ", "");
const TString keyValueDelim("=");
TObjArray *blockStrings = parseString.Tokenize(blockDelim);
TIter nextBlock(blockStrings);
for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
blockKeyValues.push_back(std::map<TString, TString>());
std::map<TString, TString> &currentBlock = blockKeyValues.back();
TIter nextToken(subStrings);
for (; token != nullptr; token = (TObjString *)nextToken()) {
int delimPos = strKeyValue.First(keyValueDelim.Data());
if (delimPos <= 0) continue;
currentBlock.insert(std::make_pair(strKey, strValue));
return blockKeyValues;
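// Illustrative example (hypothetical input): with blockDelim = "|" and tokenDelim = ",",
// the string "LearningRate=1e-3,Optimizer=ADAM|LearningRate=1e-4,Optimizer=SGD" is parsed
// into two maps, {{"LearningRate","1e-3"},{"Optimizer","ADAM"}} and
// {{"LearningRate","1e-4"},{"Optimizer","SGD"}}.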
Int_t nValidationSamples = 0;
if (fNumValidationString.EndsWith("%")) {
Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
      << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
} else if (fNumValidationString.IsFloat()) {
Double_t valSizeAsDouble = fNumValidationString.Atof();
if (valSizeAsDouble < 1.0) {
nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
nValidationSamples = valSizeAsDouble;
Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
if (nValidationSamples < 0) {
Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
if (nValidationSamples == 0) {
Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
if (nValidationSamples >= (Int_t)trainingSetSize) {
Log() << kFATAL << "Validation size \"" << fNumValidationString
      << "\" is larger than or equal to the training set (size=\"" << trainingSetSize << "\")." << Endl;
return nValidationSamples;
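// Illustrative examples (hypothetical settings): NumValidation="20%" or "0.2" reserve a fifth
// of the training events for validation, while NumValidation="100" reserves exactly 100
// events; any value that parses to zero, negative, or to the full training-set size or more
// ends in one of the kFATAL branches above.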
template <typename Architecture_t>
using Scalar_t = typename Architecture_t::Scalar_t;
const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
size_t trainingPhase = 1;
size_t nThreads = 1;
size_t batchSize = settings.batchSize;
if (batchDepth != batchSize && batchDepth > 1) {
Error("Train", "Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu", batchDepth, batchSize);
if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight) {
Error("Train", "Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu", batchHeight, batchSize);
bool badLayout = false;
if (batchDepth == batchSize)
badLayout = (inputDepth * inputHeight * inputWidth != batchHeight * batchWidth);
if (batchHeight == batchSize && batchDepth == 1)
badLayout |= (inputDepth * inputHeight * inputWidth != batchWidth);
Error("Train", "Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
      inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth);
if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
Log() << kFATAL << "Number of samples in the datasets are train: ("
      << nTrainingSamples << ") test: (" << nValidationSamples
      << "). One of these is smaller than the batch size of "
      << settings.batchSize << ". Please decrease the batch"
      << " size to be at most the same size as the smallest"
      << " of them." << Endl;
DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
if (trainingPhase == 1) {
fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
std::vector<DeepNet_t> nets{};
nets.reserve(nThreads);
for (size_t i = 0; i < nThreads; i++) {
nets.push_back(deepNet);
std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
for (auto &p : dropoutVector) {
deepNet.SetDropoutProbabilities(dropoutVector);
if (trainingPhase > 1) {
for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
Log() << "***** Deep Learning Network *****" << Endl;
if (Log().GetMinType() <= kINFO)
Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for validation" << Endl;
TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
                                {inputDepth, inputHeight, inputWidth},
                                {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()},
                                deepNet.GetOutputWidth(), nThreads);
TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
                                  {inputDepth, inputHeight, inputWidth},
                                  {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()},
                                  deepNet.GetOutputWidth(), nThreads);
Log() << "Compute initial loss on the validation data " << Endl;
for (auto batch : validationData) {
auto inputTensor = batch.GetInput();
auto outputMatrix = batch.GetOutput();
auto weights = batch.GetWeights();
minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
minValError /= (Double_t)(nValidationSamples / settings.batchSize);
minValError += regzTerm;
std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
case EOptimizer::kSGD:
   optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
case EOptimizer::kAdam: {
   optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
      deepNet, settings.learningRate, settings.optimizerParams["ADAM_beta1"],
      settings.optimizerParams["ADAM_beta2"], settings.optimizerParams["ADAM_eps"]));
case EOptimizer::kAdagrad:
   optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
      settings.optimizerParams["ADAGRAD_eps"]));
case EOptimizer::kRMSProp:
   optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
      settings.optimizerParams["RMSPROP_rho"],
      settings.optimizerParams["RMSPROP_eps"]));
case EOptimizer::kAdadelta:
   optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
      settings.optimizerParams["ADADELTA_rho"],
      settings.optimizerParams["ADADELTA_eps"]));
std::vector<TTensorBatch<Architecture_t>> batches{};
bool converged = false;
size_t convergenceCount = 0;
size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
std::chrono::time_point<std::chrono::system_clock> tstart, tend;
tstart = std::chrono::system_clock::now();
auto optimParametersString = [&]() {
for (auto &element : settings.optimizerParams) {
key.ReplaceAll(settings.optimizerName + "_", "");
double value = element.second;
if (!optimParameters.IsNull())
   optimParameters += ",";
optimParameters += " (";
if (!optimParameters.IsNull())
   optimParameters += ")";
return optimParameters;
<< " Optimizer " << settings.optimizerName
<< optimParametersString()
<< " Learning rate = " << settings.learningRate << " regularization " << (char)settings.regularization
<< " minimum error = " << minValError << Endl;
std::string separator(62, '-');
Log() << std::setw(10) << "Epoch"
      << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err." << std::setw(12)
      << "t(s)/epoch" << std::setw(12) << "t(s)/Loss" << std::setw(12) << "nEvents/s" << std::setw(12)
      << "Conv. Steps" << Endl;
size_t shuffleSeed = 0;
Log() << "Initial Deep Net Weights " << Endl;
auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
for (size_t l = 0; l < weights_tensor.size(); ++l)
   weights_tensor[l].Print();
auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
bias_tensor[0].Print();
Log() << " Start epoch iteration ..." << Endl;
bool debugFirstEpoch = false;
bool computeLossInTraining = true;
size_t nTrainEpochs = 0;
while (!converged) {
trainingData.Shuffle(rng);
for (size_t i = 0; i < batchesInEpoch; ++i) {
if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";
auto my_batch = trainingData.GetTensorBatch();
if (debugFirstEpoch)
   std::cout << "got batch data - doing forward \n";
Architecture_t::PrintTensor(my_batch.GetInput(), "input tensor", true);
typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
Architecture_t::PrintTensor(tOut, "label tensor", true);
Architecture_t::PrintTensor(tW, "weight tensor", true);
deepNet.Forward(my_batch.GetInput(), true);
if (computeLossInTraining) {
auto outputMatrix = my_batch.GetOutput();
auto weights = my_batch.GetWeights();
trainingError += deepNet.Loss(outputMatrix, weights, false);
if (debugFirstEpoch)
   std::cout << "- doing backward \n";
size_t nlayers = deepNet.GetLayers().size();
for (size_t l = 0; l < nlayers; ++l) {
if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
   Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
if (debugFirstEpoch)
   std::cout << "- doing optimizer update \n";
optimizer->IncrementGlobalStep();
std::cout << "minimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
for (size_t l = 0; l < nlayers; ++l) {
if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0), TString::Format("weights after step layer %d", (int)l).Data());
Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0), "weight gradients");
if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss \n";
debugFirstEpoch = false;
if ((nTrainEpochs % settings.testInterval) == 0) {
std::chrono::time_point<std::chrono::system_clock> t1, t2;
t1 = std::chrono::system_clock::now();
bool inTraining = false;
for (auto batch : validationData) {
auto inputTensor = batch.GetInput();
auto outputMatrix = batch.GetOutput();
auto weights = batch.GetWeights();
valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
valError /= (Double_t)(nValidationSamples / settings.batchSize);
valError += regTerm;
t2 = std::chrono::system_clock::now();
if (valError < minValError) {
convergenceCount = 0;
convergenceCount += settings.testInterval;
if (valError < minValError) {
Log() << std::setw(10) << nTrainEpochs
      << " Minimum Test error found - save the configuration " << Endl;
for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
minValError = valError;
else if (minValError <= 0.)
   minValError = valError;
if (!computeLossInTraining) {
trainingError = 0.0;
for (auto batch : trainingData) {
auto inputTensor = batch.GetInput();
auto outputMatrix = batch.GetOutput();
auto weights = batch.GetWeights();
trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
trainingError += regTerm;
tend = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = tend - tstart;
std::chrono::duration<double> elapsed1 = t1 - tstart;
std::chrono::duration<double> elapsed_testing = tend - t1;
double seconds = elapsed_seconds.count();
double eventTime = elapsed1.count() / (batchesInEpoch * settings.testInterval * settings.batchSize);
convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
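// Note (editorial): training is declared converged once the validation error has not improved
// for more than ConvergenceSteps epochs (convergenceCount advances by testInterval after each
// unsuccessful test) or when MaxEpochs is reached, as expressed by the condition above.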
Log() << std::setw(10) << nTrainEpochs << " | "
      << std::setw(12) << trainingError
      << std::setw(12) << valError
      << std::setw(12) << seconds / settings.testInterval
      << std::setw(12) << elapsed_testing.count()
      << std::setw(12) << 1. / eventTime
      << std::setw(12) << convergenceCount
tstart = std::chrono::system_clock::now();
if (converged && debug) {
Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << nTrainEpochs
auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
for (size_t l = 0; l < weights_tensor.size(); ++l)
   weights_tensor[l].Print();
bias_tensor[0].Print();
Log() << kFATAL << "Not implemented yet" << Endl;
#ifdef R__HAS_TMVAGPU
Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
TrainDeepNet<DNN::TCudnn<ScalarImpl_t>>();
TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                   "you have CUDA installed and it was successfully "
                   "detected by CMake."
#ifdef R__HAS_TMVACPU
Log() << kINFO << "Start of deep neural network training on CPU using MT, nthreads = "
Log() << kINFO << "Start of deep neural network training on single thread CPU (without ROOT-MT support) " << Endl
TrainDeepNet<DNN::TCpu<ScalarImpl_t>>();
" is not a supported architecture for TMVA::MethodDL"
if (!fNet || fNet->GetDepth() == 0) {
Log() << kFATAL << "The network has not been trained and fNet is not built" << Endl;
if (fNet->GetBatchSize() != 1) {
Log() << kFATAL << "FillInputTensor: network batch size must be equal to 1 when doing single event prediction" << Endl;
const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
size_t nVariables = GetEvent()->GetNVariables();
if (fXInput.GetLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
R__ASSERT(fXInput.GetShape().size() < 4);
if (fXInput.GetShape().size() == 2) {
nc = fXInput.GetShape()[0];
ArchitectureImpl_t::PrintTensor(fXInput);
Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1" << Endl;
nhw = fXInput.GetShape()[1];
nc = fXInput.GetCSize();
nhw = fXInput.GetWSize();
if (nVariables != nc * nhw) {
Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
      << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw << Endl;
for (size_t j = 0; j < nc; j++) {
for (size_t k = 0; k < nhw; k++) {
fXInputBuffer[k * nc + j] = inputValues[j * nhw + k];
assert(fXInput.GetShape().size() >= 4);
size_t nc = fXInput.GetCSize();
size_t nh = fXInput.GetHSize();
size_t nw = fXInput.GetWSize();
size_t n = nc * nh * nw;
if (nVariables != n) {
Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
      << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
for (size_t j = 0; j < n; j++) {
fXInputBuffer[j] = inputValues[j];
fXInput.GetDeviceBuffer().CopyFrom(fXInputBuffer);
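// Note (editorial, illustrative): in the column-major branch above the indexing
// fXInputBuffer[k * nc + j] = inputValues[j * nhw + k] transposes the event values from the
// row-major {nc, nhw} event ordering into the column-major host buffer; e.g. for nc = 2 and
// nhw = 3, the event value at j = 0, k = 1 lands at buffer index 1 * 2 + 0 = 2.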
double mvaValue = (*fYHat)(0, 0);
#ifdef DEBUG_MVAVALUE
using Tensor_t = std::vector<MatrixImpl_t>;
TMatrixF xInput(n1, n2, inputValues.data());
std::cout << "Output of DeepNet " << mvaValue << std::endl;
auto &deepnet = *fNet;
std::cout << "Loop on layers " << std::endl;
for (int l = 0; l < deepnet.GetDepth(); ++l) {
std::cout << "Layer " << l;
const auto *layer = deepnet.GetLayerAt(l);
const Tensor_t &layer_output = layer->GetOutput();
std::cout << "DNN output " << layer_output.size() << std::endl;
for (size_t i = 0; i < layer_output.size(); ++i) {
#ifdef R__HAS_TMVAGPU
TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols(), layer_output[i].GetRawDataPointer());
const Tensor_t &layer_weights = layer->GetWeights();
std::cout << "DNN weights " << layer_weights.size() << std::endl;
if (layer_weights.size() > 0) {
#ifdef R__HAS_TMVAGPU
TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols(), layer_weights[i].GetRawDataPointer());
template <typename Architecture_t>
if (!fNet || fNet->GetDepth() == 0) {
Log() << kFATAL << "The network has not been trained and fNet is not built"
using Matrix_t = typename Architecture_t::Matrix_t;
DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
std::vector<DeepNet_t> nets{};
for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
size_t n1 = deepNet.GetBatchHeight();
size_t n2 = deepNet.GetBatchWidth();
size_t n0 = deepNet.GetBatchSize();
n1 = deepNet.GetBatchSize();
Long64_t nEvents = lastEvt - firstEvt;
TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth());
<< " sample (" << nEvents << " events)" << Endl;
std::vector<double> mvaValues(nEvents);
for (Long64_t ievt = firstEvt; ievt < lastEvt; ievt += batchSize) {
Long64_t ievt_end = ievt + batchSize;
if (ievt_end <= lastEvt) {
if (ievt == firstEvt) {
if (n1 == batchSize && n0 == 1) {
if (n2 != nVariables) {
Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
      << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
if (n1 * n2 != nVariables || n0 != batchSize) {
Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
      << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
auto batch = testData.GetTensorBatch();
auto inputTensor = batch.GetInput();
auto xInput = batch.GetInput();
for (size_t i = 0; i < batchSize; ++i) {
double value = yHat(i, 0);
for (Long64_t i = ievt; i < lastEvt; ++i) {
<< "Elapsed time for evaluation of " << nEvents << " events: "
fNet->Prediction(*fYHat, fXInput, fOutputFunction);
size_t nTargets = DataInfo().GetNTargets();
R__ASSERT(nTargets == fYHat->GetNcols());
std::vector<Float_t> output(nTargets);
for (size_t i = 0; i < nTargets; i++)
   output[i] = (*fYHat)(0, i);
if (fRegressionReturnVal == NULL)
   fRegressionReturnVal = new std::vector<Float_t>(nTargets);
R__ASSERT(fRegressionReturnVal->size() == nTargets);
for (size_t i = 0; i < nTargets; ++i) {
const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
for (size_t i = 0; i < nTargets; ++i) {
(*fRegressionReturnVal)[i] = evT2->GetTarget(i);
return *fRegressionReturnVal;
fNet->Prediction(*fYHat, fXInput, fOutputFunction);
size_t nClasses = DataInfo().GetNClasses();
R__ASSERT(nClasses == fYHat->GetNcols());
if (fMulticlassReturnVal == NULL) {
fMulticlassReturnVal = new std::vector<Float_t>(nClasses);
R__ASSERT(fMulticlassReturnVal->size() == nClasses);
for (size_t i = 0; i < nClasses; i++) {
(*fMulticlassReturnVal)[i] = (*fYHat)(0, i);
return *fMulticlassReturnVal;
if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
if (firstEvt < 0) firstEvt = 0;
nEvents = lastEvt - firstEvt;
size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
if (size_t(nEvents) < batchSize) batchSize = nEvents;
#ifdef R__HAS_TMVAGPU
Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
return PredictDeepNet<DNN::TCpu<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
void *nn = xmlEngine.NewChild(parent, 0, "Weights");
Int_t inputDepth = fNet->GetInputDepth();
Int_t inputHeight = fNet->GetInputHeight();
Int_t inputWidth = fNet->GetInputWidth();
Int_t batchDepth = fNet->GetBatchDepth();
Int_t batchHeight = fNet->GetBatchHeight();
Int_t batchWidth = fNet->GetBatchWidth();
char lossFunction = static_cast<char>(fNet->GetLossFunction());
char initialization = static_cast<char>(fNet->GetInitialization());
xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
for (Int_t i = 0; i < depth; i++)
size_t inputDepth, inputHeight, inputWidth;
size_t batchSize, batchDepth, batchHeight, batchWidth;
char lossFunctionChar;
char initializationChar;
char regularizationChar;
char outputFunctionChar;
fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
                                                        batchHeight, batchWidth,
for (size_t i = 0; i < netDepth; i++) {
if (layerName == "DenseLayer") {
else if (layerName == "ConvLayer") {
size_t fltHeight, fltWidth = 0;
size_t strideRows, strideCols = 0;
size_t padHeight, padWidth = 0;
fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                   padHeight, padWidth, actFunction);
else if (layerName == "MaxPoolLayer") {
size_t filterHeight, filterWidth = 0;
size_t strideRows, strideCols = 0;
fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
else if (layerName == "ReshapeLayer") {
else if (layerName == "RNNLayer") {
size_t stateSize, inputSize, timeSteps = 0;
int rememberState = 0;
int returnSequence = 0;
fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
else if (layerName == "LSTMLayer") {
size_t stateSize, inputSize, timeSteps = 0;
int rememberState, returnSequence = 0;
fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
else if (layerName == "GRULayer") {
size_t stateSize, inputSize, timeSteps = 0;
int rememberState, returnSequence, resetGateAfter = 0;
2293 "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");
2295 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
2298 else if (layerName ==
"BatchNormLayer") {
2300 fNet->AddBatchNormLayer(0., 0.0);
2303 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);