77 std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
78 if (it == keyValueMap.end()) {
131 if (value ==
"TRUE" || value ==
"T" || value ==
"1") {
141 std::vector<double> defaultValue)
144 if (parseString ==
"") {
149 std::vector<double> values;
153 TIter nextToken(tokenStrings);
155 for (; tokenString != NULL; tokenString = (
TObjString *)nextToken()) {
156 std::stringstream sstr;
159 sstr >> currentValue;
160 values.push_back(currentValue);
177 " or cross entropy (binary classification).");
193 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
194 "Specify as 100 to use exactly 100 events. (Default: 20%)");
206 "ConvergenceSteps=100,"
212 "Regularization=None,"
214 "TrainingStrategy",
"Defines the training strategies.");
222 Log() << kINFO <<
"Will ignore negative events in training!" <<
Endl;
226 Log() << kWARNING <<
"The STANDARD architecture is not supported anymore. "
227 "Please use Architecture=CPU or Architecture=CPU."
228 "See the TMVA Users' Guide for instructions if you "
229 "encounter problems."
231 Log() << kINFO <<
"We will use instead the CPU architecture" <<
Endl;
235 Log() << kERROR <<
"The OPENCL architecture has not been implemented yet. "
236 "Please use Architecture=CPU or Architecture=CPU for the "
237 "time being. See the TMVA Users' Guide for instructions "
238 "if you encounter problems."
241 Log() << kINFO <<
"We will try using the GPU-CUDA architecture if available" <<
Endl;
250 Log() << kINFO <<
"Will now use the GPU architecture !" <<
Endl;
252 Log() << kERROR <<
"CUDA backend not enabled. Please make sure "
253 "you have CUDA installed and it was successfully "
254 "detected by CMAKE by using -Dtmva-gpu=On "
257 Log() << kINFO <<
"Will now use instead the CPU architecture !" <<
Endl;
263 Log() << kINFO <<
"Will now use the CPU architecture with BLAS and IMT support !" <<
Endl;
265 Log() << kINFO <<
"Multi-core CPU backend not enabled. For better performances, make sure "
266 "you have a BLAS implementation and it was successfully "
267 "detected by CMake as well that the imt CMake flag is set."
269 Log() << kINFO <<
"Will use anyway the CPU architecture but with slower performance" <<
Endl;
289 Log() << kWARNING <<
"For regression only SUMOFSQUARES is a valid "
290 <<
" neural net error function. Setting error function to "
291 <<
" SUMOFSQUARES now." <<
Endl;
330 for (
auto &block : strategyKeyValues) {
353 if (optimizer ==
"SGD") {
355 }
else if (optimizer ==
"ADAM") {
357 }
else if (optimizer ==
"ADAGRAD") {
359 }
else if (optimizer ==
"RMSPROP") {
361 }
else if (optimizer ==
"ADADELTA") {
370 std::vector<TString> optimParamLabels = {
"_beta1",
"_beta2",
"_eps",
"_rho"};
372 std::map<TString, double> defaultValues = {
373 {
"ADADELTA_eps", 1.E-8}, {
"ADADELTA_rho", 0.95},
374 {
"ADAGRAD_eps", 1.E-8},
375 {
"ADAM_beta1", 0.9}, {
"ADAM_beta2", 0.999}, {
"ADAM_eps", 1.E-7},
376 {
"RMSPROP_eps", 1.E-7}, {
"RMSPROP_rho", 0.9},
378 for (
auto &pN : optimParamLabels) {
381 if (defaultValues.count(optimParamName) > 0) {
382 double defValue = defaultValues[optimParamName];
448 TIter nextInputDim(inputDimStrings);
454 std::vector<size_t> inputShape;
455 inputShape.reserve(inputLayoutString.
Length()/2 + 2);
456 inputShape.push_back(0);
457 for (; inputDimString !=
nullptr; inputDimString = (
TObjString *)nextInputDim()) {
462 inputShape.push_back(subDim);
467 if (inputShape.size() == 2) {
469 inputShape = {inputShape[0], 1, 1, inputShape[1]};
471 else if (inputShape.size() == 3) {
473 inputShape = {inputShape[0], inputShape[1], 1, inputShape[2]};
489 size_t batchDepth = 0;
490 size_t batchHeight = 0;
491 size_t batchWidth = 0;
495 TIter nextBatchDim(batchDimStrings);
499 for (; batchDimString !=
nullptr; batchDimString = (
TObjString *)nextBatchDim()) {
504 batchDepth = (size_t)strDepth.
Atoi();
509 batchHeight = (size_t)strHeight.
Atoi();
514 batchWidth = (size_t)strWidth.
Atoi();
527template <
typename Architecture_t,
typename Layer_t>
532 const TString layerDelimiter(
",");
533 const TString subDelimiter(
"|");
541 TIter nextLayer(layerStrings);
545 for (; layerString !=
nullptr; layerString = (
TObjString *)nextLayer()) {
549 TIter nextToken(subStrings);
556 if (strLayerType ==
"DENSE") {
558 }
else if (strLayerType ==
"CONV") {
560 }
else if (strLayerType ==
"MAXPOOL") {
562 }
else if (strLayerType ==
"RESHAPE") {
564 }
else if (strLayerType ==
"BNORM") {
566 }
else if (strLayerType ==
"RNN") {
568 }
else if (strLayerType ==
"LSTM") {
570 }
else if (strLayerType ==
"GRU") {
581template <
typename Architecture_t,
typename Layer_t>
592 const size_t inputSize =
GetNvar();
596 TIter nextToken(subStrings);
602 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
606 if (strActFnc ==
"DENSE")
continue;
608 if (strActFnc ==
"RELU") {
610 }
else if (strActFnc ==
"TANH") {
612 }
else if (strActFnc ==
"FTANH") {
614 }
else if (strActFnc ==
"SYMMRELU") {
616 }
else if (strActFnc ==
"SOFTSIGN") {
618 }
else if (strActFnc ==
"SIGMOID") {
620 }
else if (strActFnc ==
"LINEAR") {
622 }
else if (strActFnc ==
"GAUSS") {
624 }
else if (width == 0) {
628 TString strNumNodes = strActFnc;
634 width = fml.
Eval(inputSize);
639 size_t outputSize = 1;
645 if (width == 0) width = outputSize;
652 if (
fBuildNet)
fNet->AddDenseLayer(width, activationFunction);
667template <
typename Architecture_t,
typename Layer_t>
677 int zeroPadHeight = 0;
678 int zeroPadWidth = 0;
683 TIter nextToken(subStrings);
687 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
692 depth = strDepth.
Atoi();
697 fltHeight = strFltHeight.
Atoi();
702 fltWidth = strFltWidth.
Atoi();
707 strideRows = strStrideRows.
Atoi();
712 strideCols = strStrideCols.
Atoi();
717 zeroPadHeight = strZeroPadHeight.
Atoi();
722 zeroPadWidth = strZeroPadWidth.
Atoi();
727 if (strActFnc ==
"RELU") {
729 }
else if (strActFnc ==
"TANH") {
731 }
else if (strActFnc ==
"SYMMRELU") {
733 }
else if (strActFnc ==
"SOFTSIGN") {
735 }
else if (strActFnc ==
"SIGMOID") {
737 }
else if (strActFnc ==
"LINEAR") {
739 }
else if (strActFnc ==
"GAUSS") {
749 zeroPadHeight, zeroPadWidth, activationFunction);
753 if (
fBuildNet)
fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
754 zeroPadHeight, zeroPadWidth, activationFunction);
766template <
typename Architecture_t,
typename Layer_t>
772 int filterHeight = 0;
779 TIter nextToken(subStrings);
783 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
788 filterHeight = strFrmHeight.
Atoi();
793 filterWidth = strFrmWidth.
Atoi();
798 strideRows = strStrideRows.
Atoi();
803 strideCols = strStrideCols.
Atoi();
811 deepNet.
AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
814 if (
fBuildNet)
fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
827template <
typename Architecture_t,
typename Layer_t>
835 bool flattening =
false;
839 TIter nextToken(subStrings);
843 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
844 if (token->
GetString() ==
"FLAT") idxToken=4;
848 depth = strDepth.
Atoi();
853 height = strHeight.
Atoi();
858 width = strWidth.
Atoi();
863 if (flat ==
"FLAT") {
876 if (
fBuildNet)
fNet->AddReshapeLayer(depth, height, width, flattening);
888template <
typename Architecture_t,
typename Layer_t>
895 double momentum = -1;
896 double epsilon = 0.0001;
900 TIter nextToken(subStrings);
904 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
929template <
typename Architecture_t,
typename Layer_t>
938 bool rememberState =
false;
939 bool returnSequence =
false;
940 bool resetGateAfter =
false;
944 TIter nextToken(subStrings);
948 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
953 stateSize = strstateSize.
Atoi();
959 inputSize = strinputSize.
Atoi();
965 timeSteps = strtimeSteps.
Atoi();
971 rememberState = (
bool) strrememberState.
Atoi();
977 returnSequence = (
bool)str.Atoi();
983 resetGateAfter = (
bool)str.Atoi();
991 auto * recurrentLayer = deepNet.
AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
992 recurrentLayer->Initialize();
994 if (
fBuildNet)
fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
997 auto *recurrentLayer = deepNet.
AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
998 recurrentLayer->Initialize();
1001 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
1004 if (Architecture_t::IsCudnn()) resetGateAfter =
true;
1005 auto *recurrentLayer = deepNet.
AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1006 recurrentLayer->Initialize();
1009 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
1012 Log() << kFATAL <<
"Invalid Recurrent layer type " <<
Endl;
1054 parseString.ReplaceAll(
" ",
"");
1056 const TString keyValueDelim(
"=");
1058 TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1059 TIter nextBlock(blockStrings);
1062 for (; blockString !=
nullptr; blockString = (
TObjString *)nextBlock()) {
1063 blockKeyValues.push_back(std::map<TString, TString>());
1064 std::map<TString, TString> ¤tBlock = blockKeyValues.back();
1067 TIter nextToken(subStrings);
1070 for (; token !=
nullptr; token = (
TObjString *)nextToken()) {
1072 int delimPos = strKeyValue.
First(keyValueDelim.
Data());
1073 if (delimPos <= 0)
continue;
1082 currentBlock.insert(std::make_pair(strKey, strValue));
1085 return blockKeyValues;
1109 Int_t nValidationSamples = 0;
1114 if (fNumValidationString.EndsWith(
"%")) {
1119 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
1120 nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
1122 Log() << kFATAL <<
"Cannot parse number \"" << fNumValidationString
1123 <<
"\". Expected string like \"20%\" or \"20.0%\"." <<
Endl;
1125 }
else if (fNumValidationString.IsFloat()) {
1126 Double_t valSizeAsDouble = fNumValidationString.Atof();
1128 if (valSizeAsDouble < 1.0) {
1130 nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
1133 nValidationSamples = valSizeAsDouble;
1136 Log() <<
kFATAL <<
"Cannot parse number \"" << fNumValidationString <<
"\". Expected string like \"0.2\" or \"100\"."
1142 if (nValidationSamples < 0) {
1143 Log() <<
kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is negative." <<
Endl;
1146 if (nValidationSamples == 0) {
1147 Log() <<
kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is zero." <<
Endl;
1150 if (nValidationSamples >= (
Int_t)trainingSetSize) {
1151 Log() <<
kFATAL <<
"Validation size \"" << fNumValidationString
1152 <<
"\" is larger than or equal in size to training set (size=\"" << trainingSetSize <<
"\")." <<
Endl;
1155 return nValidationSamples;
1162template <
typename Architecture_t>
1166 using Scalar_t =
typename Architecture_t::Scalar_t;
1184 const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
1185 const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};
1187 size_t trainingPhase = 1;
1191 size_t nThreads = 1;
1195 size_t batchSize = settings.batchSize;
1216 if (batchDepth != batchSize && batchDepth > 1) {
1217 Error(
"Train",
"Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
1220 if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
1221 Error(
"Train",
"Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
1227 bool badLayout =
false;
1229 if (batchDepth == batchSize)
1230 badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
1232 if (batchHeight == batchSize && batchDepth == 1)
1233 badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
1235 Error(
"Train",
"Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
1236 inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
1241 if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
1242 Log() << kFATAL <<
"Number of samples in the datasets are train: ("
1243 << nTrainingSamples <<
") test: (" << nValidationSamples
1244 <<
"). One of these is smaller than the batch size of "
1245 << settings.batchSize <<
". Please increase the batch"
1246 <<
" size to be at least the same size as the smallest"
1247 <<
" of them." <<
Endl;
1250 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J,
I,
R,
weightDecay);
1254 if (trainingPhase == 1) {
1255 fNet = std::unique_ptr<DeepNetImpl_t>(
new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
1263 std::vector<DeepNet_t> nets{};
1264 nets.reserve(nThreads);
1265 for (
size_t i = 0; i < nThreads; i++) {
1267 nets.push_back(deepNet);
1277 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1278 for (
auto & p : dropoutVector) {
1281 deepNet.SetDropoutProbabilities(dropoutVector);
1283 if (trainingPhase > 1) {
1285 for (
size_t i = 0; i < deepNet.GetDepth(); ++i) {
1286 deepNet.GetLayerAt(i)->CopyParameters(*
fNet->GetLayerAt(i));
1307 Log() <<
"***** Deep Learning Network *****" <<
Endl;
1308 if (
Log().GetMinType() <= kINFO)
1311 Log() <<
"Using " << nTrainingSamples <<
" events for training and " << nValidationSamples <<
" for testing" <<
Endl;
1315 TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
1316 {inputDepth, inputHeight, inputWidth},
1317 {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1318 deepNet.GetOutputWidth(), nThreads);
1321 TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
1322 {inputDepth, inputHeight, inputWidth},
1323 { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
1324 deepNet.GetOutputWidth(), nThreads);
1333 Log() <<
"Compute initial loss on the validation data " <<
Endl;
1334 for (
auto batch : validationData) {
1335 auto inputTensor = batch.GetInput();
1336 auto outputMatrix = batch.GetOutput();
1337 auto weights = batch.GetWeights();
1341 minValError += deepNet.Loss(inputTensor, outputMatrix, weights,
false, includeRegularization);
1344 Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1345 minValError /= (
Double_t)(nValidationSamples / settings.batchSize);
1346 minValError += regzTerm;
1350 std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
1356 optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
1361 optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
1363 deepNet, settings.learningRate, settings.optimizerParams[
"ADAM_beta1"],
1364 settings.optimizerParams[
"ADAM_beta2"], settings.optimizerParams[
"ADAM_eps"]));
1369 optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
1371 settings.optimizerParams[
"ADAGRAD_eps"]));
1375 optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
1377 settings.optimizerParams[
"RMSPROP_rho"],
1378 settings.optimizerParams[
"RMSPROP_eps"]));
1382 optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
1384 settings.optimizerParams[
"ADADELTA_rho"],
1385 settings.optimizerParams[
"ADADELTA_eps"]));
1391 std::vector<TTensorBatch<Architecture_t>> batches{};
1393 bool converged =
false;
1394 size_t convergenceCount = 0;
1395 size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
1398 std::chrono::time_point<std::chrono::system_clock> tstart, tend;
1399 tstart = std::chrono::system_clock::now();
1402 auto optimParametersString = [&]() {
1404 for (
auto & element : settings.optimizerParams) {
1406 key.
ReplaceAll(settings.optimizerName +
"_",
"");
1407 double value = element.second;
1408 if (!optimParameters.
IsNull())
1409 optimParameters +=
",";
1411 optimParameters +=
" (";
1414 if (!optimParameters.
IsNull())
1415 optimParameters +=
")";
1416 return optimParameters;
1420 <<
" Optimizer " << settings.optimizerName
1421 << optimParametersString()
1422 <<
" Learning rate = " << settings.learningRate <<
" regularization " << (char)settings.regularization
1423 <<
" minimum error = " << minValError <<
Endl;
1425 std::string separator(62,
'-');
1427 Log() << std::setw(10) <<
"Epoch"
1428 <<
" | " << std::setw(12) <<
"Train Err." << std::setw(12) <<
"Val. Err." << std::setw(12)
1429 <<
"t(s)/epoch" << std::setw(12) <<
"t(s)/Loss" << std::setw(12) <<
"nEvents/s" << std::setw(12)
1430 <<
"Conv. Steps" <<
Endl;
1436 size_t shuffleSeed = 0;
1442 Log() <<
"Initial Deep Net Weights " <<
Endl;
1443 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1444 for (
size_t l = 0;
l < weights_tensor.size(); ++
l)
1445 weights_tensor[
l].
Print();
1446 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1447 bias_tensor[0].Print();
1450 Log() <<
" Start epoch iteration ..." <<
Endl;
1451 bool debugFirstEpoch =
false;
1452 bool computeLossInTraining =
true;
1453 size_t nTrainEpochs = 0;
1454 while (!converged) {
1456 trainingData.Shuffle(rng);
1462 for (
size_t i = 0; i < batchesInEpoch; ++i ) {
1469 if (debugFirstEpoch) std::cout <<
"\n\n----- batch # " << i <<
"\n\n";
1471 auto my_batch = trainingData.GetTensorBatch();
1473 if (debugFirstEpoch)
1474 std::cout <<
"got batch data - doing forward \n";
1478 Architecture_t::PrintTensor(my_batch.GetInput(),
"input tensor",
true);
1479 typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
1480 typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
1481 Architecture_t::PrintTensor(tOut,
"label tensor",
true) ;
1482 Architecture_t::PrintTensor(tW,
"weight tensor",
true) ;
1485 deepNet.Forward(my_batch.GetInput(),
true);
1487 if (computeLossInTraining) {
1488 auto outputMatrix = my_batch.GetOutput();
1489 auto weights = my_batch.GetWeights();
1490 trainingError += deepNet.Loss(outputMatrix, weights,
false);
1493 if (debugFirstEpoch)
1494 std::cout <<
"- doing backward \n";
1497 size_t nlayers = deepNet.GetLayers().size();
1498 for (
size_t l = 0;
l < nlayers; ++
l) {
1499 if (deepNet.GetLayerAt(
l)->GetWeights().size() > 0)
1500 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetWeightsAt(0),
1503 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetOutput(),
1510 deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
1512 if (debugFirstEpoch)
1513 std::cout <<
"- doing optimizer update \n";
1516 optimizer->IncrementGlobalStep();
1520 std::cout <<
"minmimizer step - momentum " << settings.momentum <<
" learning rate " << optimizer->GetLearningRate() << std::endl;
1521 for (
size_t l = 0;
l < nlayers; ++
l) {
1522 if (deepNet.GetLayerAt(
l)->GetWeights().size() > 0) {
1523 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetWeightsAt(0),
TString::Format(
"weights after step layer %d",
l).Data());
1524 Architecture_t::PrintTensor(deepNet.GetLayerAt(
l)->GetWeightGradientsAt(0),
"weight gradients");
1531 if (debugFirstEpoch) std::cout <<
"\n End batch loop - compute validation loss \n";
1533 debugFirstEpoch =
false;
1534 if ((nTrainEpochs % settings.testInterval) == 0) {
1536 std::chrono::time_point<std::chrono::system_clock>
t1,t2;
1538 t1 = std::chrono::system_clock::now();
1544 bool inTraining =
false;
1545 for (
auto batch : validationData) {
1546 auto inputTensor = batch.GetInput();
1547 auto outputMatrix = batch.GetOutput();
1548 auto weights = batch.GetWeights();
1550 valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
1553 Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
1554 valError /= (
Double_t)(nValidationSamples / settings.batchSize);
1555 valError += regTerm;
1560 t2 = std::chrono::system_clock::now();
1563 if (valError < minValError) {
1564 convergenceCount = 0;
1566 convergenceCount += settings.testInterval;
1570 if (valError < minValError ) {
1572 Log() << std::setw(10) << nTrainEpochs
1573 <<
" Minimum Test error found - save the configuration " <<
Endl;
1574 for (
size_t i = 0; i < deepNet.GetDepth(); ++i) {
1575 fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
1584 minValError = valError;
1586 else if ( minValError <= 0. )
1587 minValError = valError;
1589 if (!computeLossInTraining) {
1590 trainingError = 0.0;
1592 for (
auto batch : trainingData) {
1593 auto inputTensor = batch.GetInput();
1594 auto outputMatrix = batch.GetOutput();
1595 auto weights = batch.GetWeights();
1596 trainingError += deepNet.Loss(inputTensor, outputMatrix, weights,
false,
false);
1600 trainingError /= (
Double_t)(nTrainingSamples / settings.batchSize);
1601 trainingError += regTerm;
1604 fTrainHistory.AddValue(
"trainingError",nTrainEpochs,trainingError);
1607 tend = std::chrono::system_clock::now();
1610 std::chrono::duration<double> elapsed_seconds = tend - tstart;
1611 std::chrono::duration<double> elapsed1 =
t1-tstart;
1614 std::chrono::duration<double> elapsed_testing = tend-
t1;
1616 double seconds = elapsed_seconds.count();
1619 double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);
1622 convergenceCount > settings.convergenceSteps || nTrainEpochs >= settings.maxEpochs;
1625 Log() << std::setw(10) << nTrainEpochs <<
" | "
1626 << std::setw(12) << trainingError
1627 << std::setw(12) << valError
1628 << std::setw(12) << seconds / settings.testInterval
1629 << std::setw(12) << elapsed_testing.count()
1630 << std::setw(12) << 1. / eventTime
1631 << std::setw(12) << convergenceCount
1637 tstart = std::chrono::system_clock::now();
1641 if (converged && debug) {
1642 Log() <<
"Final Deep Net Weights for phase " << trainingPhase <<
" epoch " << nTrainEpochs
1644 auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
1645 auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
1646 for (
size_t l = 0;
l < weights_tensor.size(); ++
l)
1647 weights_tensor[
l].
Print();
1648 bias_tensor[0].Print();
1661 Log() << kFATAL <<
"Not implemented yet" <<
Endl;
1667#ifdef R__HAS_TMVAGPU
1668 Log() << kINFO <<
"Start of deep neural network training on GPU." <<
Endl <<
Endl;
1675 Log() << kFATAL <<
"CUDA backend not enabled. Please make sure "
1676 "you have CUDA installed and it was successfully "
1677 "detected by CMAKE."
1682#ifdef R__HAS_TMVACPU
1685 Log() << kINFO <<
"Start of deep neural network training on CPU using MT, nthreads = "
1688 Log() << kINFO <<
"Start of deep neural network training on single thread CPU (without ROOT-MT support) " <<
Endl
1696 " is not a supported architecture for TMVA::MethodDL"
1712 if (!
fNet ||
fNet->GetDepth() == 0) {
1713 Log() << kFATAL <<
"The network has not been trained and fNet is not built" <<
Endl;
1715 if (
fNet->GetBatchSize() != 1) {
1716 Log() << kFATAL <<
"FillINputTensor::Network batch size must be equal to 1 when doing single event predicition" <<
Endl;
1720 const std::vector<Float_t> &inputValues =
GetEvent()->GetValues();
1721 size_t nVariables =
GetEvent()->GetNVariables();
1724 if (
fXInput.GetLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
1727 if (
fXInput.GetShape().size() == 2) {
1731 Log() << kFATAL <<
"First tensor dimension should be equal to batch size, i.e. = 1" <<
Endl;
1738 if (nVariables != nc * nhw) {
1739 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1740 <<
" n-event variables " << nVariables <<
" expected input tensor " << nc <<
" x " << nhw <<
Endl;
1742 for (
size_t j = 0; j < nc; j++) {
1743 for (
size_t k = 0; k < nhw; k++) {
1750 assert(
fXInput.GetShape().size() >= 4);
1751 size_t nc =
fXInput.GetCSize();
1752 size_t nh =
fXInput.GetHSize();
1753 size_t nw =
fXInput.GetWSize();
1754 size_t n = nc * nh * nw;
1755 if (nVariables !=
n) {
1756 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1757 <<
" n-event variables " << nVariables <<
" expected input tensor " << nc <<
" x " << nh <<
" x " << nw
1760 for (
size_t j = 0; j <
n; j++) {
1780 double mvaValue = (*fYHat)(0, 0);
1783#ifdef DEBUG_MVAVALUE
1784 using Tensor_t = std::vector<MatrixImpl_t>;
1785 TMatrixF xInput(n1,n2, inputValues.data() );
1788 std::cout <<
"Output of DeepNet " << mvaValue << std::endl;
1789 auto & deepnet = *
fNet;
1790 std::cout <<
"Loop on layers " << std::endl;
1791 for (
int l = 0;
l < deepnet.GetDepth(); ++
l) {
1792 std::cout <<
"Layer " <<
l;
1793 const auto * layer = deepnet.GetLayerAt(
l);
1794 const Tensor_t & layer_output = layer->GetOutput();
1796 std::cout <<
"DNN output " << layer_output.size() << std::endl;
1797 for (
size_t i = 0; i < layer_output.size(); ++i) {
1798#ifdef R__HAS_TMVAGPU
1802 TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
1806 const Tensor_t & layer_weights = layer->GetWeights();
1807 std::cout <<
"DNN weights " << layer_weights.size() << std::endl;
1808 if (layer_weights.size() > 0) {
1810#ifdef R__HAS_TMVAGPU
1814 TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
1826template <
typename Architecture_t>
1831 if (!
fNet ||
fNet->GetDepth() == 0) {
1832 Log() << kFATAL <<
"The network has not been trained and fNet is not built"
1850 using Matrix_t =
typename Architecture_t::Matrix_t;
1854 DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J,
I,
R,
weightDecay);
1855 std::vector<DeepNet_t> nets{};
1860 for (
size_t i = 0; i < deepNet.GetDepth(); ++i) {
1861 deepNet.GetLayerAt(i)->CopyParameters(*
fNet->GetLayerAt(i));
1868 size_t n1 = deepNet.GetBatchHeight();
1869 size_t n2 = deepNet.GetBatchWidth();
1870 size_t n0 = deepNet.GetBatchSize();
1873 n1 = deepNet.GetBatchSize();
1877 Long64_t nEvents = lastEvt - firstEvt;
1879 TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1887 Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1896 <<
" sample (" << nEvents <<
" events)" <<
Endl;
1900 std::vector<double> mvaValues(nEvents);
1903 for (
Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1905 Long64_t ievt_end = ievt + batchSize;
1907 if (ievt_end <= lastEvt) {
1909 if (ievt == firstEvt) {
1913 if (n1 == batchSize && n0 == 1) {
1914 if (n2 != nVariables) {
1915 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1916 <<
" n-event variables " << nVariables <<
" expected input matrix " << n1 <<
" x " << n2
1920 if (n1*n2 != nVariables || n0 != batchSize) {
1921 Log() << kFATAL <<
"Input Event variable dimensions are not compatible with the built network architecture"
1922 <<
" n-event variables " << nVariables <<
" expected input tensor " << n0 <<
" x " << n1 <<
" x " << n2
1928 auto batch = testData.GetTensorBatch();
1929 auto inputTensor = batch.GetInput();
1931 auto xInput = batch.GetInput();
1934 for (
size_t i = 0; i < batchSize; ++i) {
1935 double value = yHat(i,0);
1936 mvaValues[ievt + i] = (
TMath::IsNaN(value)) ? -999. : value;
1941 for (
Long64_t i = ievt; i < lastEvt; ++i) {
1950 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
1966 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
1968 size_t nTargets = DataInfo().GetNTargets();
1969 R__ASSERT(nTargets == fYHat->GetNcols());
1971 std::vector<Float_t> output(nTargets);
1972 for (
size_t i = 0; i < nTargets; i++)
1973 output[i] = (*fYHat)(0, i);
1976 if (fRegressionReturnVal == NULL)
1977 fRegressionReturnVal =
new std::vector<Float_t>(nTargets);
1978 R__ASSERT(fRegressionReturnVal->size() == nTargets);
1982 for (
size_t i = 0; i < nTargets; ++i) {
1985 const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
1986 for (
size_t i = 0; i < nTargets; ++i) {
1987 (*fRegressionReturnVal)[i] = evT2->GetTarget(i);
1990 return *fRegressionReturnVal;
2000 fNet->Prediction(*fYHat, fXInput, fOutputFunction);
2002 size_t nClasses = DataInfo().GetNClasses();
2003 R__ASSERT(nClasses == fYHat->GetNcols());
2005 if (fMulticlassReturnVal == NULL) {
2006 fMulticlassReturnVal =
new std::vector<Float_t>(nClasses);
2008 R__ASSERT(fMulticlassReturnVal->size() == nClasses);
2010 for (
size_t i = 0; i < nClasses; i++) {
2011 (*fMulticlassReturnVal)[i] = (*fYHat)(0, i);
2013 return *fMulticlassReturnVal;
2025 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
2026 if (firstEvt < 0) firstEvt = 0;
2027 nEvents = lastEvt-firstEvt;
2030 size_t defaultEvalBatchSize = (
fXInput.GetSize() > 1000) ? 100 : 1000;
2032 if (
size_t(nEvents) < batchSize ) batchSize = nEvents;
2036#ifdef R__HAS_TMVAGPU
2037 Log() << kINFO <<
"Evaluate deep neural network on GPU using batches with size = " << batchSize <<
Endl <<
Endl;
2046 Log() << kINFO <<
"Evaluate deep neural network on CPU using batches with size = " << batchSize <<
Endl <<
Endl;
2054 void* nn = xmlEngine.
NewChild(parent, 0,
"Weights");
2062 Int_t inputDepth =
fNet->GetInputDepth();
2063 Int_t inputHeight =
fNet->GetInputHeight();
2064 Int_t inputWidth =
fNet->GetInputWidth();
2068 Int_t batchDepth =
fNet->GetBatchDepth();
2069 Int_t batchHeight =
fNet->GetBatchHeight();
2070 Int_t batchWidth =
fNet->GetBatchWidth();
2072 char lossFunction =
static_cast<char>(
fNet->GetLossFunction());
2073 char initialization =
static_cast<char>(
fNet->GetInitialization());
2083 xmlEngine.NewAttr(nn, 0,
"NetDepth",
gTools().StringFromInt(depth));
2085 xmlEngine.NewAttr(nn, 0,
"InputDepth",
gTools().StringFromInt(inputDepth));
2086 xmlEngine.NewAttr(nn, 0,
"InputHeight",
gTools().StringFromInt(inputHeight));
2087 xmlEngine.NewAttr(nn, 0,
"InputWidth",
gTools().StringFromInt(inputWidth));
2089 xmlEngine.NewAttr(nn, 0,
"BatchSize",
gTools().StringFromInt(batchSize));
2090 xmlEngine.NewAttr(nn, 0,
"BatchDepth",
gTools().StringFromInt(batchDepth));
2091 xmlEngine.NewAttr(nn, 0,
"BatchHeight",
gTools().StringFromInt(batchHeight));
2092 xmlEngine.NewAttr(nn, 0,
"BatchWidth",
gTools().StringFromInt(batchWidth));
2094 xmlEngine.NewAttr(nn, 0,
"LossFunction",
TString(lossFunction));
2095 xmlEngine.NewAttr(nn, 0,
"Initialization",
TString(initialization));
2097 xmlEngine.NewAttr(nn, 0,
"OutputFunction",
TString(outputFunction));
2102 for (
Int_t i = 0; i < depth; i++)
2122 size_t inputDepth, inputHeight, inputWidth;
2127 size_t batchSize, batchDepth, batchHeight, batchWidth;
2135 char lossFunctionChar;
2137 char initializationChar;
2139 char regularizationChar;
2141 char outputFunctionChar;
2158 fNet = std::unique_ptr<DeepNetImpl_t>(
new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
2159 batchHeight, batchWidth,
2172 for (
size_t i = 0; i < netDepth; i++) {
2177 if (layerName ==
"DenseLayer") {
2189 fNet->AddDenseLayer(width, func, 0.0);
2193 else if (layerName ==
"ConvLayer") {
2198 size_t fltHeight, fltWidth = 0;
2199 size_t strideRows, strideCols = 0;
2200 size_t padHeight, padWidth = 0;
2214 fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2215 padHeight, padWidth, actFunction);
2220 else if (layerName ==
"MaxPoolLayer") {
2223 size_t filterHeight, filterWidth = 0;
2224 size_t strideRows, strideCols = 0;
2230 fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2233 else if (layerName ==
"ReshapeLayer") {
2236 size_t depth, height, width = 0;
2243 fNet->AddReshapeLayer(depth, height, width, flattening);
2247 else if (layerName ==
"RNNLayer") {
2250 size_t stateSize,inputSize, timeSteps = 0;
2251 int rememberState= 0;
2252 int returnSequence = 0;
2259 fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2263 else if (layerName ==
"LSTMLayer") {
2266 size_t stateSize,inputSize, timeSteps = 0;
2267 int rememberState, returnSequence = 0;
2274 fNet->AddBasicLSTMLayer(stateSize, inputSize, timeSteps, rememberState, returnSequence);
2278 else if (layerName ==
"GRULayer") {
2281 size_t stateSize,inputSize, timeSteps = 0;
2282 int rememberState, returnSequence, resetGateAfter = 0;
2292 "Cannot use a reset gate after to false with CudNN - use implementation with resetgate=true");
2294 fNet->AddBasicGRULayer(stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter);
2297 else if (layerName ==
"BatchNormLayer") {
2299 fNet->AddBatchNormLayer(0., 0.0);
2302 fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
#define REGISTER_METHOD(CLASS)
for example
int Int_t
Signed integer 4 bytes (int).
bool Bool_t
Boolean (0=false, 1=true) (bool).
double Double_t
Double 8 bytes.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
TMatrixT< Double_t > TMatrixD
TMatrixT< Float_t > TMatrixF
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
Adadelta Optimizer class.
static void PrintTensor(const Tensor_t &A, const std::string name="Cpu-tensor", bool truncate=false)
static Tensor_t CreateTensor(size_t n, size_t c, size_t h, size_t w)
Generic Deep Neural Network class.
TBatchNormLayer< Architecture_t > * AddBatchNormLayer(Scalar_t momentum=-1, Scalar_t epsilon=0.0001)
Function for adding a Batch Normalization layer with given parameters.
TBasicGRULayer< Architecture_t > * AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, bool resetGateAfter=false)
Function for adding GRU Layer in the Deep Neural Network, with given parameters.
TDenseLayer< Architecture_t > * AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Dense Connected Layer in the Deep Neural Network, with a given width,...
TBasicLSTMLayer< Architecture_t > * AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false)
Function for adding LSTM Layer in the Deep Neural Network, with given parameters.
TMaxPoolLayer< Architecture_t > * AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability=1.0)
Function for adding Pooling layer in the Deep Neural Network, with a given filter height and width,...
TConvLayer< Architecture_t > * AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability=1.0)
Function for adding Convolution layer in the Deep Neural Network, with a given depth,...
TReshapeLayer< Architecture_t > * AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening)
Function for adding Reshape Layer in the Deep Neural Network, with a given height and width.
TBasicRNNLayer< Architecture_t > * AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, EActivationFunction f=EActivationFunction::kTanh)
Function for adding Recurrent Layer in the Deep Neural Network, with given parameters.
Stochastic Batch Gradient Descent Optimizer class.
Generic General Layer class.
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Class that contains all the data information.
UInt_t GetNClasses() const
Types::ETreeType GetCurrentType() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
void SetCurrentEvent(Long64_t ievt) const
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
UInt_t GetNVariables() const
accessor to the number of variables
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
const char * GetName() const override
Bool_t IgnoreEventsWithNegWeightsInTraining() const
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
UInt_t GetNTargets() const
const TString & GetMethodName() const
const Event * GetEvent() const
DataSetInfo & DataInfo() const
UInt_t GetNVariables() const
Types::EAnalysisType fAnalysisType
TrainingHistory fTrainHistory
IPythonInteractive * fInteractive
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
size_t fBatchHeight
The height of the batch used to train the deep net.
DNN::ELossFunction fLossFunction
The loss function.
std::vector< size_t > fInputShape
Contains the batch size (no.
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
Check the type of analysis the deep learning network can do.
TString fLayoutString
The string defining the layout of the deep net.
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) override
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class. Here we will evaluate...
void SetInputDepth(int inputDepth)
Setters.
std::unique_ptr< MatrixImpl_t > fYHat
size_t GetBatchHeight() const
void ReadWeightsFromXML(void *wghtnode) override
TString fWeightInitializationString
The string defining the weight initialization method.
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate max pool layer.
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero).
TString fArchitectureString
The string defining the architecture: CPU or GPU.
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
void Train() override
Methods for training the deep learning network.
void TrainDeepNet()
Train the deep neural network using the defined architecture.
const std::vector< TTrainingSettings > & GetTrainingSettings() const
DNN::EOutputFunction GetOutputFunction() const
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate dense layer.
UInt_t GetNumValidationSamples()
Parse the validation string and return the number of events used for validation.
TString GetBatchLayoutString() const
void SetInputWidth(int inputWidth)
HostBufferImpl_t fXInputBuffer
size_t fBatchWidth
The width of the batch used to train the deep net.
size_t GetInputDepth() const
std::unique_ptr< DeepNetImpl_t > fNet
TString GetInputLayoutString() const
void SetBatchHeight(size_t batchHeight)
void GetHelpMessage() const override
std::vector< std::map< TString, TString > > KeyValueVector_t
size_t GetInputHeight() const
TString GetArchitectureString() const
void ParseBatchLayout()
Parse the batch layout.
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate batch normalization layer.
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
const std::vector< Float_t > & GetRegressionValues() override
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
DNN::EInitialization fWeightInitialization
The initialization method.
void AddWeightsXMLTo(void *parent) const override
size_t GetBatchDepth() const
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate rnn layer.
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
size_t GetInputWidth() const
void SetInputShape(std::vector< size_t > inputShape)
DNN::ELossFunction GetLossFunction() const
TString fBatchLayoutString
The string defining the layout of the batch.
void DeclareOptions() override
The option handling methods.
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
TString fTrainingStrategyString
The string defining the training strategy.
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
void SetBatchDepth(size_t batchDepth)
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
void SetBatchWidth(size_t batchWidth)
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
void ReadWeightsFromStream(std::istream &) override
DNN::EInitialization GetWeightInitialization() const
void SetBatchSize(size_t batchSize)
TString GetLayoutString() const
size_t fBatchDepth
The depth of the batch used to train the deep net.
void ProcessOptions() override
TMVA::DNN::TDeepNet< ArchitectureImpl_t > DeepNetImpl_t
void Init() override
default initializations
size_t GetBatchWidth() const
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
const std::vector< Float_t > & GetMulticlassValues() override
virtual ~MethodDL()
Virtual Destructor.
void ParseInputLayout()
Parse the input layout.
void FillInputTensor()
Get the input event tensor for evaluation. Internal function to fill the fXInput tensor with the corre...
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
const Ranking * CreateRanking() override
void SetInputHeight(int inputHeight)
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling ProcessOptions(), all of the options are parsed, so using the parsed options,...
TString fErrorStrategy
The string defining the error strategy for training.
TString fInputLayoutString
The string defining the layout of the input.
EMsgType GetMinType() const
Ranking for variables in method (implementation).
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Singleton class for Global types used by TMVA.
void Print(Option_t *name="") const override
Print the matrix as a table of elements.
void Print(Option_t *option="") const override
Print TNamed name and title.
Collectable string class.
const TString & GetString() const
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Int_t Atoi() const
Return integer value of string.
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Double_t Atof() const
Return floating-point value contained in string.
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Ssiz_t First(char c) const
Find first occurrence of a character c.
const char * Data() const
TString & ReplaceAll(const TString &s1, const TString &s2)
void ToUpper()
Change string to upper case.
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xmlnode
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
EOptimizer
Enum representing the optimizer used for training.
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
EOutputFunction
Enum that represents output functions.
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
ERegularization
Enum representing the regularization type applied for a given layer.
EActivationFunction
Enum that represents layer activation functions.
ELossFunction
Enum that represents objective functions for the net, i.e.
create variable transformations
TString fetchValueTmp(const std::map< TString, TString > &keyValueMap, TString key)
MsgLogger & Endl(MsgLogger &ml)
Double_t Log(Double_t x)
Returns the natural logarithm of x.
All of the options that can be specified in the training string.
std::map< TString, double > optimizerParams
DNN::EOptimizer optimizer
DNN::ERegularization regularization
std::vector< Double_t > dropoutProbabilities