std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
if (it == keyValueMap.end()) {

if (value == "TRUE" || value == "T" || value == "1") {

                  std::vector<double> defaultValue)

if (parseString == "") {

std::vector<double> values;

TIter nextToken(tokenStrings);

for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
   std::stringstream sstr;

   sstr >> currentValue;
   values.push_back(currentValue);
172 " or cross entropy (binary classification).");
184 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
185 "Specify as 100 to use exactly 100 events. (Default: 20%)");
197 "ConvergenceSteps=50,"
201 "Regularization=None,"
208 "ConvergenceSteps=50,"
214 "DropConfig=0.0+0.5+0.5,"
216 "Multithreading=True",
217 "TrainingStrategy",
"Defines the training strategies.");
Log() << kINFO << "Will ignore negative events in training!" << Endl;
Log() << kINFO << "The STANDARD architecture has been deprecated. "
                  "Please use Architecture=CPU or Architecture=GPU. "
                  "See the TMVA Users' Guide for instructions if you "
                  "encounter problems."

Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
                   "Please use Architecture=CPU or Architecture=GPU for the "
                   "time being. See the TMVA Users' Guide for instructions "
                   "if you encounter problems."
#ifndef R__HAS_TMVAGPU
   Log() << kERROR << "CUDA backend not enabled. Please make sure "
                      "you have CUDA installed and that it was successfully "
                      "detected by CMake by using -Dcuda=On "
   Log() << kINFO << "Will now use the CPU architecture!" << Endl;

   Log() << kINFO << "Will now use the Standard architecture!" << Endl;

   Log() << kINFO << "Will now use the GPU architecture!" << Endl;

#ifndef R__HAS_TMVACPU
   Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
                      "you have a BLAS implementation and that it was successfully "
                      "detected by CMake, and that the imt CMake flag is set."

   Log() << kINFO << "Will now use the GPU architecture!" << Endl;

   Log() << kINFO << "Will now use the Standard architecture!" << Endl;

   Log() << kINFO << "Will now use the CPU architecture!" << Endl;

   Log() << kINFO << "Will use the deprecated STANDARD architecture!" << Endl;
   Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
                     << " neural net error function. Setting error function to "
                     << " SUMOFSQUARES now." << Endl;
for (auto &block : strategyKeyValues) {

   if (optimizer == "SGD") {

   } else if (optimizer == "ADAM") {

   } else if (optimizer == "ADAGRAD") {

   } else if (optimizer == "RMSPROP") {

   } else if (optimizer == "ADADELTA") {
TIter nextInputDim(inputDimStrings);

for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {

   depth = (size_t)strDepth.Atoi();

   height = (size_t)strHeight.Atoi();

size_t batchDepth = 0;
size_t batchHeight = 0;
size_t batchWidth = 0;

TIter nextBatchDim(batchDimStrings);

for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {

   batchDepth = (size_t)strDepth.Atoi();

   batchHeight = (size_t)strHeight.Atoi();

   batchWidth = (size_t)strWidth.Atoi();
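// Illustrative sketch (an assumption based on the parsing above, not a verbatim
// excerpt of the TMVA documentation): the layout options are "|"-separated
// depth|height|width triplets, e.g. for 14 flat input variables and a batch size of 128:
//
//    InputLayout=1|1|14      // input depth | input height | input width
//    BatchLayout=1|128|14    // batch depth | batch height | batch width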
template <typename Architecture_t, typename Layer_t>

   const TString layerDelimiter(",");
   const TString subDelimiter("|");

   TIter nextLayer(layerStrings);

   for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {

      TIter nextToken(subStrings);

      if (strLayerType == "DENSE") {

      } else if (strLayerType == "CONV") {

      } else if (strLayerType == "MAXPOOL") {

      } else if (strLayerType == "RESHAPE") {

      } else if (strLayerType == "RNN") {
template <typename Architecture_t, typename Layer_t>

   const size_t inputSize = GetNvar();

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {

      if (strActFnc == "DENSE") continue;

      if (strActFnc == "RELU") {

      } else if (strActFnc == "TANH") {

      } else if (strActFnc == "SYMMRELU") {

      } else if (strActFnc == "SOFTSIGN") {

      } else if (strActFnc == "SIGMOID") {

      } else if (strActFnc == "LINEAR") {

      } else if (strActFnc == "GAUSS") {

      } else if (width == 0) {

         TString strNumNodes = strActFnc;
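// Illustrative sketch (an assumption derived from the token handling above): a dense
// layer is described as DENSE|<number of nodes>|<activation>, and several layers are
// chained with "," inside the Layout option, e.g.
//
//    Layout=DENSE|128|TANH,DENSE|128|TANH,DENSE|1|LINEAR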
template <typename Architecture_t, typename Layer_t>

   int zeroPadHeight = 0;
   int zeroPadWidth = 0;

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {

      depth = strDepth.Atoi();

      fltHeight = strFltHeight.Atoi();

      fltWidth = strFltWidth.Atoi();

      strideRows = strStrideRows.Atoi();

      strideCols = strStrideCols.Atoi();

      zeroPadHeight = strZeroPadHeight.Atoi();

      zeroPadWidth = strZeroPadWidth.Atoi();

      if (strActFnc == "RELU") {

      } else if (strActFnc == "TANH") {

      } else if (strActFnc == "SYMMRELU") {

      } else if (strActFnc == "SOFTSIGN") {

      } else if (strActFnc == "SIGMOID") {

      } else if (strActFnc == "LINEAR") {

      } else if (strActFnc == "GAUSS") {

                        zeroPadHeight, zeroPadWidth, activationFunction);

   if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                                     zeroPadHeight, zeroPadWidth, activationFunction);
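// Illustrative sketch (an assumption following the parsing order above): a
// convolutional layer token lists depth, filter height/width, stride rows/cols,
// zero-padding height/width and the activation, e.g.
//
//    CONV|32|3|3|1|1|1|1|RELU   // 32 filters of 3x3, stride 1x1, padding 1x1, ReLU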
template <typename Architecture_t, typename Layer_t>

   int filterHeight = 0;

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {

      filterHeight = strFrmHeight.Atoi();

      filterWidth = strFrmWidth.Atoi();

      strideRows = strStrideRows.Atoi();

      strideCols = strStrideCols.Atoi();

   deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);

   if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
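// Illustrative sketch (assumption): a max-pooling layer token gives the frame size and
// the strides, e.g.
//
//    MAXPOOL|2|2|1|1   // 2x2 pooling window, stride 1x1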
template <typename Architecture_t, typename Layer_t>

   bool flattening = false;

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      if (token->GetString() == "FLAT") idxToken = 4;

      depth = strDepth.Atoi();

      height = strHeight.Atoi();

      if (flat == "FLAT") {
template <typename Architecture_t, typename Layer_t>

   bool rememberState = false;

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {

      stateSize = strstateSize.Atoi();

      inputSize = strinputSize.Atoi();

      timeSteps = strtimeSteps.Atoi();

      rememberState = (bool)strrememberState.Atoi();

                              timeSteps, rememberState);

   if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
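// Illustrative sketch (assumption): a recurrent layer token lists the state size, the
// input size, the number of time steps, and whether the state is remembered between
// batches, e.g.
//
//    RNN|10|14|5|0   // stateSize=10, inputSize=14, timeSteps=5, rememberState=false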
template <typename Architecture_t, typename Layer_t>

   TIter nextToken(subStrings);

   for (; token != nullptr; token = (TObjString *)nextToken()) {

   : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputDepth(), fInputHeight(), fInputWidth(),
     fBatchDepth(), fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(),
     fInputLayoutString(), fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(),
     fWeightInitializationString(), fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()

   : MethodBase(Types::kDL, theData, theWeightFile), fInputDepth(), fInputHeight(), fInputWidth(), fBatchDepth(),
     fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(), fLossFunction(), fInputLayoutString(),
     fBatchLayoutString(), fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
     fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings()

   parseString.ReplaceAll(" ", "");

   const TString keyValueDelim("=");

   TObjArray *blockStrings = parseString.Tokenize(blockDelim);
   TIter nextBlock(blockStrings);

   for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
      blockKeyValues.push_back(std::map<TString, TString>());
      std::map<TString, TString> &currentBlock = blockKeyValues.back();

      TIter nextToken(subStrings);

      for (; token != nullptr; token = (TObjString *)nextToken()) {

         int delimPos = strKeyValue.First(keyValueDelim.Data());
         if (delimPos <= 0) continue;

         currentBlock.insert(std::make_pair(strKey, strValue));

   return blockKeyValues;
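// Illustrative sketch (assumption): with blockDelim = "|" and tokenDelim = ",", a
// training-strategy string such as
//
//    "LearningRate=1e-1,BatchSize=256|LearningRate=1e-2,BatchSize=256"
//
// is parsed into two key-value blocks, {{"LearningRate","1e-1"},{"BatchSize","256"}}
// and {{"LearningRate","1e-2"},{"BatchSize","256"}}, one TTrainingSettings per block.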
   Int_t nValidationSamples = 0;

   if (fNumValidationString.EndsWith("%")) {

      Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
      nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;

      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
            << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;

   } else if (fNumValidationString.IsFloat()) {
      Double_t valSizeAsDouble = fNumValidationString.Atof();

      if (valSizeAsDouble < 1.0) {

         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;

         nValidationSamples = valSizeAsDouble;

      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."

   if (nValidationSamples < 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;

   if (nValidationSamples == 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;

   if (nValidationSamples >= (Int_t)trainingSetSize) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString
            << "\" is larger than or equal in size to the training set (size=\"" << trainingSetSize << "\")." << Endl;

   return nValidationSamples;
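// Illustrative sketch (assumption, matching the branches above): accepted forms of the
// validation-size option (fNumValidationString) and what they resolve to for a
// training set of 10000 events:
//
//    "20%"  -> 2000 events (percentage of the training set)
//    "0.2"  -> 2000 events (a float below 1 is treated as a fraction)
//    "100"  ->  100 events (an absolute number of events)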
template <typename Architecture_t>

   using Scalar_t = typename Architecture_t::Scalar_t;

   const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
   const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};

   size_t trainingPhase = 1;

   size_t nThreads = 1;

   size_t batchSize = settings.batchSize;

      if (batchDepth != batchSize && batchDepth > 1) {
         Error("Train", "Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu", batchDepth, batchSize);

      if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight) {
         Error("Train", "Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu", batchHeight, batchSize);

      bool badLayout = false;

      if (batchDepth == batchSize)
         badLayout = (inputDepth * inputHeight * inputWidth != batchHeight * batchWidth);

      if (batchHeight == batchSize && batchDepth == 1)
         badLayout |= (inputDepth * inputHeight * inputWidth != batchWidth);

         Error("Train", "Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu",
               inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth);
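// Illustrative sketch (assumption): for an input layout of 1 x 1 x 14 and a batch size
// of 128, the checks above accept a batch layout of either 128 x 1 x 14
// (batchDepth == batchSize) or 1 x 128 x 14 (batchHeight == batchSize); any layout
// whose per-event size differs from 1*1*14 is reported as incompatible.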
      if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
         Log() << kFATAL << "Number of samples in the datasets are train: ("
               << nTrainingSamples << ") test: (" << nValidationSamples
               << "). One of these is smaller than the batch size of "
               << settings.batchSize << ". Please decrease the batch"
               << " size to be at most the same size as the smallest"
               << " of them." << Endl;

      DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
      if (trainingPhase == 1) {
         fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,

      std::vector<DeepNet_t> nets{};
      nets.reserve(nThreads);
      for (size_t i = 0; i < nThreads; i++) {

         nets.push_back(deepNet);

      std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
      for (auto &p : dropoutVector) {

      deepNet.SetDropoutProbabilities(dropoutVector);
      if (trainingPhase > 1) {

         for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
            const auto &nLayer = fNet->GetLayerAt(i);
            const auto &dLayer = deepNet.GetLayerAt(i);

            Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights());
            Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases());
      int n1 = batchHeight;
      int n2 = batchWidth;

      Log() << "***** Deep Learning Network *****" << Endl;
      if (Log().GetMinType() <= kINFO)

      Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;

      TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, deepNet.GetBatchSize(),
                                      deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
                                      deepNet.GetOutputWidth(), nThreads);

      TensorDataLoader_t validationData(validationTuple, nValidationSamples, deepNet.GetBatchSize(),
                                        deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth(),
                                        deepNet.GetOutputWidth(), nThreads);
      for (auto batch : validationData) {
         auto inputTensor = batch.GetInput();
         auto outputMatrix = batch.GetOutput();
         auto weights = batch.GetWeights();

         minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);

      Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
      minValError /= (Double_t)(nValidationSamples / settings.batchSize);
      minValError += regzTerm;
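// Sketch of the quantity computed above (assuming nValidationSamples is an exact
// multiple of the batch size): the per-batch losses are summed, averaged over the
// number of batches, and the regularization term is added once,
//
//    minValError = (1/nBatches) * sum_b Loss(batch_b) + regzTerm,
//    nBatches    = nValidationSamples / settings.batchSize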
      std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;

      case EOptimizer::kSGD:
         optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(

      case EOptimizer::kAdam:
         optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(

      case EOptimizer::kAdagrad:
         optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(

      case EOptimizer::kRMSProp:
         optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(

      case EOptimizer::kAdadelta:
         optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(

      std::vector<TTensorBatch<Architecture_t>> batches{};

      bool converged = false;
      size_t convergenceCount = 0;
      size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();
      std::chrono::time_point<std::chrono::system_clock> tstart, tend;
      tstart = std::chrono::system_clock::now();

            << " Optimizer " << settings.optimizerName
            << " Learning rate = " << settings.learningRate
            << " regularization " << (char)settings.regularization
            << " minimum error = " << minValError

      std::string separator(62, '-');

      Log() << std::setw(10) << "Epoch"
            << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err."
            << std::setw(12) << "t(s)/epoch" << std::setw(12) << "t(s)/Loss"
            << std::setw(12) << "nEvents/s"
            << std::setw(12) << "Conv. Steps" << Endl;
      size_t shuffleSeed = 0;

      Log() << "Initial Deep Net Weights " << Endl;
      auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
      for (size_t l = 0; l < weights_tensor.size(); ++l)
         weights_tensor[l].Print();
      auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
      bias_tensor[0].Print();
      while (!converged) {
         optimizer->IncrementGlobalStep();
         trainingData.Shuffle(rng);

         for (size_t i = 0; i < batchesInEpoch; ++i) {

            auto my_batch = trainingData.GetTensorBatch();

            deepNet.Forward(my_batch.GetInput(), true);
            deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());
         if ((optimizer->GetGlobalStep() % settings.testInterval) == 0) {

            std::chrono::time_point<std::chrono::system_clock> t1, t2;

            t1 = std::chrono::system_clock::now();

            for (auto batch : validationData) {
               auto inputTensor = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weights = batch.GetWeights();

               valError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);

            Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
            valError /= (Double_t)(nValidationSamples / settings.batchSize);
            valError += regTerm;

            t2 = std::chrono::system_clock::now();

            if (valError < minValError) {
               convergenceCount = 0;

               convergenceCount += settings.testInterval;
            if (valError < minValError) {

               Log() << std::setw(10) << optimizer->GetGlobalStep()
                     << " Minimum Test error found - save the configuration " << Endl;
               for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
                  const auto &nLayer = fNet->GetLayerAt(i);
                  const auto &dLayer = deepNet.GetLayerAt(i);

               minValError = valError;

            } else if (minValError <= 0.)
               minValError = valError;
            for (auto batch : trainingData) {
               auto inputTensor = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weights = batch.GetWeights();
               trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);

            trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
            trainingError += regTerm;
            tend = std::chrono::system_clock::now();

            std::chrono::duration<double> elapsed_seconds = tend - tstart;
            std::chrono::duration<double> elapsed1 = t1 - tstart;

            std::chrono::duration<double> elapsed_testing = tend - t1;

            double seconds = elapsed_seconds.count();

            double eventTime = elapsed1.count() / (batchesInEpoch * settings.testInterval * settings.batchSize);

               convergenceCount > settings.convergenceSteps || optimizer->GetGlobalStep() >= settings.maxEpochs;

            Log() << std::setw(10) << optimizer->GetGlobalStep() << " | "
                  << std::setw(12) << trainingError
                  << std::setw(12) << valError
                  << std::setw(12) << seconds / settings.testInterval
                  << std::setw(12) << elapsed_testing.count()
                  << std::setw(12) << 1. / eventTime
                  << std::setw(12) << convergenceCount

            tstart = std::chrono::system_clock::now();
         if (converged && debug) {
            Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << optimizer->GetGlobalStep()

            auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
            auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
            for (size_t l = 0; l < weights_tensor.size(); ++l)
               weights_tensor[l].Print();
            bias_tensor[0].Print();
   Log() << kFATAL << "Not implemented yet" << Endl;

#ifdef R__HAS_TMVAGPU
   Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
   TrainDeepNet<DNN::TCuda<ScalarImpl_t> >();

   Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                      "you have CUDA installed and that it was successfully "
                      "detected by CMake."

   Log() << kFATAL << "OPENCL backend not yet supported." << Endl;

#ifdef R__HAS_TMVACPU

   Log() << kINFO << "Start of deep neural network training on CPU using (for ROOT-IMT) nthreads = "

   TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();

   Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
                      "you have a BLAS implementation and that it was successfully "
                      "detected by CMake, and that the imt CMake flag is set."

   Log() << kINFO << "Start of deep neural network training on the STANDARD architecture" << Endl << Endl;
   TrainDeepNet<DNN::TReference<ScalarImpl_t> >();

         " is not a supported architecture for TMVA::MethodDL"
   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built"

   int n1 = fXInput[0].GetNrows();
   int n2 = fXInput[0].GetNcols();

   if (n1 * n2 != nVariables) {
      Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
            << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2

   for (int j = 0; j < n1; ++j) {
      for (int k = 0; k < n2; k++) {
         fXInput[0](j, k) = inputValues[j * n2 + k];
   double mvaValue = (*fYHat)(0, 0);

#ifdef DEBUG_MVAVALUE
   using Tensor_t = std::vector<MatrixImpl_t>;
   TMatrixF xInput(n1, n2, inputValues.data());

   std::cout << "Output of DeepNet " << mvaValue << std::endl;
   auto &deepnet = *fNet;
   std::cout << "Loop on layers " << std::endl;
   for (int l = 0; l < deepnet.GetDepth(); ++l) {
      std::cout << "Layer " << l;
      const auto *layer = deepnet.GetLayerAt(l);
      const Tensor_t &layer_output = layer->GetOutput();

      std::cout << "DNN output " << layer_output.size() << std::endl;
      for (size_t i = 0; i < layer_output.size(); ++i) {
#ifdef R__HAS_TMVAGPU

         TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols(), layer_output[i].GetRawDataPointer());

      const Tensor_t &layer_weights = layer->GetWeights();
      std::cout << "DNN weights " << layer_weights.size() << std::endl;
      if (layer_weights.size() > 0) {

#ifdef R__HAS_TMVAGPU

         TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols(), layer_weights[i].GetRawDataPointer());
template <typename Architecture_t>

   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built"

   using Matrix_t = typename Architecture_t::Matrix_t;

   DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
   std::vector<DeepNet_t> nets{};

   for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
      const auto &nLayer = fNet->GetLayerAt(i);
      const auto &dLayer = deepNet.GetLayerAt(i);
      Architecture_t::CopyDiffArch(dLayer->GetWeights(), nLayer->GetWeights());
      Architecture_t::CopyDiffArch(dLayer->GetBiases(), nLayer->GetBiases());

   size_t n1 = deepNet.GetBatchHeight();
   size_t n2 = deepNet.GetBatchWidth();
   size_t n0 = deepNet.GetBatchSize();

   n1 = deepNet.GetBatchSize();

   Long64_t nEvents = lastEvt - firstEvt;

   TensorDataLoader_t testData(testTuple, nEvents, batchSize, n0, n1, n2, deepNet.GetOutputWidth(), 1);
   Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth());

         << " sample (" << nEvents << " events)" << Endl;

   std::vector<double> mvaValues(nEvents);

   for (Long64_t ievt = firstEvt; ievt < lastEvt; ievt += batchSize) {

      Long64_t ievt_end = ievt + batchSize;

      if (ievt_end <= lastEvt) {

         if (ievt == firstEvt) {

            if (n1 == batchSize && n0 == 1) {
               if (n2 != nVariables) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2

               if (n1 * n2 != nVariables || n0 != batchSize) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2

         auto batch = testData.GetTensorBatch();
         auto inputTensor = batch.GetInput();

         auto xInput = batch.GetInput();

         for (size_t i = 0; i < batchSize; ++i) {
            double value = yHat(i, 0);
            mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;

      for (Long64_t i = ievt; i < lastEvt; ++i) {

         << "Elapsed time for evaluation of " << nEvents << " events: "
   size_t nVariables = GetEvent()->GetNVariables();

   std::vector<MatrixImpl_t> X_vec;
   const Event *ev = GetEvent();
   const std::vector<Float_t> &inputValues = ev->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];

   X_vec.emplace_back(X);
   size_t nTargets = std::max(1u, ev->GetNTargets());

   std::vector<Float_t> output(nTargets);
   fNet->Prediction(YHat, X_vec, fOutputFunction);

   for (size_t i = 0; i < nTargets; i++)

   if (fRegressionReturnVal == NULL) {
      fRegressionReturnVal = new std::vector<Float_t>();

   fRegressionReturnVal->clear();

   for (size_t i = 0; i < nTargets; ++i) {

   const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
   for (size_t i = 0; i < nTargets; ++i) {
      fRegressionReturnVal->push_back(evT2->GetTarget(i));

   return *fRegressionReturnVal;
   size_t nVariables = GetEvent()->GetNVariables();

   std::vector<MatrixImpl_t> X_vec;

   if (fMulticlassReturnVal == NULL) {
      fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());

   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0, i) = inputValues[i];

   X_vec.emplace_back(X);
   fNet->Prediction(YHat, X_vec, fOutputFunction);
   for (size_t i = 0; i < (size_t)YHat.GetNcols(); i++) {
      (*fMulticlassReturnVal)[i] = YHat(0, i);

   return *fMulticlassReturnVal;
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt - firstEvt;

   if (size_t(nEvents) < batchSize) batchSize = nEvents;

#ifdef R__HAS_TMVAGPU
   Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
   return PredictDeepNet<DNN::TCuda<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);

#ifdef R__HAS_TMVACPU
   Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
   return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);

   Log() << kINFO << "Evaluate deep neural network on the STANDARD architecture using batches with size = " << batchSize

   return PredictDeepNet<DNN::TReference<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
   void *nn = xmlEngine.NewChild(parent, 0, "Weights");

   Int_t inputDepth = fNet->GetInputDepth();
   Int_t inputHeight = fNet->GetInputHeight();
   Int_t inputWidth = fNet->GetInputWidth();

   Int_t batchDepth = fNet->GetBatchDepth();
   Int_t batchHeight = fNet->GetBatchHeight();
   Int_t batchWidth = fNet->GetBatchWidth();

   char lossFunction = static_cast<char>(fNet->GetLossFunction());
   char initialization = static_cast<char>(fNet->GetInitialization());

   xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
   xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));

   xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));

   for (Int_t i = 0; i < depth; i++)
   size_t inputDepth, inputHeight, inputWidth;

   size_t batchSize, batchDepth, batchHeight, batchWidth;

   char lossFunctionChar;
   char initializationChar;
   char regularizationChar;
   char outputFunctionChar;

   fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
                                                           batchHeight, batchWidth,

   for (size_t i = 0; i < netDepth; i++) {

      if (layerName == "DenseLayer") {

      else if (layerName == "ConvLayer") {

         size_t fltHeight, fltWidth = 0;
         size_t strideRows, strideCols = 0;
         size_t padHeight, padWidth = 0;

         fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                            padHeight, padWidth, actFunction);

      else if (layerName == "MaxPoolLayer") {

         size_t filterHeight, filterWidth = 0;
         size_t strideRows, strideCols = 0;

         fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);

      else if (layerName == "ReshapeLayer") {

         size_t depth, height, width = 0;

         fNet->AddReshapeLayer(depth, height, width, flattening);

      else if (layerName == "RNNLayer") {

         size_t stateSize, inputSize, timeSteps = 0;
         int rememberState = 0;

         fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);

      fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);

   int n1 = batchHeight;
   int n2 = batchWidth;