fROC(kTRUE), fSilentFile(theTargetFile == nullptr), fJobName(jobName),
fAnalysisType(Types::kClassification), fModelPersistence(kTRUE)
DeclareOptionRef(color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)");
DeclareOptionRef(fTransformations, "Transformations",
                 "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, "
                 "decorrelation, PCA, uniform and Gaussianisation followed by decorrelation transformations");
DeclareOptionRef(silent, "Silent",
                 "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory "
                 "class object (default: False)");
DeclareOptionRef(drawProgressBar, "DrawProgressBar",
                 "Draw progress bar to display training, testing and evaluation schedule (default: True)");
DeclareOptionRef(fModelPersistence, "ModelPersistence",
                 "Option to save the trained model in an XML file or using serialization");
DeclareOptionRef(analysisType = "Auto", "AnalysisType",
                 "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)");

if (analysisType == "classification")
   fAnalysisType = Types::kClassification;
else if (analysisType == "regression")
   fAnalysisType = Types::kRegression;
else if (analysisType == "multiclass")
   fAnalysisType = Types::kMulticlass;
else if (analysisType == "auto")
   fAnalysisType = Types::kNoAnalysisType;
fSilentFile(kTRUE), fJobName(jobName), fAnalysisType(Types::kClassification), fModelPersistence(kTRUE)
DeclareOptionRef(color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)");
DeclareOptionRef(fTransformations, "Transformations",
                 "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, "
                 "decorrelation, PCA, uniform and Gaussianisation followed by decorrelation transformations");
DeclareOptionRef(silent, "Silent",
                 "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory "
                 "class object (default: False)");
DeclareOptionRef(drawProgressBar, "DrawProgressBar",
                 "Draw progress bar to display training, testing and evaluation schedule (default: True)");
DeclareOptionRef(fModelPersistence, "ModelPersistence",
                 "Option to save the trained model in an XML file or using serialization");
DeclareOptionRef(analysisType = "Auto", "AnalysisType",
                 "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)");

if (analysisType == "classification")
   fAnalysisType = Types::kClassification;
else if (analysisType == "regression")
   fAnalysisType = Types::kRegression;
else if (analysisType == "multiclass")
   fAnalysisType = Types::kMulticlass;
else if (analysisType == "auto")
   fAnalysisType = Types::kNoAnalysisType;
std::vector<TMVA::VariableTransformBase *>::iterator trfIt = fDefaultTrfs.begin();
for (; trfIt != fDefaultTrfs.end(); ++trfIt)
   delete (*trfIt);

this->DeleteAllMethods();

std::map<TString, MVector *>::iterator itrMap;

for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap) {
   MVector *methods = itrMap->second;
   // delete the methods booked for this dataset
   MVector::iterator itrMethod = methods->begin();
   for (; itrMethod != methods->end(); ++itrMethod) {
      Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
      delete (*itrMethod);
   }
   methods->clear();
   delete methods;
}

if (fModelPersistence)
if (fMethodsMap.find(datasetname) != fMethodsMap.end()) {
   if (GetMethod(datasetname, methodTitle) != 0) {
      Log() << kFATAL << "Booking failed since method with title <" << methodTitle << "> already exists "
            << "with DataSet Name <" << loader->GetName() << "> " << Endl;
   }
}
Log() << kHEADER << "Booking method: " << gTools().Color("bold") << methodTitle << gTools().Color("reset") << Endl;

conf->DeclareOptionRef(boostNum = 0, "Boost_num", "Number of times the classifier will be boosted");

if (fModelPersistence) {

   if (fileDir[fileDir.Length() - 1] != '/')
      fileDir += "/";
}

Log() << kDEBUG << "Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;

Log() << kFATAL << "Method with type kBoost cannot be cast to MethodBoost. /Factory" << Endl;
if (fModelPersistence)
   methBoost->SetWeightFileDir(fileDir);

methBoost->SetFile(fgTargetFile);
Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl;

if (fModelPersistence)
   methCat->SetWeightFileDir(fileDir);

methCat->SetFile(fgTargetFile);
Log() << kWARNING << "Method " << method->GetMethodTypeName() << " is not capable of handling ";
if (fModelPersistence)
   method->SetWeightFileDir(fileDir);

if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
   MVector *mvector = new MVector;
   fMethodsMap[datasetname] = mvector;
}
fMethodsMap[datasetname]->push_back(method);
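// Usage sketch (illustrative, not part of Factory.cxx): booking a method
// against a DataLoader. The title ("BDT") becomes the key stored in
// fMethodsMap above; the option string values are examples only.
//
//    TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset");
//    // ... AddVariable / AddTree / PrepareTrainingAndTestTree ...
//    factory.BookMethod(dataloader, TMVA::Types::kBDT, "BDT",
//                       "NTrees=400:MaxDepth=3:BoostType=AdaBoost");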
return BookMethod(loader, Types::Instance().GetMethodName(theMethod), methodTitle, theOption);

std::string methodTypeName = std::string(Types::Instance().GetMethodName(methodType).Data());

if (method == nullptr)
   return nullptr;

Log() << kERROR << "Cannot handle category methods for now." << Endl;

if (fModelPersistence) {

   if (fileDir[fileDir.Length() - 1] != '/')
      fileDir += "/";
}

if (fModelPersistence)
   method->SetWeightFileDir(fileDir);

if (HasMethod(datasetname, methodTitle) != 0) {
   Log() << kFATAL << "Booking failed since method with title <" << methodTitle << "> already exists "
         << "with DataSet Name <" << loader->GetName() << "> " << Endl;
}

Log() << kINFO << "Booked classifier \"" << method->GetMethodName() << "\" of type: \""
      << method->GetMethodTypeName() << "\"" << Endl;

if (fMethodsMap.count(datasetname) == 0) {
   MVector *mvector = new MVector;
   fMethodsMap[datasetname] = mvector;
}
fMethodsMap[datasetname]->push_back(method);
if (fMethodsMap.find(datasetname) == fMethodsMap.end())
   return nullptr;

MVector *methods = fMethodsMap.find(datasetname)->second;

MVector::const_iterator itrMethod;
for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
   MethodBase *mva = dynamic_cast<MethodBase *>(*itrMethod);
   if (mva && mva->GetMethodName() == methodTitle)
      return mva;
}
return nullptr;

if (fMethodsMap.find(datasetname) == fMethodsMap.end())
   return kFALSE;

std::string methodName = methodTitle.Data();
auto isEqualToMethodName = [&methodName](TMVA::IMethod *m) { return (0 == methodName.compare(m->GetName())); };

MVector *methods = fMethodsMap.at(datasetname);
Bool_t isMethodNameExisting = std::any_of(methods->begin(), methods->end(), isEqualToMethodName);

return isMethodNameExisting;
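// Usage sketch (illustrative, not part of Factory.cxx): HasMethod guards a
// lookup before the pointer returned by GetMethod is used; dataset and title
// strings are examples only.
//
//    if (factory.HasMethod("dataset", "BDT")) {
//       TMVA::IMethod *im = factory.GetMethod("dataset", "BDT");
//       std::cout << "found method: " << im->GetName() << std::endl;
//    }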
if (!RootBaseDir()->GetDirectory(fDataSetInfo.GetName()))
   RootBaseDir()->mkdir(fDataSetInfo.GetName());

RootBaseDir()->cd(fDataSetInfo.GetName());
processTrfs = fTransformations;

// the transformation handlers to test
std::vector<TMVA::TransformationHandler *> trfs;
TMVA::TransformationHandler *identityTrHandler = 0;

std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
for (; trfsDefIt != trfsDef.end(); ++trfsDefIt) {

   Log() << kDEBUG << "current transformation string: '" << trfS.Data() << "'" << Endl;

   if (trfS.BeginsWith("I"))
      identityTrHandler = trfs.back();
}

std::vector<TMVA::TransformationHandler *>::iterator trfIt = trfs.begin();

for (; trfIt != trfs.end(); ++trfIt) {
   (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.GetName()));
   (*trfIt)->CalcTransformations(inputEvents);
}
if (identityTrHandler)
   identityTrHandler->PrintVariableRanking();

// clean up the transformation handlers
for (trfIt = trfs.begin(); trfIt != trfs.end(); ++trfIt)
   delete *trfIt;
std::map<TString, MVector *>::iterator itrMap;
std::map<TString, Double_t> TunedParameters;

for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap) {
   MVector *methods = itrMap->second;

   MVector::iterator itrMethod;

   // iterate over all booked methods and optimize their tuning parameters
   for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {

      MethodBase *mva = dynamic_cast<MethodBase *>(*itrMethod);
      if (!mva) {
         Log() << kFATAL << "Dynamic cast to MethodBase failed" << Endl;
         return TunedParameters;
      }

      if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
         Log() << kWARNING << "Method " << mva->GetMethodName()
               << " not trained (training tree has fewer entries ["
               << mva->Data()->GetNTrainingEvents() << "] than required [" << MinNoTrainingEvents << "])" << Endl;
         continue;
      }

      Log() << kINFO << "Optimize method: " << mva->GetMethodName() << " for "
            << (fAnalysisType == Types::kRegression
                   ? "Regression"
                   : (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification"))
            << Endl;

      TunedParameters = mva->OptimizeTuningParameters(fomType, fitType);
      Log() << kINFO << "Optimization of tuning parameters finished for Method: " << mva->GetName() << Endl;
   }
}
return TunedParameters;
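// Usage sketch (illustrative, not part of Factory.cxx): OptimizeAllMethods
// forwards the figure-of-merit and fitter choice to each method's
// OptimizeTuningParameters; the defaults ("ROCIntegral", "FitGA") are written
// out explicitly here.
//
//    std::map<TString, Double_t> tuned =
//       factory.OptimizeAllMethods("ROCIntegral", "FitGA");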
if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
   Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
   return nullptr;
}

if (!this->HasMethod(datasetname, theMethodName)) {
   Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data())
         << Endl;
   return nullptr;
}

if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
   Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.")
         << Endl;
   return nullptr;
}
dataset->SetCurrentType(type);
TMVA::Results *results = dataset->GetResults(theMethodName, type, this->fAnalysisType);
Log() << kERROR
      << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass,
              nClasses)
      << Endl;
if (this->fAnalysisType == Types::kClassification) {

   std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
   std::vector<Bool_t> *mvaResTypes = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
   std::vector<Float_t> mvaResWeights;

   auto eventCollection = dataset->GetEventCollection(type);
   mvaResWeights.reserve(eventCollection.size());
   for (auto ev : eventCollection) {
      mvaResWeights.push_back(ev->GetWeight());
   }

   rocCurve = new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);

} else if (this->fAnalysisType == Types::kMulticlass) {
   std::vector<Float_t> mvaRes;
   std::vector<Bool_t> mvaResTypes;
   std::vector<Float_t> mvaResWeights;

   std::vector<std::vector<Float_t>> *rawMvaRes = dynamic_cast<ResultsMulticlass *>(results)->GetValueVector();

   // pick the per-event response of the requested class from the raw value vector
   mvaRes.reserve(rawMvaRes->size());
   for (auto item : *rawMvaRes) {
      mvaRes.push_back(item[iClass]);
   }

   auto eventCollection = dataset->GetEventCollection(type);
   mvaResTypes.reserve(eventCollection.size());
   mvaResWeights.reserve(eventCollection.size());
   for (auto ev : eventCollection) {
      mvaResTypes.push_back(ev->GetClass() == iClass);
      mvaResWeights.push_back(ev->GetWeight());
   }

   rocCurve = new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
}
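// Usage sketch (illustrative, not part of Factory.cxx): once filled, the
// ROCCurve built above exposes the AUC and the curve itself.
//
//    Double_t auc  = rocCurve->GetROCIntegral();     // AUC (41 points by default)
//    Double_t effS = rocCurve->GetEffSForEffB(0.10); // signal eff. at 10% bkg eff.
//    TGraph  *g    = rocCurve->GetROCCurve(100);     // ROC graph with 100 points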
if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
   Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
}

if (!this->HasMethod(datasetname, theMethodName)) {
   Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data())
         << Endl;
}
if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
   Log() << kERROR << Form("Can only generate ROC integral for analysis type kClassification and kMulticlass.")
         << Endl;
}

Log() << kERROR
      << Form("ROCCurve object was not created for Method = %s with Dataset = %s ", theMethodName.Data(),
              datasetname.Data())
      << Endl;
return GetROCCurve((TString)loader->GetName(), theMethodName, setTitles, iClass, type);
if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
   Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
   return nullptr;
}

if (!this->HasMethod(datasetname, theMethodName)) {
   Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data())
         << Endl;
   return nullptr;
}

if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
   Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.")
         << Endl;
   return nullptr;
}

Log() << kERROR
      << Form("ROCCurve object was not created for Method = %s with Dataset = %s ", theMethodName.Data(),
              datasetname.Data())
      << Endl;
graph->GetYaxis()->SetTitle("Background rejection (Specificity)");
graph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");
MVector *methods = fMethodsMap[datasetname.Data()];
for (auto *method_raw : *methods) {
   TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(method_raw);
   if (method == nullptr) {
      continue;
   }
   TString methodName = method->GetMethodName();

   Log() << kERROR
         << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass,
                 nClasses)
         << Endl;

   TGraph *graph = this->GetROCCurve(datasetname, methodName, false, iClass, type);
   graph->SetTitle(methodName);

   graph->SetLineWidth(2);
   graph->SetLineColor(line_color++);
   graph->SetFillColor(10);
}
Log() << kERROR << Form("No methods have class %i defined.", iClass) << Endl;
if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
   Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
   return nullptr;
}
TMultiGraph *multigraph = this->GetROCCurveAsMultiGraph(datasetname, iClass, type);

multigraph->Draw("AL");

canvas->BuildLegend(0.15, 0.15, 0.35, 0.3, "MVA Method");
if (fMethodsMap.empty()) {
   Log() << kINFO << "...nothing found to train" << Endl;
   return;
}

Log() << kDEBUG << "Train all methods for "
      << (fAnalysisType == Types::kRegression
             ? "Regression"
             : (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification"))
      << " ..." << Endl;

std::map<TString, MVector *>::iterator itrMap;

for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap) {
   MVector *methods = itrMap->second;
   MVector::iterator itrMethod;

   // iterate over all booked methods and train them
   for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {

      Log() << kFATAL << "No input data for the training provided!" << Endl;

      Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;

      Log() << kFATAL << "You want to do classification training, but specified fewer than two classes." << Endl;

      if (!IsSilentFile())
         WriteDataInformation(mva->fDataSetInfo);

      if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
         Log() << kWARNING << "Method " << mva->GetMethodName()
               << " not trained (training tree has fewer entries ["
               << mva->Data()->GetNTrainingEvents() << "] than required [" << MinNoTrainingEvents << "])" << Endl;
         continue;
      }

      Log() << kHEADER << "Train method: " << mva->GetMethodName() << " for "
            << (fAnalysisType == Types::kRegression
                   ? "Regression"
                   : (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification"))
            << Endl << Endl;

      Log() << kHEADER << "Training finished" << Endl << Endl;
   }

   // variable ranking (method specific)
   Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
   for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {

      const Ranking *ranking = (*itrMethod)->CreateRanking();
      if (ranking != 0)
         ranking->Print();
      else
         Log() << kINFO << "No variable ranking supplied by classifier: "
               << dynamic_cast<MethodBase *>(*itrMethod)->GetMethodName() << Endl;
   }

   if (!IsSilentFile()) {
      for (UInt_t i = 0; i < methods->size(); i++) {
         MethodBase *m = dynamic_cast<MethodBase *>((*methods)[i]);
         if (m == nullptr)
            continue;
         m->fTrainHistory.SaveHistory(m->GetMethodName());
      }
   }
if (fModelPersistence) {

   Log() << kHEADER << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;

   if (!IsSilentFile())
      RootBaseDir()->cd();

   // iterate through all booked methods
   for (UInt_t i = 0; i < methods->size(); i++) {

      TString weightfile = m->GetWeightFileName();

      TString testvarName = m->GetTestvarName();

      // recreate the method from its weight file
      m = dynamic_cast<MethodBase *>(ClassifierFactory::Instance().Create(
         Types::Instance().GetMethodName(methodType).Data(), dataSetInfo, weightfile));

      Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl;

      m->SetWeightFileDir(wfileDir);
      m->SetModelPersistence(fModelPersistence);
      m->SetSilentFile(IsSilentFile());
      m->SetAnalysisType(fAnalysisType);
      m->SetupMethod();
      m->ReadStateFromFile();
      m->SetTestvarName(testvarName);
   }
}

if (fMethodsMap.empty()) {
   Log() << kINFO << "...nothing found to test" << Endl;
   return;
}
std::map<TString, MVector *>::iterator itrMap;

for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap) {
   MVector *methods = itrMap->second;
   MVector::iterator itrMethod;

   // iterate over all booked methods and test them
   for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {

      Log() << kHEADER << "Test method: " << mva->GetMethodName() << " for "
            << (analysisType == Types::kRegression
                   ? "Regression"
                   : (analysisType == Types::kMulticlass ? "Multiclass classification" : "Classification"))
            << " performance" << Endl << Endl;
if (methodTitle != "") {
   IMethod *method = GetMethod(datasetname, methodTitle);
   if (method)
      method->MakeClass();
   else
      Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle << "\" in list" << Endl;
} else {
   MVector *methods = fMethodsMap.find(datasetname)->second;
   MVector::const_iterator itrMethod;
   for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
      MethodBase *method = dynamic_cast<MethodBase *>(*itrMethod);
      if (method == nullptr)
         continue;
      Log() << kINFO << "Make response class for classifier: " << method->GetMethodName() << Endl;
      method->MakeClass();
   }
}

if (methodTitle != "") {
   IMethod *method = GetMethod(datasetname, methodTitle);
   if (method)
      method->PrintHelpMessage();
   else
      Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle << "\" in list" << Endl;
} else {
   MVector *methods = fMethodsMap.find(datasetname)->second;
   MVector::const_iterator itrMethod;
   for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
      MethodBase *method = dynamic_cast<MethodBase *>(*itrMethod);
      if (method == nullptr)
         continue;
      Log() << kINFO << "Print help message for classifier: " << method->GetMethodName() << Endl;
      method->PrintHelpMessage();
   }
}
Log() << kINFO << "Evaluating all variables..." << Endl;

this->BookMethod(loader, "Variable", s);
if (fMethodsMap.empty()) {
   Log() << kINFO << "...nothing found to evaluate" << Endl;
   return;
}
std::map<TString, MVector *>::iterator itrMap;

for (itrMap = fMethodsMap.begin(); itrMap != fMethodsMap.end(); ++itrMap) {
   MVector *methods = itrMap->second;
Int_t nmeth_used[2] = {0, 0}; // index 0: MVA methods, index 1: input variables

std::vector<std::vector<TString>> mname(2);
std::vector<std::vector<Double_t>> sig(2), sep(2), roc(2);
std::vector<std::vector<Double_t>> eff01(2), eff10(2), eff30(2), effArea(2);
std::vector<std::vector<Double_t>> eff01err(2), eff10err(2), eff30err(2);
std::vector<std::vector<Double_t>> trainEff01(2), trainEff10(2), trainEff30(2);

std::vector<std::vector<Float_t>> multiclass_testEff;
std::vector<std::vector<Float_t>> multiclass_trainEff;
std::vector<std::vector<Float_t>> multiclass_testPur;
std::vector<std::vector<Float_t>> multiclass_trainPur;

std::vector<std::vector<Float_t>> train_history;

// one confusion matrix per method, per background-efficiency working point
std::vector<TMatrixD> multiclass_trainConfusionEffB01;
std::vector<TMatrixD> multiclass_trainConfusionEffB10;
std::vector<TMatrixD> multiclass_trainConfusionEffB30;
std::vector<TMatrixD> multiclass_testConfusionEffB01;
std::vector<TMatrixD> multiclass_testConfusionEffB10;
std::vector<TMatrixD> multiclass_testConfusionEffB30;

// regression quantities
std::vector<std::vector<Double_t>> biastrain(1);
std::vector<std::vector<Double_t>> biastest(1);
std::vector<std::vector<Double_t>> devtrain(1);
std::vector<std::vector<Double_t>> devtest(1);
std::vector<std::vector<Double_t>> rmstrain(1);
std::vector<std::vector<Double_t>> rmstest(1);
std::vector<std::vector<Double_t>> minftrain(1);
std::vector<std::vector<Double_t>> minftest(1);
std::vector<std::vector<Double_t>> rhotrain(1);
std::vector<std::vector<Double_t>> rhotest(1);

// same quantities "truncated" (suffix T): computed after removing events
// deviating more than 2 sigma from the average
std::vector<std::vector<Double_t>> biastrainT(1);
std::vector<std::vector<Double_t>> biastestT(1);
std::vector<std::vector<Double_t>> devtrainT(1);
std::vector<std::vector<Double_t>> devtestT(1);
std::vector<std::vector<Double_t>> rmstrainT(1);
std::vector<std::vector<Double_t>> rmstestT(1);
std::vector<std::vector<Double_t>> minftrainT(1);
std::vector<std::vector<Double_t>> minftestT(1);
for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {

   theMethod->SetFile(fgTargetFile);

   methodsNoCuts.push_back(*itrMethod);

   if (theMethod->DoRegression()) {
      doRegression = kTRUE;

      Log() << kINFO << "Evaluate regression method: " << theMethod->GetMethodName() << Endl;
      Double_t bias, dev, rms, mInf;
      Double_t biasT, devT, rmsT, mInfT;
      Double_t rho;

      Log() << kINFO << "TestRegression (testing)" << Endl;
      theMethod->TestRegression(bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTesting);
      biastest[0].push_back(bias);
      devtest[0].push_back(dev);
      rmstest[0].push_back(rms);
      minftest[0].push_back(mInf);
      rhotest[0].push_back(rho);
      biastestT[0].push_back(biasT);
      devtestT[0].push_back(devT);
      rmstestT[0].push_back(rmsT);
      minftestT[0].push_back(mInfT);

      Log() << kINFO << "TestRegression (training)" << Endl;
      theMethod->TestRegression(bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTraining);
      biastrain[0].push_back(bias);
      devtrain[0].push_back(dev);
      rmstrain[0].push_back(rms);
      minftrain[0].push_back(mInf);
      rhotrain[0].push_back(rho);
      biastrainT[0].push_back(biasT);
      devtrainT[0].push_back(devT);
      rmstrainT[0].push_back(rmsT);
      minftrainT[0].push_back(mInfT);
      if (!IsSilentFile()) {
         Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
         theMethod->WriteEvaluationHistosToFile(Types::kTesting);
         theMethod->WriteEvaluationHistosToFile(Types::kTraining);
      }
   } else if (theMethod->DoMulticlass()) {
      doMulticlass = kTRUE;
      Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl;

      if (!IsSilentFile()) {
         Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
         theMethod->WriteEvaluationHistosToFile(Types::kTesting);
         theMethod->WriteEvaluationHistosToFile(Types::kTraining);
      }

      eff01[isel].push_back(theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err));
      eff01err[isel].push_back(err);
      eff10[isel].push_back(theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err));
      eff10err[isel].push_back(err);
      eff30[isel].push_back(theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err));
      eff30err[isel].push_back(err);

      trainEff01[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.01"));
      trainEff10[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.10"));
      trainEff30[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.30"));

      if (!IsSilentFile()) {
         Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
         theMethod->WriteEvaluationHistosToFile(Types::kTesting);
         theMethod->WriteEvaluationHistosToFile(Types::kTraining);
      }
std::vector<TString> vtemps = mname[0];
std::vector<std::vector<Double_t>> vtmp;
vtmp.push_back(devtest[0]); // this is the vector that is ranked on
vtmp.push_back(devtrain[0]);
vtmp.push_back(biastest[0]);
vtmp.push_back(biastrain[0]);
vtmp.push_back(rmstest[0]);
vtmp.push_back(rmstrain[0]);
vtmp.push_back(minftest[0]);
vtmp.push_back(minftrain[0]);
vtmp.push_back(rhotest[0]);
vtmp.push_back(rhotrain[0]);
vtmp.push_back(devtestT[0]); // truncated quantities
vtmp.push_back(devtrainT[0]);
vtmp.push_back(biastestT[0]);
vtmp.push_back(biastrainT[0]);
vtmp.push_back(rmstestT[0]);
vtmp.push_back(rmstrainT[0]);
vtmp.push_back(minftestT[0]);
vtmp.push_back(minftrainT[0]);
gTools().UsefulSortAscending(vtmp, &vtemps); // rank by smallest deviation on the test sample
devtest[0] = vtmp[0];
devtrain[0] = vtmp[1];
biastest[0] = vtmp[2];
biastrain[0] = vtmp[3];
rmstest[0] = vtmp[4];
rmstrain[0] = vtmp[5];
minftest[0] = vtmp[6];
minftrain[0] = vtmp[7];
rhotest[0] = vtmp[8];
rhotrain[0] = vtmp[9];
devtestT[0] = vtmp[10];
devtrainT[0] = vtmp[11];
biastestT[0] = vtmp[12];
biastrainT[0] = vtmp[13];
rmstestT[0] = vtmp[14];
rmstrainT[0] = vtmp[15];
minftestT[0] = vtmp[16];
minftrainT[0] = vtmp[17];
} else if (doMulticlass) {
for (Int_t k = 0; k < 2; k++) {
   std::vector<std::vector<Double_t>> vtemp;
   vtemp.push_back(effArea[k]); // this is the vector that is ranked on
   vtemp.push_back(eff10[k]);
   vtemp.push_back(eff01[k]);
   vtemp.push_back(eff30[k]);
   vtemp.push_back(eff10err[k]);
   vtemp.push_back(eff01err[k]);
   vtemp.push_back(eff30err[k]);
   vtemp.push_back(trainEff10[k]);
   vtemp.push_back(trainEff01[k]);
   vtemp.push_back(trainEff30[k]);
   vtemp.push_back(sig[k]);
   vtemp.push_back(sep[k]);
   vtemp.push_back(roc[k]);
   std::vector<TString> vtemps = mname[k];
   gTools().UsefulSortDescending(vtemp, &vtemps); // rank by largest ROC-integral area
   effArea[k] = vtemp[0];
   eff10[k] = vtemp[1];
   eff01[k] = vtemp[2];
   eff30[k] = vtemp[3];
   eff10err[k] = vtemp[4];
   eff01err[k] = vtemp[5];
   eff30err[k] = vtemp[6];
   trainEff10[k] = vtemp[7];
   trainEff01[k] = vtemp[8];
   trainEff30[k] = vtemp[9];
if (fCorrelations) {
   const Int_t nmeth = methodsNoCuts.size();

   if (!doRegression && !doMulticlass) {

      std::vector<Double_t> rvec;

      std::vector<TString> *theVars = new std::vector<TString>;
      std::vector<ResultsClassification *> mvaRes;
      for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end();
           ++itrMethod, ++ivar) {

         theVars->push_back(m->GetTestvarName());
         rvec.push_back(m->GetSignalReferenceCut());
         theVars->back().ReplaceAll("MVA_", "");
for (Int_t im = 0; im < nmeth; im++) {

      Log() << kWARNING << "Found NaN return value in event: " << ievt << " for method \""
            << methodsNoCuts[im]->GetName() << "\"" << Endl;

for (Int_t iv = 0; iv < nvar; iv++)

// fill the overlap matrix: both methods put the event on the same side of
// their respective signal reference cuts
for (Int_t im = 0; im < nmeth; im++) {
   for (Int_t jm = im; jm < nmeth; jm++) {
      if ((dvec[im] - rvec[im]) * (dvec[jm] - rvec[jm]) > 0) {
         (*theMat)(im, jm)++;
         if (im != jm)
            (*theMat)(jm, im)++;
      }
   }
}

if (corrMatS != 0 && corrMatB != 0) {

   // extract the inter-MVA correlation matrices
   TMatrixD mvaMatS(nmeth, nmeth);
   TMatrixD mvaMatB(nmeth, nmeth);
   for (Int_t im = 0; im < nmeth; im++) {
      for (Int_t jm = 0; jm < nmeth; jm++) {
         mvaMatS(im, jm) = (*corrMatS)(im, jm);
         mvaMatB(im, jm) = (*corrMatB)(im, jm);
      }
   }

   // extract the variable-vs-MVA correlation matrices
   std::vector<TString> theInputVars;
   TMatrixD varmvaMatS(nvar, nmeth);
   TMatrixD varmvaMatB(nvar, nmeth);
   for (Int_t iv = 0; iv < nvar; iv++) {
      for (Int_t jm = 0; jm < nmeth; jm++) {
         varmvaMatS(iv, jm) = (*corrMatS)(nmeth + iv, jm);
         varmvaMatB(iv, jm) = (*corrMatB)(nmeth + iv, jm);
      }
   }

   Log() << kINFO << Endl;
   Log() << kINFO << "Inter-MVA correlation matrix (signal):" << Endl;
   Log() << kINFO << Endl;

   Log() << kINFO << "Inter-MVA correlation matrix (background):" << Endl;
   Log() << kINFO << Endl;

   Log() << kINFO << "Correlations between input variables and MVA response (signal):" << Endl;
   Log() << kINFO << Endl;

   Log() << kINFO << "Correlations between input variables and MVA response (background):" << Endl;
   Log() << kINFO << Endl;
} else
   Log() << kWARNING << "<TestAllMethods> cannot compute correlation matrices" << Endl;

Log() << kINFO << "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
Log() << kINFO << "the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" << Endl;
Log() << kINFO << "An event is signal-like if its MVA output exceeds the following value:" << Endl;
Log() << kINFO << "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;

if (nmeth != (Int_t)methods->size())
   Log() << kINFO << "Note: no correlations and overlap with cut method are provided at present" << Endl;

Log() << kINFO << Endl;
Log() << kINFO << "Inter-MVA overlap matrix (signal):" << Endl;
Log() << kINFO << Endl;

Log() << kINFO << "Inter-MVA overlap matrix (background):" << Endl;
Log() << kINFO << Endl;
TString hLine =
   "--------------------------------------------------------------------------------------------------";
Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
Log() << kINFO << "(\"Bias\" quotes the mean deviation of the regression from true target." << Endl;
Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
Log() << kINFO << " Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" << Endl;
Log() << kINFO << " tained when removing events deviating more than 2sigma from average.)" << Endl;
Log() << kINFO << hLine << Endl;
Log() << kINFO << "DataSet Name:        MVA Method:     <Bias>   <Bias_T>    RMS    RMS_T  |  MutInf MutInf_T"
      << Endl;
Log() << kINFO << hLine << Endl;
for (Int_t i = 0; i < nmeth_used[0]; i++) {
   Log() << kINFO << Form("%-20s %-15s: %#9.3g %#9.3g %#9.3g %#9.3g  |  %#5.3f  %#5.3f", itrMap->first.Data(),
                          (const char *)mname[0][i], biastest[0][i], biastestT[0][i], rmstest[0][i], rmstestT[0][i],
                          minftest[0][i], minftestT[0][i])
         << Endl;
}

Log() << kINFO << hLine << Endl;
Log() << kINFO << Endl;
Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;
Log() << kINFO << "(overtraining check)" << Endl;
Log() << kINFO << hLine << Endl;
Log() << kINFO << "DataSet Name:        MVA Method:     <Bias>   <Bias_T>    RMS    RMS_T  |  MutInf MutInf_T"
      << Endl;
Log() << kINFO << hLine << Endl;

for (Int_t i = 0; i < nmeth_used[0]; i++) {
   Log() << kINFO << Form("%-20s %-15s: %#9.3g %#9.3g %#9.3g %#9.3g  |  %#5.3f  %#5.3f", itrMap->first.Data(),
                          (const char *)mname[0][i], biastrain[0][i], biastrainT[0][i], rmstrain[0][i],
                          rmstrainT[0][i], minftrain[0][i], minftrainT[0][i])
         << Endl;
}

Log() << kINFO << hLine << Endl;
Log() << kINFO << Endl;
} else if (doMulticlass) {

TString hLine =
   "-------------------------------------------------------------------------------------------------------";
TString header1 = TString::Format("%-15s%-15s%-15s%-15s%-15s%-15s", "Dataset", "MVA Method", "ROC AUC",
                                  "Sig eff@B=0.01", "Sig eff@B=0.10", "Sig eff@B=0.30");
TString header2 = TString::Format("%-15s%-15s%-15s%-15s%-15s%-15s", "Name:", "/ Class:", "test (train)",
                                  "test (train)", "test (train)", "test (train)");
Log() << kINFO << Endl;
Log() << kINFO << "1-vs-rest performance metrics per class" << Endl;
Log() << kINFO << hLine << Endl;
Log() << kINFO << Endl;
Log() << kINFO << "Considers the listed class as signal and the other classes" << Endl;
Log() << kINFO << "as background, reporting the resulting binary performance." << Endl;
Log() << kINFO << "A score of 0.820 (0.850) means 0.820 was achieved on the" << Endl;
Log() << kINFO << "test set and 0.850 on the training set." << Endl;

Log() << kINFO << Endl;
Log() << kINFO << header1 << Endl;
Log() << kINFO << header2 << Endl;
for (Int_t k = 0; k < 2; k++) {
   for (Int_t i = 0; i < nmeth_used[k]; i++) {
      if (k == 1) {
         mname[k][i].ReplaceAll("Variable_", "");
      }

      const TString datasetName = itrMap->first;
      const TString mvaName = mname[k][i];

      MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, mvaName));
      if (theMethod == 0) {
         continue;
      }

      Log() << kINFO << Endl;
      TString row = TString::Format("%-15s%-15s", datasetName.Data(), mvaName.Data());
      Log() << kINFO << row << Endl;
      Log() << kINFO << "------------------------------" << Endl;

      UInt_t numClasses = theMethod->DataInfo().GetNClasses();
      for (UInt_t iClass = 0; iClass < numClasses; ++iClass) {

         row = TString::Format("%-15s%-15s%-15s%-15s%-15s%-15s", "", className.Data(), rocaucCmp.Data(),
                               effB01Cmp.Data(), effB10Cmp.Data(), effB30Cmp.Data());
         Log() << kINFO << row << Endl;
         delete rocCurveTrain;
         delete rocCurveTest;
      }

Log() << kINFO << Endl;
Log() << kINFO << hLine << Endl;
Log() << kINFO << Endl;
for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {

stream << kINFO << header << Endl;
stream << kINFO << headerInfo << Endl;

for (UInt_t iRow = 0; iRow < numClasses; ++iRow) {
   stream << kINFO << TString::Format(" %-14s", classnames[iRow].Data());

   for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {

      Double_t trainValue = matTraining[iRow][iCol];
      Double_t testValue = matTesting[iRow][iCol];

   stream << kINFO << Endl;
Log() << kINFO << Endl;
Log() << kINFO << "Confusion matrices for all methods" << Endl;
Log() << kINFO << hLine << Endl;
Log() << kINFO << Endl;
Log() << kINFO << "Does a binary comparison between the two classes given by a " << Endl;
Log() << kINFO << "particular row-column combination. In each case, the class " << Endl;
Log() << kINFO << "given by the row is considered signal while the class given " << Endl;
Log() << kINFO << "by the column index is considered background." << Endl;
Log() << kINFO << Endl;
for (UInt_t iMethod = 0; iMethod < methods->size(); ++iMethod) {
   MethodBase *theMethod = dynamic_cast<MethodBase *>(methods->at(iMethod));
   if (theMethod == nullptr) {
      continue;
   }
   UInt_t numClasses = theMethod->DataInfo().GetNClasses();

   std::vector<TString> classnames;
   for (UInt_t iCls = 0; iCls < numClasses; ++iCls) {
      classnames.push_back(theMethod->DataInfo().GetClassInfo(iCls)->GetName());
   }
   Log() << kINFO
         << "=== Showing confusion matrix for method : " << Form("%-15s", (const char *)mname[0][iMethod])
         << Endl;
   Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.01%)" << Endl;
   Log() << kINFO << "---------------------------------------------------" << Endl;
   printMatrix(multiclass_testConfusionEffB01[iMethod], multiclass_trainConfusionEffB01[iMethod], classnames,
               numClasses, Log());
   Log() << kINFO << Endl;

   Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.10%)" << Endl;
   Log() << kINFO << "---------------------------------------------------" << Endl;
   printMatrix(multiclass_testConfusionEffB10[iMethod], multiclass_trainConfusionEffB10[iMethod], classnames,
               numClasses, Log());
   Log() << kINFO << Endl;

   Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.30%)" << Endl;
   Log() << kINFO << "---------------------------------------------------" << Endl;
   printMatrix(multiclass_testConfusionEffB30[iMethod], multiclass_trainConfusionEffB30[iMethod], classnames,
               numClasses, Log());
   Log() << kINFO << Endl;
}
Log() << kINFO << hLine << Endl;
Log() << kINFO << Endl;
Log().EnableOutput();

TString hLine = "------------------------------------------------------------------------------------------"
                "-------------------------";
Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
Log() << kINFO << hLine << Endl;
Log() << kINFO << "DataSet       MVA              " << Endl;
Log() << kINFO << "Name:         Method:          ROC-integ" << Endl;
Log() << kDEBUG << hLine << Endl;
for (Int_t k = 0; k < 2; k++) {
   if (k == 1 && nmeth_used[k] > 0) {
      Log() << kINFO << hLine << Endl;
      Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
   }
for (Int_t i = 0; i < nmeth_used[k]; i++) {
   TString datasetName = itrMap->first;
   TString methodName = mname[k][i];

   MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, methodName));
   if (theMethod == 0) {
      continue;
   }

   std::vector<Bool_t> *mvaResType =
      dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();

   Double_t rocIntegral = 0.0;
   if (mvaResType->size() != 0) {
      rocIntegral = GetROCIntegral(datasetName, methodName);
   }

   if (sep[k][i] < 0 || sig[k][i] < 0) {
      // cannot compute separation and significance -> no MVA (usually for Cuts)
      Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), effArea[k][i])
            << Endl;
   } else {
      Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), rocIntegral)
            << Endl;
   }
}

Log() << kINFO << hLine << Endl;
Log() << kINFO << Endl;
Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
Log() << kINFO << hLine << Endl;
Log() << kINFO
      << "DataSet              MVA              Signal efficiency: from test sample (from training sample) "
      << Endl;
Log() << kINFO << "Name:                Method:          @B=0.01             @B=0.10            @B=0.30   "
      << Endl;
Log() << kINFO << hLine << Endl;
for (Int_t k = 0; k < 2; k++) {
   if (k == 1 && nmeth_used[k] > 0) {
      Log() << kINFO << hLine << Endl;
      Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
   }
   for (Int_t i = 0; i < nmeth_used[k]; i++) {
      if (k == 1)
         mname[k][i].ReplaceAll("Variable_", "");

      Log() << kINFO
            << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)", itrMap->first.Data(),
                    (const char *)mname[k][i], eff01[k][i],
                    trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
            << Endl;
   }
}
Log() << kINFO << hLine << Endl;
Log() << kINFO << Endl;
if (gTools().CheckForSilentOption(GetOptions()))
   Log().InhibitOutput();
if (!IsSilentFile()) {
   std::list<TString> datasets;
   for (Int_t k = 0; k < 2; k++) {
      for (Int_t i = 0; i < nmeth_used[k]; i++) {

         // write test/training trees once per dataset
         if (std::find(datasets.begin(), datasets.end(), theMethod->fDataSetInfo.GetName()) == datasets.end()) {
TH1F *TMVA::Factory::EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod,
                                        TString methodTitle, const char *theOption)
{
   fModelPersistence = kFALSE;
   fSilentFile = kTRUE; // use a silent file: only the fast classification results are needed

   if (vitype == VIType::kShort)
      return EvaluateImportanceShort(loader, theMethod, methodTitle, theOption);
   else if (vitype == VIType::kAll)
      return EvaluateImportanceAll(loader, theMethod, methodTitle, theOption);
   else if (vitype == VIType::kRandom) {
      if (nbits > 10 && nbits < 30) {
         return EvaluateImportanceRandom(loader, static_cast<UInt_t>(pow(2, nbits)), theMethod, methodTitle,
                                         theOption);
      } else if (nbits < 10) {
         Log() << kERROR
               << "Error in Variable Importance: Random mode requires more than 10 variables in the dataset."
               << Endl;
      } else if (nbits > 30) {
         Log() << kERROR << "Error in Variable Importance: Number of variables is too large for Random mode"
               << Endl;
      }
   }
TH1F *TMVA::Factory::EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle,
                                           const char *theOption)
{

   Log() << kERROR << "Number of combinations is too large, is 2^" << nbits << Endl;

   Log() << kWARNING << "Number of combinations is very large, is 2^" << nbits << Endl;

   uint64_t range = static_cast<uint64_t>(pow(2, nbits));

   // vector to save importances
   std::vector<Double_t> importances(nbits);
   // vector to save the ROC integral per seed
   std::vector<Double_t> ROC(range);

   for (int i = 0; i < nbits; i++)
      importances[i] = 0;

   for (x = 1; x < range; x++) {

      std::bitset<VIBITS> xbitset(x);

      BookMethod(seedloader, theMethod, methodTitle, theOption);

      EvaluateAllMethods();

      // ROC integral of the current seed
      ROC[x] = GetROCIntegral(xbitset.to_string(), methodTitle);

      this->DeleteAllMethods();

      fMethodsMap.clear();
   }

   for (x = 0; x < range; x++) {

      for (uint32_t i = 0; i < VIBITS; ++i) {
         if (x & (uint64_t(1) << i)) {
            // subseed y: seed x with variable i removed
            std::bitset<VIBITS> ybitset(y);

            // 0.693147 ~ ln 2, so ny = log2(x - y) is the bit index of the removed variable
            uint32_t ny = static_cast<uint32_t>(log(x - y) / 0.693147);
            importances[ny] = SROC - 0.5; // baseline when the subseed y is empty (random-guess ROC = 0.5)

            importances[ny] += SROC - SSROC; // otherwise: add the gain over the subseed y
         }
      }
   }
   std::cout << "--- Variable Importance Results (All)" << std::endl;
   return GetImportance(nbits, importances, varNames);
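// Usage sketch (illustrative, not part of Factory.cxx): running the
// variable-importance scan; kShort books one seed per removed variable instead
// of all 2^n subsets. Method choice and option string are examples only.
//
//    TH1F *vi = factory.EvaluateImportance(dataloader, TMVA::VIType::kShort,
//                                          TMVA::Types::kBDT, "BDT", "");
//    vi->Draw("B");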
static uint64_t sum(uint64_t i)
{
   if (i > 62)
      return 0;
   return static_cast<uint64_t>(std::pow(2, i + 1)) - 1; // 2^(i+1) - 1: a mask with bits 0..i set
}
TH1F *TMVA::Factory::EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle,
                                             const char *theOption)
{

   Log() << kERROR << "Number of combinations is too large, is 2^" << nbits << Endl;

   long int range = sum(nbits);

   // vector to save importances
   std::vector<Double_t> importances(nbits);
   for (int i = 0; i < nbits; i++)
      importances[i] = 0;

   // seed x with all variable bits set
   std::bitset<VIBITS> xbitset(x);
   if (x == 0)
      Log() << kFATAL << "Error: need at least one variable.";

   BookMethod(seedloader, theMethod, methodTitle, theOption);

   EvaluateAllMethods();

   // ROC integral of the seed with all variables
   SROC = GetROCIntegral(xbitset.to_string(), methodTitle);

   this->DeleteAllMethods();
   fMethodsMap.clear();
for (uint32_t i = 0; i < VIBITS; ++i) {
   // subseed y: seed x with variable i removed
   y = x & ~(uint64_t(1) << i);
   std::bitset<VIBITS> ybitset(y);

   // 0.693147 ~ ln 2, so ny = log2(x - y) is the bit index of the removed variable
   uint32_t ny = static_cast<uint32_t>(log(x - y) / 0.693147);
   importances[ny] = SROC - 0.5;

   BookMethod(subseedloader, theMethod, methodTitle, theOption);

   EvaluateAllMethods();

   // ROC integral of the subseed
   SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
   importances[ny] += SROC - SSROC;

   delete subseedloader;
   this->DeleteAllMethods();
   fMethodsMap.clear();
}
std::cout << "--- Variable Importance Results (Short)" << std::endl;
return GetImportance(nbits, importances, varNames);
TH1F *TMVA::Factory::EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod,
                                              TString methodTitle, const char *theOption)
{

   long int range = pow(2, nbits);

   // vector to save importances
   std::vector<Double_t> importances(nbits);
   for (int i = 0; i < nbits; i++)
      importances[i] = 0;

   // random seed x with nbits variable bits
   std::bitset<32> xbitset(x);

   BookMethod(seedloader, theMethod, methodTitle, theOption);

   EvaluateAllMethods();

   // ROC integral of the current seed
   SROC = GetROCIntegral(xbitset.to_string(), methodTitle);

   this->DeleteAllMethods();
   fMethodsMap.clear();
for (uint32_t i = 0; i < 32; ++i) {
   if (x & (uint64_t(1) << i)) {
      // subseed y: seed x with variable i removed
      y = x & ~(uint64_t(1) << i);
      std::bitset<32> ybitset(y);

      importances[ny] = SROC - 0.5;

      BookMethod(subseedloader, theMethod, methodTitle, theOption);

      EvaluateAllMethods();

      // ROC integral of the subseed
      SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
      importances[ny] += SROC - SSROC;

      MethodBase *ssmethod = dynamic_cast<TMVA::MethodBase *>(fMethodsMap[ybitset.to_string().c_str()][0][0]);

      delete subseedloader;
      this->DeleteAllMethods();
      fMethodsMap.clear();
   }
}
std::cout << "--- Variable Importance Results (Random)" << std::endl;
return GetImportance(nbits, importances, varNames);
TH1F *vih1 = new TH1F("vih1", "", nbits, 0, nbits);

Double_t normalization = 0.0;
for (int i = 0; i < nbits; i++) {
   normalization = normalization + importances[i];
}

Double_t roc = 0.0;

// x_ie/y_ie: graph points holding the importance per variable, in percent
std::vector<Double_t> x_ie(nbits), y_ie(nbits);
for (Int_t i = 1; i < nbits + 1; i++) {
   x_ie[i - 1] = (i - 1) * 1.;
   roc = 100.0 * importances[i - 1] / normalization;
   y_ie[i - 1] = roc;
   std::cout << "--- " << varNames[i - 1] << " = " << roc << " %" << std::endl;
   vih1->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
   vih1->SetBinContent(i, roc);
}
TGraph *g_ie = new TGraph(nbits + 2, &x_ie[0], &y_ie[0]);