116 fModelPersistence(
kTRUE)
148 DeclareOptionRef(color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)");
151 "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, "
152 "decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations");
156 "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory "
157 "class object (default: False)");
159 "Draw progress bar to display training, testing and evaluation schedule (default: True)");
161 "Option to save the trained model in xml file or using serialization");
165 "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)");
189 if (analysisType ==
"classification")
191 else if (analysisType ==
"regression")
193 else if (analysisType ==
"multiclass")
195 else if (analysisType ==
"auto")
238 DeclareOptionRef(color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)");
241 "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, "
242 "decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations");
246 "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory "
247 "class object (default: False)");
249 "Draw progress bar to display training, testing and evaluation schedule (default: True)");
251 "Option to save the trained model in xml file or using serialization");
255 "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)");
279 if (analysisType ==
"classification")
281 else if (analysisType ==
"regression")
283 else if (analysisType ==
"multiclass")
285 else if (analysisType ==
"auto")
308 std::vector<TMVA::VariableTransformBase *>::iterator
trfIt = fDefaultTrfs.
begin();
312 this->DeleteAllMethods();
326 std::map<TString, MVector *>::iterator
itrMap;
333 Log() << kDEBUG <<
"Delete method: " << (*itrMethod)->GetName() <<
Endl;
354 if (fModelPersistence)
360 if (
loader->GetDataSetInfo().GetNClasses() == 2 &&
loader->GetDataSetInfo().GetClassInfo(
"Signal") !=
NULL &&
361 loader->GetDataSetInfo().GetClassInfo(
"Background") !=
NULL) {
363 }
else if (
loader->GetDataSetInfo().GetNClasses() >= 2) {
366 Log() << kFATAL <<
"No analysis type for " <<
loader->GetDataSetInfo().GetNClasses() <<
" classes and "
367 <<
loader->GetDataSetInfo().GetNTargets() <<
" regression targets." <<
Endl;
373 if (fMethodsMap.find(
datasetname) != fMethodsMap.end()) {
375 Log() << kFATAL <<
"Booking failed since method with title <" << methodTitle <<
"> already exists "
376 <<
"in with DataSet Name <" <<
loader->GetName() <<
"> " <<
Endl;
380 Log() << kHEADER <<
"Booking method: " <<
gTools().
Color(
"bold")
388 conf->DeclareOptionRef(
boostNum = 0,
"Boost_num",
"Number of times the classifier will be boosted");
389 conf->ParseOptions();
393 if (fModelPersistence) {
410 Log() << kDEBUG <<
"Boost Number is " <<
boostNum <<
" > 0: train boosted classifier" <<
Endl;
414 Log() << kFATAL <<
"Method with type kBoost cannot be casted to MethodCategory. /Factory" <<
Endl;
417 if (fModelPersistence)
419 methBoost->SetModelPersistence(fModelPersistence);
421 methBoost->fDataSetManager =
loader->GetDataSetInfo().GetDataSetManager();
423 methBoost->SetSilentFile(IsSilentFile());
434 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory"
438 if (fModelPersistence)
440 methCat->SetModelPersistence(fModelPersistence);
441 methCat->fDataSetManager =
loader->GetDataSetInfo().GetDataSetManager();
442 methCat->SetFile(fgTargetFile);
443 methCat->SetSilentFile(IsSilentFile());
446 if (!
method->HasAnalysisType(fAnalysisType,
loader->GetDataSetInfo().GetNClasses(),
447 loader->GetDataSetInfo().GetNTargets())) {
448 Log() << kWARNING <<
"Method " <<
method->GetMethodTypeName() <<
" is not capable of handling ";
450 Log() <<
"regression with " <<
loader->GetDataSetInfo().GetNTargets() <<
" targets." <<
Endl;
452 Log() <<
"multiclass classification with " <<
loader->GetDataSetInfo().GetNClasses() <<
" classes." <<
Endl;
454 Log() <<
"classification with " <<
loader->GetDataSetInfo().GetNClasses() <<
" classes." <<
Endl;
459 if (fModelPersistence)
461 method->SetModelPersistence(fModelPersistence);
462 method->SetAnalysisType(fAnalysisType);
466 method->SetFile(fgTargetFile);
467 method->SetSilentFile(IsSilentFile());
472 if (fMethodsMap.find(
datasetname) == fMethodsMap.end()) {
514 Log() << kERROR <<
"Cannot handle category methods for now." <<
Endl;
518 if (fModelPersistence) {
529 if (fModelPersistence)
531 method->SetModelPersistence(fModelPersistence);
532 method->SetAnalysisType(fAnalysisType);
534 method->SetFile(fgTargetFile);
535 method->SetSilentFile(IsSilentFile());
537 method->DeclareCompatibilityOptions();
540 method->ReadStateFromFile();
546 Log() << kFATAL <<
"Booking failed since method with title <" << methodTitle <<
"> already exists "
547 <<
"in with DataSet Name <" <<
loader->GetName() <<
"> " <<
Endl;
550 Log() << kINFO <<
"Booked classifier \"" <<
method->GetMethodName() <<
"\" of type: \""
551 <<
method->GetMethodTypeName() <<
"\"" <<
Endl;
568 if (fMethodsMap.find(
datasetname) == fMethodsMap.end())
577 if ((
mva->GetMethodName()) == methodTitle)
588 if (fMethodsMap.find(
datasetname) == fMethodsMap.end())
591 std::string methodName = methodTitle.
Data();
606 if (!RootBaseDir()->GetDirectory(fDataSetInfo.
GetName()))
607 RootBaseDir()->mkdir(fDataSetInfo.
GetName());
611 RootBaseDir()->cd(fDataSetInfo.
GetName());
660 std::vector<TMVA::TransformationHandler *>
trfs;
670 Log() << kDEBUG <<
"current transformation string: '" <<
trfS.Data() <<
"'" <<
Endl;
673 if (
trfS.BeginsWith(
'I'))
680 std::vector<TMVA::TransformationHandler *>::iterator
trfIt =
trfs.
begin();
684 (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.
GetName()));
704 std::map<TString, MVector *>::iterator
itrMap;
716 Log() << kFATAL <<
"Dynamic cast to MethodBase failed" <<
Endl;
721 Log() << kWARNING <<
"Method " <<
mva->GetMethodName() <<
" not trained (training tree has less entries ["
726 Log() << kINFO <<
"Optimize method: " <<
mva->GetMethodName() <<
" for "
729 : (fAnalysisType ==
Types::kMulticlass ?
"Multiclass classification" :
"Classification"))
733 Log() << kINFO <<
"Optimization of tuning parameters finished for Method:" <<
mva->GetName() <<
Endl;
764 if (fMethodsMap.find(
datasetname) == fMethodsMap.end()) {
765 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.",
datasetname.Data()) <<
Endl;
777 Log() << kERROR <<
Form(
"Can only generate ROC curves for analysis type kClassification and kMulticlass.")
784 dataset->SetCurrentType(
type);
790 <<
Form(
"Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
iClass,
812 std::vector<Float_t>
mvaRes;
864 if (fMethodsMap.find(
datasetname) == fMethodsMap.end()) {
865 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.",
datasetname.Data()) <<
Endl;
877 Log() << kERROR <<
Form(
"Can only generate ROC integral for analysis type kClassification. and kMulticlass.")
885 <<
Form(
"ROCCurve object was not created in Method = %s not found with Dataset = %s ",
theMethodName.Data(),
935 if (fMethodsMap.find(
datasetname) == fMethodsMap.end()) {
936 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.",
datasetname.Data()) <<
Endl;
948 Log() << kERROR <<
Form(
"Can only generate ROC curves for analysis type kClassification and kMulticlass.")
958 <<
Form(
"ROCCurve object was not created in Method = %s not found with Dataset = %s ",
theMethodName.Data(),
968 graph->GetYaxis()->SetTitle(
"Background rejection (Specificity)");
969 graph->GetXaxis()->SetTitle(
"Signal efficiency (Sensitivity)");
1023 <<
Form(
"Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
iClass,
1032 graph->SetTitle(methodName);
1034 graph->SetLineWidth(2);
1036 graph->SetFillColor(10);
1041 if (
multigraph->GetListOfGraphs() ==
nullptr) {
1042 Log() << kERROR <<
Form(
"No metohds have class %i defined.",
iClass) <<
Endl;
1079 if (fMethodsMap.find(
datasetname) == fMethodsMap.end()) {
1080 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.",
datasetname.Data()) <<
Endl;
1093 multigraph->GetYaxis()->SetTitle(
"Background rejection (Specificity)");
1094 multigraph->GetXaxis()->SetTitle(
"Signal efficiency (Sensitivity)");
1105 canvas->
BuildLegend(0.15, 0.15, 0.35, 0.3,
"MVA Method");
1120 if (fMethodsMap.empty()) {
1121 Log() << kINFO <<
"...nothing found to train" <<
Endl;
1127 Log() << kDEBUG <<
"Train all methods for "
1133 std::map<TString, MVector *>::iterator
itrMap;
1147 if (
mva->DataInfo().GetDataSetManager()->DataInput().GetEntries() <=
1149 Log() << kFATAL <<
"No input data for the training provided!" <<
Endl;
1153 Log() << kFATAL <<
"You want to do regression training without specifying a target." <<
Endl;
1155 mva->DataInfo().GetNClasses() < 2)
1156 Log() << kFATAL <<
"You want to do classification training, but specified less than two classes." <<
Endl;
1159 if (!IsSilentFile())
1160 WriteDataInformation(
mva->fDataSetInfo);
1163 Log() << kWARNING <<
"Method " <<
mva->GetMethodName() <<
" not trained (training tree has less entries ["
1168 Log() << kHEADER <<
"Train method: " <<
mva->GetMethodName() <<
" for "
1171 : (fAnalysisType ==
Types::kMulticlass ?
"Multiclass classification" :
"Classification"))
1174 Log() << kHEADER <<
"Training finished" <<
Endl <<
Endl;
1181 Log() << kINFO <<
"Ranking input variables (method specific)..." <<
Endl;
1191 Log() << kINFO <<
"No variable ranking supplied by classifier: "
1198 if (!IsSilentFile()) {
1204 m->fTrainHistory.SaveHistory(
m->GetMethodName());
1212 if (fModelPersistence) {
1214 Log() << kHEADER <<
"=== Destroy and recreate all methods via weight files for testing ===" <<
Endl <<
Endl;
1216 if (!IsSilentFile())
1217 RootBaseDir()->cd();
1243 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
1245 methCat->fDataSetManager =
m->DataInfo().GetDataSetManager();
1252 m->SetModelPersistence(fModelPersistence);
1253 m->SetSilentFile(IsSilentFile());
1254 m->SetAnalysisType(fAnalysisType);
1256 m->ReadStateFromFile();
1276 if (fMethodsMap.empty()) {
1277 Log() << kINFO <<
"...nothing found to test" <<
Endl;
1280 std::map<TString, MVector *>::iterator
itrMap;
1293 Log() << kHEADER <<
"Test method: " <<
mva->GetMethodName() <<
" for "
1296 : (analysisType ==
Types::kMulticlass ?
"Multiclass classification" :
"Classification"))
1307 if (methodTitle !=
"") {
1312 Log() << kWARNING <<
"<MakeClass> Could not find classifier \"" << methodTitle <<
"\" in list" <<
Endl;
1323 Log() << kINFO <<
"Make response class for classifier: " <<
method->GetMethodName() <<
Endl;
1335 if (methodTitle !=
"") {
1338 method->PrintHelpMessage();
1340 Log() << kWARNING <<
"<PrintHelpMessage> Could not find classifier \"" << methodTitle <<
"\" in list" <<
Endl;
1351 Log() << kINFO <<
"Print help message for classifier: " <<
method->GetMethodName() <<
Endl;
1352 method->PrintHelpMessage();
1362 Log() << kINFO <<
"Evaluating all variables..." <<
Endl;
1365 for (
UInt_t i = 0; i <
loader->GetDataSetInfo().GetNVariables(); i++) {
1366 TString s =
loader->GetDataSetInfo().GetVariableInfo(i).GetLabel();
1369 this->BookMethod(
loader,
"Variable", s);
1381 if (fMethodsMap.empty()) {
1382 Log() << kINFO <<
"...nothing found to evaluate" <<
Endl;
1385 std::map<TString, MVector *>::iterator
itrMap;
1400 std::vector<std::vector<TString>>
mname(2);
1401 std::vector<std::vector<Double_t>> sig(2), sep(2),
roc(2);
1421 std::vector<std::vector<Double_t>>
biastrain(1);
1422 std::vector<std::vector<Double_t>>
biastest(1);
1423 std::vector<std::vector<Double_t>>
devtrain(1);
1424 std::vector<std::vector<Double_t>>
devtest(1);
1425 std::vector<std::vector<Double_t>>
rmstrain(1);
1426 std::vector<std::vector<Double_t>>
rmstest(1);
1427 std::vector<std::vector<Double_t>>
minftrain(1);
1428 std::vector<std::vector<Double_t>>
minftest(1);
1429 std::vector<std::vector<Double_t>>
rhotrain(1);
1430 std::vector<std::vector<Double_t>>
rhotest(1);
1433 std::vector<std::vector<Double_t>>
biastrainT(1);
1434 std::vector<std::vector<Double_t>>
biastestT(1);
1435 std::vector<std::vector<Double_t>>
devtrainT(1);
1436 std::vector<std::vector<Double_t>>
devtestT(1);
1437 std::vector<std::vector<Double_t>>
rmstrainT(1);
1438 std::vector<std::vector<Double_t>>
rmstestT(1);
1439 std::vector<std::vector<Double_t>>
minftrainT(1);
1440 std::vector<std::vector<Double_t>>
minftestT(1);
1455 theMethod->SetSilentFile(IsSilentFile());
1462 Log() << kINFO <<
"Evaluate regression method: " <<
theMethod->GetMethodName() <<
Endl;
1467 Log() << kINFO <<
"TestRegression (testing)" <<
Endl;
1479 Log() << kINFO <<
"TestRegression (training)" <<
Endl;
1493 if (!IsSilentFile()) {
1494 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1503 Log() << kINFO <<
"Evaluate multiclass classification method: " <<
theMethod->GetMethodName() <<
Endl;
1522 if (!IsSilentFile()) {
1523 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1532 Log() << kHEADER <<
"Evaluate classifier: " <<
theMethod->GetMethodName() <<
Endl <<
Endl;
1533 isel = (
theMethod->GetMethodTypeName().Contains(
"Variable")) ? 1 : 0;
1554 theMethod->GetTrainingEfficiency(
"Efficiency:0.01"));
1560 if (!IsSilentFile()) {
1561 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1570 std::vector<std::vector<Double_t>>
vtmp;
1616 for (
Int_t k = 0; k < 2; k++) {
1617 std::vector<std::vector<Double_t>>
vtemp;
1628 vtemp.push_back(sig[k]);
1629 vtemp.push_back(sep[k]);
1656 if (fCorrelations) {
1659 const Int_t nvar =
method->fDataSetInfo.GetNVariables();
1666 std::vector<Double_t>
rvec;
1674 std::vector<TString> *
theVars =
new std::vector<TString>;
1675 std::vector<ResultsClassification *>
mvaRes;
1681 theVars->push_back(
m->GetTestvarName());
1682 rvec.push_back(
m->GetSignalReferenceCut());
1683 theVars->back().ReplaceAll(
"MVA_",
"");
1706 Log() << kWARNING <<
"Found NaN return value in event: " <<
ievt <<
" for method \""
1714 if (
method->fDataSetInfo.IsSignal(
ev)) {
1726 (*theMat)(
im,
jm)++;
1728 (*theMat)(
jm,
im)++;
1735 (*overlapS) *= (1.0 /
defDs->GetNEvtSigTest());
1736 (*overlapB) *= (1.0 /
defDs->GetNEvtBkgdTest());
1738 tpSig->MakePrincipals();
1739 tpBkg->MakePrincipals();
1773 Log() << kINFO <<
Endl;
1774 Log() << kINFO <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1775 <<
"Inter-MVA correlation matrix (signal):" <<
Endl;
1777 Log() << kINFO <<
Endl;
1779 Log() << kINFO <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1780 <<
"Inter-MVA correlation matrix (background):" <<
Endl;
1782 Log() << kINFO <<
Endl;
1785 Log() << kINFO <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1786 <<
"Correlations between input variables and MVA response (signal):" <<
Endl;
1788 Log() << kINFO <<
Endl;
1790 Log() << kINFO <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1791 <<
"Correlations between input variables and MVA response (background):" <<
Endl;
1793 Log() << kINFO <<
Endl;
1795 Log() << kWARNING <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1796 <<
"<TestAllMethods> cannot compute correlation matrices" <<
Endl;
1799 Log() << kINFO <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1800 <<
"The following \"overlap\" matrices contain the fraction of events for which " <<
Endl;
1801 Log() << kINFO <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1802 <<
"the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" <<
Endl;
1803 Log() << kINFO <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1804 <<
"An event is signal-like, if its MVA output exceeds the following value:" <<
Endl;
1806 Log() << kINFO <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1807 <<
"which correspond to the working point: eff(signal) = 1 - eff(background)" <<
Endl;
1811 Log() << kINFO <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1812 <<
"Note: no correlations and overlap with cut method are provided at present" <<
Endl;
1815 Log() << kINFO <<
Endl;
1816 Log() << kINFO <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1817 <<
"Inter-MVA overlap matrix (signal):" <<
Endl;
1819 Log() << kINFO <<
Endl;
1821 Log() << kINFO <<
Form(
"Dataset[%s] : ",
method->fDataSetInfo.GetName())
1822 <<
"Inter-MVA overlap matrix (background):" <<
Endl;
1845 Log() << kINFO <<
Endl;
1847 "--------------------------------------------------------------------------------------------------";
1848 Log() << kINFO <<
"Evaluation results ranked by smallest RMS on test sample:" <<
Endl;
1849 Log() << kINFO <<
"(\"Bias\" quotes the mean deviation of the regression from true target." <<
Endl;
1850 Log() << kINFO <<
" \"MutInf\" is the \"Mutual Information\" between regression and target." <<
Endl;
1851 Log() << kINFO <<
" Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" <<
Endl;
1852 Log() << kINFO <<
" tained when removing events deviating more than 2sigma from average.)" <<
Endl;
1864 <<
Form(
"%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
theMethod->fDataSetInfo.GetName(),
1870 Log() << kINFO <<
Endl;
1871 Log() << kINFO <<
"Evaluation results ranked by smallest RMS on training sample:" <<
Endl;
1872 Log() << kINFO <<
"(overtraining check)" <<
Endl;
1875 <<
"DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T"
1884 <<
Form(
"%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
theMethod->fDataSetInfo.GetName(),
1890 Log() << kINFO <<
Endl;
1897 "-------------------------------------------------------------------------------------------------------";
1937 "Sig eff@B=0.10",
"Sig eff@B=0.30");
1939 "test (train)",
"test (train)");
1940 Log() << kINFO <<
Endl;
1941 Log() << kINFO <<
"1-vs-rest performance metrics per class" <<
Endl;
1943 Log() << kINFO <<
Endl;
1944 Log() << kINFO <<
"Considers the listed class as signal and the other classes" <<
Endl;
1945 Log() << kINFO <<
"as background, reporting the resulting binary performance." <<
Endl;
1946 Log() << kINFO <<
"A score of 0.820 (0.850) means 0.820 was acheived on the" <<
Endl;
1947 Log() << kINFO <<
"test set and 0.850 on the training set." <<
Endl;
1949 Log() << kINFO <<
Endl;
1952 for (
Int_t k = 0; k < 2; k++) {
1955 mname[k][i].ReplaceAll(
"Variable_",
"");
1966 Log() << kINFO <<
Endl;
1968 Log() << kINFO << row <<
Endl;
1969 Log() << kINFO <<
"------------------------------" <<
Endl;
1992 Log() << kINFO << row <<
Endl;
1999 Log() << kINFO <<
Endl;
2001 Log() << kINFO <<
Endl;
2019 stream << kINFO << header <<
Endl;
2035 stream << kINFO <<
Endl;
2039 Log() << kINFO <<
Endl;
2040 Log() << kINFO <<
"Confusion matrices for all methods" <<
Endl;
2042 Log() << kINFO <<
Endl;
2043 Log() << kINFO <<
"Does a binary comparison between the two classes given by a " <<
Endl;
2044 Log() << kINFO <<
"particular row-column combination. In each case, the class " <<
Endl;
2045 Log() << kINFO <<
"given by the row is considered signal while the class given " <<
Endl;
2046 Log() << kINFO <<
"by the column index is considered background." <<
Endl;
2047 Log() << kINFO <<
Endl;
2060 <<
"=== Showing confusion matrix for method : " <<
Form(
"%-15s", (
const char *)
mname[0][
iMethod])
2062 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.01%)" <<
Endl;
2063 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
2066 Log() << kINFO <<
Endl;
2068 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.10%)" <<
Endl;
2069 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
2072 Log() << kINFO <<
Endl;
2074 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.30%)" <<
Endl;
2075 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
2078 Log() << kINFO <<
Endl;
2081 Log() << kINFO <<
Endl;
2086 Log().EnableOutput();
2089 TString hLine =
"------------------------------------------------------------------------------------------"
2090 "-------------------------";
2091 Log() << kINFO <<
"Evaluation results ranked by best signal efficiency and purity (area)" <<
Endl;
2093 Log() << kINFO <<
"DataSet MVA " <<
Endl;
2094 Log() << kINFO <<
"Name: Method: ROC-integ" <<
Endl;
2100 for (
Int_t k = 0; k < 2; k++) {
2103 Log() << kINFO <<
"Input Variables: " <<
Endl <<
hLine <<
Endl;
2128 if (sep[k][i] < 0 || sig[k][i] < 0) {
2157 Log() << kINFO <<
Endl;
2158 Log() << kINFO <<
"Testing efficiency compared to training efficiency (overtraining check)" <<
Endl;
2161 <<
"DataSet MVA Signal efficiency: from test sample (from training sample) "
2163 Log() << kINFO <<
"Name: Method: @B=0.01 @B=0.10 @B=0.30 "
2166 for (
Int_t k = 0; k < 2; k++) {
2169 Log() << kINFO <<
"Input Variables: " <<
Endl <<
hLine <<
Endl;
2173 mname[k][i].ReplaceAll(
"Variable_",
"");
2179 <<
Form(
"%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
2186 Log() << kINFO <<
Endl;
2188 if (
gTools().CheckForSilentOption(GetOptions()))
2189 Log().InhibitOutput();
2192 if (!IsSilentFile()) {
2193 std::list<TString> datasets;
2194 for (
Int_t k = 0; k < 2; k++) {
2200 RootBaseDir()->cd(
theMethod->fDataSetInfo.GetName());
2201 if (std::find(datasets.begin(), datasets.end(),
theMethod->fDataSetInfo.GetName()) == datasets.end()) {
2204 datasets.push_back(
theMethod->fDataSetInfo.GetName());
2220 fModelPersistence =
kFALSE;
2221 fSilentFile =
kTRUE;
2224 const int nbits =
loader->GetDataSetInfo().GetNVariables();
2225 if (
vitype == VIType::kShort)
2227 else if (
vitype == VIType::kAll)
2229 else if (
vitype == VIType::kRandom) {
2233 }
else if (
nbits < 10) {
2234 Log() << kERROR <<
"Error in Variable Importance: Random mode require more that 10 variables in the dataset."
2236 }
else if (
nbits > 30) {
2237 Log() << kERROR <<
"Error in Variable Importance: Number of variables is too large for Random mode"
2254 const int nbits =
loader->GetDataSetInfo().GetNVariables();
2255 std::vector<TString>
varNames =
loader->GetDataSetInfo().GetListOfVariables();
2258 Log() << kERROR <<
"Number of combinations is too large , is 2^" <<
nbits <<
Endl;
2262 Log() << kWARNING <<
"Number of combinations is very large , is 2^" <<
nbits <<
Endl;
2264 uint64_t
range =
static_cast<uint64_t
>(pow(2,
nbits));
2272 for (
int i = 0; i <
nbits; i++)
2292 seedloader->PrepareTrainingAndTestTree(
loader->GetDataSetInfo().GetCut(
"Signal"),
2293 loader->GetDataSetInfo().GetCut(
"Background"),
2294 loader->GetDataSetInfo().GetSplitOptions());
2302 EvaluateAllMethods();
2305 ROC[
x] = GetROCIntegral(
xbitset.to_string(), methodTitle);
2313 this->DeleteAllMethods();
2315 fMethodsMap.clear();
2321 for (uint32_t i = 0; i <
VIBITS; ++i) {
2322 if (
x & (uint64_t(1) << i)) {
2328 uint32_t
ny =
static_cast<uint32_t
>( log(
x -
y) / 0.693147 ) ;
2341 std::cout <<
"--- Variable Importance Results (All)" << std::endl;
/// Return 2^0 + 2^1 + ... + 2^i, i.e. 2^(i+1) - 1 (the value whose low i+1 bits are set).
///
/// \param i  highest exponent included in the sum
/// \return   2^(i+1) - 1 for i <= 62; 0 for any i > 62 (overflow guard value)
static uint64_t sum(uint64_t i)
{
   // Limit for overflows: exponents above 62 are rejected and reported as 0.
   if (i > 62)
      return 0;
   // Exact integer arithmetic instead of a floating-point pow() round trip;
   // the shift count i + 1 is at most 63 here, so it is always well defined.
   return (uint64_t(1) << (i + 1)) - 1;
}
2365 const int nbits =
loader->GetDataSetInfo().GetNVariables();
2366 std::vector<TString>
varNames =
loader->GetDataSetInfo().GetListOfVariables();
2369 Log() << kERROR <<
"Number of combinations is too large , is 2^" <<
nbits <<
Endl;
2376 for (
int i = 0; i <
nbits; i++)
2385 Log() << kFATAL <<
"Error: need at least one variable.";
2405 EvaluateAllMethods();
2408 SROC = GetROCIntegral(
xbitset.to_string(), methodTitle);
2416 this->DeleteAllMethods();
2417 fMethodsMap.clear();
2421 for (uint32_t i = 0; i <
VIBITS; ++i) {
2423 y =
x & ~(uint64_t(1) << i);
2428 uint32_t
ny =
static_cast<uint32_t
>(log(
x -
y) / 0.693147);
2451 EvaluateAllMethods();
2454 SSROC = GetROCIntegral(
ybitset.to_string(), methodTitle);
2463 this->DeleteAllMethods();
2464 fMethodsMap.clear();
2467 std::cout <<
"--- Variable Importance Results (Short)" << std::endl;
2482 const int nbits =
loader->GetDataSetInfo().GetNVariables();
2483 std::vector<TString>
varNames =
loader->GetDataSetInfo().GetListOfVariables();
2489 for (
int i = 0; i <
nbits; i++)
2518 EvaluateAllMethods();
2521 SROC = GetROCIntegral(
xbitset.to_string(), methodTitle);
2530 this->DeleteAllMethods();
2531 fMethodsMap.clear();
2535 for (uint32_t i = 0; i < 32; ++i) {
2536 if (
x & (uint64_t(1) << i)) {
2566 EvaluateAllMethods();
2569 SSROC = GetROCIntegral(
ybitset.to_string(), methodTitle);
2580 this->DeleteAllMethods();
2581 fMethodsMap.clear();
2585 std::cout <<
"--- Variable Importance Results (Random)" << std::endl;
2598 for (
int i = 0; i <
nbits; i++) {
2609 x_ie[i - 1] = (i - 1) * 1.;
2612 std::cout <<
"--- " <<
varNames[i - 1] <<
" = " <<
roc <<
" %" << std::endl;
2613 vih1->GetXaxis()->SetBinLabel(i,
varNames[i - 1].Data());
2619 vih1->LabelsOption(
"v >",
"X");
2620 vih1->SetBarWidth(0.97);
2625 vih1->GetYaxis()->SetTitle(
"Importance (%)");
2626 vih1->GetYaxis()->SetTitleSize(0.045);
2627 vih1->GetYaxis()->CenterTitle();
2628 vih1->GetYaxis()->SetTitleOffset(1.24);
2630 vih1->GetYaxis()->SetRangeUser(-7, 50);
2631 vih1->SetDirectory(
nullptr);
#define MinNoTrainingEvents
void printMatrix(const TMatrixD &mat)
write a matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
TMatrixT< Double_t > TMatrixD
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
R__EXTERN TStyle * gStyle
R__EXTERN TSystem * gSystem
const_iterator begin() const
const_iterator end() const
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb",...
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
A TGraph is an object made of two arrays X and Y with npoints each.
1-D histogram with a float per channel (see TH1 documentation)
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Service class for 2-D histogram classes.
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
TString fWeightFileDirPrefix
void SetDrawProgressBar(Bool_t d)
void SetUseColor(Bool_t uc)
class TMVA::Config::VariablePlotting fVariablePlotting
void SetConfigDescription(const char *d)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
void SetConfigName(const char *n)
virtual void ParseOptions()
options parser
const TString & GetOptions() const
MsgLogger * fLogger
! message logger
void CheckForUnusedOptions() const
checks for unused options in option string
Class that contains all the data information.
virtual const char * GetName() const
Returns name of object.
const TMatrixD * CorrelationMatrix(const TString &className) const
UInt_t GetNClasses() const
DataSet * GetDataSet() const
returns data set
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
ClassInfo * GetClassInfo(Int_t clNum) const
Class that contains all the data information.
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
This is the main MVA steering class.
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
Bool_t fCorrelations
! enable to calculate correlations
std::vector< IMethod * > MVector
void TrainAllMethods()
Iterates through all booked methods and calls training.
Bool_t Verbose(void) const
void WriteDataInformation(DataSetInfo &fDataSetInfo)
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponi...
Bool_t fVerbose
! verbose mode
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Bool_t fROC
! enable to calculate ROC values
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
TString fVerboseLevel
! verbosity level, controls granularity of logging
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass, Types::ETreeType type=Types::kTesting)
Generate a collection of graphs, for all methods for a given class.
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Calculate the integral of the ROC curve, also known as the area under curve (AUC),...
virtual ~Factory()
Destructor.
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
MethodBase * BookMethodWeightfile(DataLoader *dataloader, TMVA::Types::EMVA methodType, const TString &weightfile)
Adds an already constructed method to be managed by this factory.
Bool_t fModelPersistence
! option to save the trained model in xml file or using serialization
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods and sees if they use parameter tuning and if so does just that,...
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Private method to generate a ROCCurve instance for a given method.
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Types::EAnalysisType fAnalysisType
! the training type
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
void SetVerbose(Bool_t v=kTRUE)
TFile * fgTargetFile
! ROOT output file
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
void DeleteAllMethods(void)
Delete methods.
TString fTransformations
! list of transformations to test
void Greetings()
Print welcome message.
Interface for all concrete MVA method implementations.
Virtual base Class for all MVA method.
const TString & GetMethodName() const
Class for boosting a TMVA method.
Class for categorizing the phase space.
ostringstream derivative to redirect and format output
void SetMinType(EMsgType minType)
void SetSource(const std::string &source)
static void InhibitOutput()
Ranking for variables in method (implementation)
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Singleton class for Global types used by TMVA.
static Types & Instance()
The single instance of "Types" if existing already, or create it (Singleton)
A TMultiGraph is a collection of TGraph (or derived) objects.
const char * GetName() const override
Returns name of object.
@ kOverwrite
overwrite existing object with same name
virtual const char * GetName() const
Returns name of object.
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
void SetGrid(Int_t valuex=1, Int_t valuey=1) override
TLegend * BuildLegend(Double_t x1=0.3, Double_t y1=0.21, Double_t x2=0.3, Double_t y2=0.21, const char *title="", Option_t *option="") override
Build a legend from the graphical objects in the pad.
Principal Components Analysis (PCA)
Random number generator class based on M.
void ToLower()
Change string to lower-case.
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
const char * Data() const
TString & ReplaceAll(const TString &s1, const TString &s2)
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
void SetTitleXOffset(Float_t offset=1)
virtual int MakeDirectory(const char *name)
Make a directory.
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
static uint64_t sum(uint64_t i)
const Int_t MinNoTrainingEvents