121 fTransformations (
"I" ),
123 fVerboseLevel ( kINFO ),
126 fSilentFile ( theTargetFile == nullptr ),
127 fJobName ( jobName ),
128 fAnalysisType (
Types::kClassification ),
129 fModelPersistence (
kTRUE)
160 DeclareOptionRef( color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)" );
161 DeclareOptionRef(
fTransformations,
"Transformations",
"List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
164 DeclareOptionRef( silent,
"Silent",
"Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
166 "DrawProgressBar",
"Draw progress bar to display training, testing and evaluation schedule (default: True)" );
169 "Option to save the trained model in xml file or using serialization");
173 "AnalysisType",
"Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
206 fTransformations (
"I" ),
210 fSilentFile (
kTRUE ),
211 fJobName ( jobName ),
212 fAnalysisType (
Types::kClassification ),
213 fModelPersistence (
kTRUE)
245 DeclareOptionRef( color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)" );
246 DeclareOptionRef(
fTransformations,
"Transformations",
"List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
249 DeclareOptionRef( silent,
"Silent",
"Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
251 "DrawProgressBar",
"Draw progress bar to display training, testing and evaluation schedule (default: True)" );
254 "Option to save the trained model in xml file or using serialization");
258 "AnalysisType",
"Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
302 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
303 for (;trfIt != fDefaultTrfs.end(); ++trfIt)
delete (*trfIt);
305 this->DeleteAllMethods();
320 std::map<TString,MVector*>::iterator itrMap;
322 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
324 MVector *methods=itrMap->second;
326 MVector::iterator itrMethod = methods->begin();
327 for (; itrMethod != methods->end(); ++itrMethod) {
328 Log() << kDEBUG <<
"Delete method: " << (*itrMethod)->GetName() <<
Endl;
368 if(fMethodsMap.find(datasetname)!=fMethodsMap.end())
370 if (
GetMethod( datasetname,methodTitle ) != 0) {
371 Log() << kFATAL <<
"Booking failed since method with title <"
372 << methodTitle <<
"> already exists "<<
"in with DataSet Name <"<< loader->
GetName()<<
"> "
378 Log() << kHEADER <<
"Booking method: " <<
gTools().
Color(
"bold") << methodTitle
386 "Number of times the classifier will be boosted" );
391 if(fModelPersistence)
397 if (fileDir[fileDir.
Length()-1] !=
'/') fileDir +=
"/";
409 Log() << kDEBUG <<
"Boost Number is " << boostNum <<
" > 0: train boosted classifier" <<
Endl;
413 Log() << kFATAL <<
"Method with type kBoost cannot be casted to MethodCategory. /Factory" <<
Endl;
420 methBoost->
SetFile(fgTargetFile);
425 if (method==0)
return 0;
431 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
437 methCat->
SetFile(fgTargetFile);
445 Log() << kWARNING <<
"Method " << method->
GetMethodTypeName() <<
" is not capable of handling " ;
470 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
473 fMethodsMap[datasetname]=mvector;
475 fMethodsMap[datasetname]->push_back( method );
507 if (method ==
nullptr)
return nullptr;
510 Log() << kERROR <<
"Cannot handle category methods for now." <<
Endl;
514 if(fModelPersistence) {
519 if (fileDir[fileDir.
Length() - 1] !=
'/')
540 if (HasMethod(datasetname, methodTitle) != 0) {
541 Log() << kFATAL <<
"Booking failed since method with title <"
542 << methodTitle <<
"> already exists "<<
"in with DataSet Name <"<< loader->
GetName()<<
"> "
549 if(fMethodsMap.count(datasetname) == 0) {
551 fMethodsMap[datasetname] = mvector;
554 fMethodsMap[datasetname]->push_back( method );
564 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
return 0;
566 MVector *methods=fMethodsMap.find(datasetname)->second;
568 MVector::const_iterator itrMethod;
570 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
582 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
return 0;
584 std::string methodName = methodTitle.
Data();
586 return ( 0 == methodName.compare(
m->GetName() ) );
590 Bool_t isMethodNameExisting = std::any_of( methods->begin(), methods->end(), isEqualToMethodName);
592 return isMethodNameExisting;
601 if(!RootBaseDir()->GetDirectory(fDataSetInfo.
GetName())) RootBaseDir()->mkdir(fDataSetInfo.
GetName());
604 RootBaseDir()->cd(fDataSetInfo.
GetName());
651 processTrfs = fTransformations;
654 std::vector<TMVA::TransformationHandler*> trfs;
658 std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
659 for (; trfsDefIt!=trfsDef.end(); ++trfsDefIt) {
664 Log() << kDEBUG <<
"current transformation string: '" << trfS.
Data() <<
"'" <<
Endl;
670 if (trfS.
BeginsWith(
'I')) identityTrHandler = trfs.back();
676 std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
678 for (;trfIt != trfs.end(); ++trfIt) {
680 (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.
GetName()));
681 (*trfIt)->CalcTransformations(inputEvents);
686 for (trfIt = trfs.begin(); trfIt != trfs.end(); ++trfIt)
delete *trfIt;
698 std::map<TString,MVector*>::iterator itrMap;
699 std::map<TString,Double_t> TunedParameters;
700 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
702 MVector *methods=itrMap->second;
704 MVector::iterator itrMethod;
707 for( itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod ) {
711 Log() << kFATAL <<
"Dynamic cast to MethodBase failed" <<
Endl;
712 return TunedParameters;
717 <<
" not trained (training tree has less entries ["
728 Log() << kINFO <<
"Optimization of tuning parameters finished for Method:"<<mva->
GetName() <<
Endl;
732 return TunedParameters;
760 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
761 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
765 if (!this->HasMethod(datasetname, theMethodName)) {
766 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data())
772 if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
773 Log() << kERROR <<
Form(
"Can only generate ROC curves for analysis type kClassification and kMulticlass.")
785 Log() << kERROR <<
Form(
"Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
795 std::vector<Bool_t> *mvaResTypes =
dynamic_cast<ResultsClassification *
>(results)->GetValueVectorTypes();
796 std::vector<Float_t> mvaResWeights;
799 mvaResWeights.reserve(eventCollection.size());
800 for (
auto ev : eventCollection) {
801 mvaResWeights.push_back(ev->GetWeight());
804 rocCurve =
new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);
807 std::vector<Float_t> mvaRes;
808 std::vector<Bool_t> mvaResTypes;
809 std::vector<Float_t> mvaResWeights;
811 std::vector<std::vector<Float_t>> *rawMvaRes =
dynamic_cast<ResultsMulticlass *
>(results)->GetValueVector();
816 mvaRes.reserve(rawMvaRes->size());
817 for (
auto item : *rawMvaRes) {
818 mvaRes.push_back(item[iClass]);
822 mvaResTypes.reserve(eventCollection.size());
823 mvaResWeights.reserve(eventCollection.size());
824 for (
auto ev : eventCollection) {
825 mvaResTypes.push_back(ev->GetClass() == iClass);
826 mvaResWeights.push_back(ev->GetWeight());
829 rocCurve =
new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
845 return GetROCIntegral((
TString)loader->
GetName(), theMethodName, iClass);
858 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
859 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
863 if ( ! this->HasMethod(datasetname, theMethodName) ) {
864 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
869 if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
870 Log() << kERROR <<
Form(
"Can only generate ROC integral for analysis type kClassification. and kMulticlass.")
875 TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
877 Log() << kFATAL <<
Form(
"ROCCurve object was not created in Method = %s not found with Dataset = %s ",
878 theMethodName.
Data(), datasetname.
Data())
906 return GetROCCurve( (
TString)loader->
GetName(), theMethodName, setTitles, iClass );
925 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
926 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
930 if ( ! this->HasMethod(datasetname, theMethodName) ) {
931 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
936 if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
937 Log() << kERROR <<
Form(
"Can only generate ROC curves for analysis type kClassification and kMulticlass.") <<
Endl;
941 TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
945 Log() << kFATAL <<
Form(
"ROCCurve object was not created in Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
953 graph->GetYaxis()->SetTitle(
"Background rejection (Specificity)");
954 graph->GetXaxis()->SetTitle(
"Signal efficiency (Sensitivity)");
955 graph->SetTitle(
Form(
"Signal efficiency vs. Background rejection (%s)", theMethodName.
Data()));
975 return GetROCCurveAsMultiGraph((
TString)loader->
GetName(), iClass);
996 MVector *methods = fMethodsMap[datasetname.
Data()];
997 for (
auto * method_raw : *methods) {
999 if (method ==
nullptr) {
continue; }
1005 Log() << kERROR <<
Form(
"Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass, nClasses) <<
Endl;
1011 TGraph *
graph = this->GetROCCurve(datasetname, methodName,
false, iClass);
1012 graph->SetTitle(methodName);
1014 graph->SetLineWidth(2);
1015 graph->SetLineColor(line_color++);
1016 graph->SetFillColor(10);
1022 Log() << kERROR <<
Form(
"No metohds have class %i defined.", iClass) <<
Endl;
1059 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
1060 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
1068 TMultiGraph *multigraph = this->GetROCCurveAsMultiGraph(datasetname, iClass);
1071 multigraph->
Draw(
"AL");
1076 TString titleString =
Form(
"Signal efficiency vs. Background rejection");
1078 titleString =
Form(
"%s (Class=%i)", titleString.
Data(), iClass);
1083 multigraph->
SetTitle( titleString );
1085 canvas->
BuildLegend(0.15, 0.15, 0.35, 0.3,
"MVA Method");
1101 if (fMethodsMap.empty()) {
1102 Log() << kINFO <<
"...nothing found to train" <<
Endl;
1108 Log() << kDEBUG <<
"Train all methods for "
1112 std::map<TString,MVector*>::iterator itrMap;
1114 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1116 MVector *methods=itrMap->second;
1117 MVector::iterator itrMethod;
1120 for( itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod ) {
1124 if(mva==0)
continue;
1127 Log() << kFATAL <<
"No input data for the training provided!" <<
Endl;
1131 Log() << kFATAL <<
"You want to do regression training without specifying a target." <<
Endl;
1134 Log() << kFATAL <<
"You want to do classification training, but specified less than two classes." <<
Endl;
1137 if(!IsSilentFile()) WriteDataInformation(mva->
fDataSetInfo);
1142 <<
" not trained (training tree has less entries ["
1152 Log() << kHEADER <<
"Training finished" <<
Endl <<
Endl;
1159 Log() << kINFO <<
"Ranking input variables (method specific)..." <<
Endl;
1160 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1165 const Ranking* ranking = (*itrMethod)->CreateRanking();
1166 if (ranking != 0) ranking->
Print();
1167 else Log() << kINFO <<
"No variable ranking supplied by classifier: "
1174 if (!IsSilentFile()) {
1175 for (
UInt_t i=0; i<methods->size(); i++) {
1179 m->fTrainHistory.SaveHistory(
m->GetMethodName());
1187 if (fModelPersistence) {
1189 Log() << kHEADER <<
"=== Destroy and recreate all methods via weight files for testing ===" <<
Endl <<
Endl;
1191 if(!IsSilentFile())RootBaseDir()->cd();
1194 for (
UInt_t i=0; i<methods->size(); i++) {
1201 TString weightfile =
m->GetWeightFileName();
1208 TString testvarName =
m->GetTestvarName();
1217 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
1223 TString wfileDir =
m->DataInfo().GetName();
1225 m->SetWeightFileDir(wfileDir);
1226 m->SetModelPersistence(fModelPersistence);
1227 m->SetSilentFile(IsSilentFile());
1228 m->SetAnalysisType(fAnalysisType);
1230 m->ReadStateFromFile();
1231 m->SetTestvarName(testvarName);
1250 if (fMethodsMap.empty()) {
1251 Log() << kINFO <<
"...nothing found to test" <<
Endl;
1254 std::map<TString,MVector*>::iterator itrMap;
1256 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1258 MVector *methods=itrMap->second;
1259 MVector::iterator itrMethod;
1262 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1271 : (analysisType ==
Types::kMulticlass ?
"Multiclass classification" :
"Classification"))
1282 if (methodTitle !=
"") {
1286 Log() << kWARNING <<
"<MakeClass> Could not find classifier \"" << methodTitle
1287 <<
"\" in list" <<
Endl;
1293 MVector *methods=fMethodsMap.find(datasetname)->second;
1294 MVector::const_iterator itrMethod;
1295 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1297 if(method==0)
continue;
1310 if (methodTitle !=
"") {
1314 Log() << kWARNING <<
"<PrintHelpMessage> Could not find classifier \"" << methodTitle
1315 <<
"\" in list" <<
Endl;
1321 MVector *methods=fMethodsMap.find(datasetname)->second;
1322 MVector::const_iterator itrMethod ;
1323 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1325 if(method==0)
continue;
1337 Log() << kINFO <<
"Evaluating all variables..." <<
Endl;
1343 this->BookMethod(loader,
"Variable",
s );
1355 if (fMethodsMap.empty()) {
1356 Log() << kINFO <<
"...nothing found to evaluate" <<
Endl;
1359 std::map<TString,MVector*>::iterator itrMap;
1361 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1363 MVector *methods=itrMap->second;
1373 Int_t nmeth_used[2] = {0,0};
1375 std::vector<std::vector<TString> > mname(2);
1376 std::vector<std::vector<Double_t> > sig(2),
sep(2), roc(2);
1377 std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
1378 std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
1379 std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
1381 std::vector<std::vector<Float_t> > multiclass_testEff;
1382 std::vector<std::vector<Float_t> > multiclass_trainEff;
1383 std::vector<std::vector<Float_t> > multiclass_testPur;
1384 std::vector<std::vector<Float_t> > multiclass_trainPur;
1386 std::vector<std::vector<Float_t> > train_history;
1389 std::vector<TMatrixD> multiclass_trainConfusionEffB01;
1390 std::vector<TMatrixD> multiclass_trainConfusionEffB10;
1391 std::vector<TMatrixD> multiclass_trainConfusionEffB30;
1392 std::vector<TMatrixD> multiclass_testConfusionEffB01;
1393 std::vector<TMatrixD> multiclass_testConfusionEffB10;
1394 std::vector<TMatrixD> multiclass_testConfusionEffB30;
1396 std::vector<std::vector<Double_t> > biastrain(1);
1397 std::vector<std::vector<Double_t> > biastest(1);
1398 std::vector<std::vector<Double_t> > devtrain(1);
1399 std::vector<std::vector<Double_t> > devtest(1);
1400 std::vector<std::vector<Double_t> > rmstrain(1);
1401 std::vector<std::vector<Double_t> > rmstest(1);
1402 std::vector<std::vector<Double_t> > minftrain(1);
1403 std::vector<std::vector<Double_t> > minftest(1);
1404 std::vector<std::vector<Double_t> > rhotrain(1);
1405 std::vector<std::vector<Double_t> > rhotest(1);
1408 std::vector<std::vector<Double_t> > biastrainT(1);
1409 std::vector<std::vector<Double_t> > biastestT(1);
1410 std::vector<std::vector<Double_t> > devtrainT(1);
1411 std::vector<std::vector<Double_t> > devtestT(1);
1412 std::vector<std::vector<Double_t> > rmstrainT(1);
1413 std::vector<std::vector<Double_t> > rmstestT(1);
1414 std::vector<std::vector<Double_t> > minftrainT(1);
1415 std::vector<std::vector<Double_t> > minftestT(1);
1424 for (MVector::iterator itrMethod =methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1427 if(theMethod==0)
continue;
1428 theMethod->
SetFile(fgTargetFile);
1433 doRegression =
kTRUE;
1440 Log() << kINFO <<
"TestRegression (testing)" <<
Endl;
1442 biastest[0] .push_back( bias );
1443 devtest[0] .push_back( dev );
1444 rmstest[0] .push_back( rms );
1445 minftest[0] .push_back( mInf );
1446 rhotest[0] .push_back( rho );
1447 biastestT[0] .push_back( biasT );
1448 devtestT[0] .push_back( devT );
1449 rmstestT[0] .push_back( rmsT );
1450 minftestT[0] .push_back( mInfT );
1452 Log() << kINFO <<
"TestRegression (training)" <<
Endl;
1454 biastrain[0] .push_back( bias );
1455 devtrain[0] .push_back( dev );
1456 rmstrain[0] .push_back( rms );
1457 minftrain[0] .push_back( mInf );
1458 rhotrain[0] .push_back( rho );
1459 biastrainT[0].push_back( biasT );
1460 devtrainT[0] .push_back( devT );
1461 rmstrainT[0] .push_back( rmsT );
1462 minftrainT[0].push_back( mInfT );
1466 if (!IsSilentFile()) {
1467 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1475 doMulticlass =
kTRUE;
1476 Log() << kINFO <<
"Evaluate multiclass classification method: " << theMethod->
GetMethodName() <<
Endl;
1495 if (!IsSilentFile()) {
1496 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1519 eff01err[isel].push_back(err);
1521 eff10err[isel].push_back(err);
1523 eff30err[isel].push_back(err);
1532 if (!IsSilentFile()) {
1533 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1541 std::vector<TString> vtemps = mname[0];
1542 std::vector< std::vector<Double_t> > vtmp;
1543 vtmp.push_back( devtest[0] );
1544 vtmp.push_back( devtrain[0] );
1545 vtmp.push_back( biastest[0] );
1546 vtmp.push_back( biastrain[0] );
1547 vtmp.push_back( rmstest[0] );
1548 vtmp.push_back( rmstrain[0] );
1549 vtmp.push_back( minftest[0] );
1550 vtmp.push_back( minftrain[0] );
1551 vtmp.push_back( rhotest[0] );
1552 vtmp.push_back( rhotrain[0] );
1553 vtmp.push_back( devtestT[0] );
1554 vtmp.push_back( devtrainT[0] );
1555 vtmp.push_back( biastestT[0] );
1556 vtmp.push_back( biastrainT[0]);
1557 vtmp.push_back( rmstestT[0] );
1558 vtmp.push_back( rmstrainT[0] );
1559 vtmp.push_back( minftestT[0] );
1560 vtmp.push_back( minftrainT[0]);
1563 devtest[0] = vtmp[0];
1564 devtrain[0] = vtmp[1];
1565 biastest[0] = vtmp[2];
1566 biastrain[0] = vtmp[3];
1567 rmstest[0] = vtmp[4];
1568 rmstrain[0] = vtmp[5];
1569 minftest[0] = vtmp[6];
1570 minftrain[0] = vtmp[7];
1571 rhotest[0] = vtmp[8];
1572 rhotrain[0] = vtmp[9];
1573 devtestT[0] = vtmp[10];
1574 devtrainT[0] = vtmp[11];
1575 biastestT[0] = vtmp[12];
1576 biastrainT[0] = vtmp[13];
1577 rmstestT[0] = vtmp[14];
1578 rmstrainT[0] = vtmp[15];
1579 minftestT[0] = vtmp[16];
1580 minftrainT[0] = vtmp[17];
1581 }
else if (doMulticlass) {
1589 for (
Int_t k=0; k<2; k++) {
1590 std::vector< std::vector<Double_t> > vtemp;
1591 vtemp.push_back( effArea[k] );
1592 vtemp.push_back( eff10[k] );
1593 vtemp.push_back( eff01[k] );
1594 vtemp.push_back( eff30[k] );
1595 vtemp.push_back( eff10err[k] );
1596 vtemp.push_back( eff01err[k] );
1597 vtemp.push_back( eff30err[k] );
1598 vtemp.push_back( trainEff10[k] );
1599 vtemp.push_back( trainEff01[k] );
1600 vtemp.push_back( trainEff30[k] );
1601 vtemp.push_back( sig[k] );
1602 vtemp.push_back(
sep[k] );
1603 vtemp.push_back( roc[k] );
1604 std::vector<TString> vtemps = mname[k];
1606 effArea[k] = vtemp[0];
1607 eff10[k] = vtemp[1];
1608 eff01[k] = vtemp[2];
1609 eff30[k] = vtemp[3];
1610 eff10err[k] = vtemp[4];
1611 eff01err[k] = vtemp[5];
1612 eff30err[k] = vtemp[6];
1613 trainEff10[k] = vtemp[7];
1614 trainEff01[k] = vtemp[8];
1615 trainEff30[k] = vtemp[9];
1631 const Int_t nmeth = methodsNoCuts.size();
1634 if (!doRegression && !doMulticlass ) {
1640 std::vector<Double_t> rvec;
1648 std::vector<TString>* theVars =
new std::vector<TString>;
1649 std::vector<ResultsClassification*> mvaRes;
1650 for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); ++itrMethod, ++ivar) {
1653 theVars->push_back(
m->GetTestvarName() );
1654 rvec.push_back(
m->GetSignalReferenceCut() );
1655 theVars->back().ReplaceAll(
"MVA_",
"" );
1675 for (
Int_t im=0; im<nmeth; im++) {
1679 Log() << kWARNING <<
"Found NaN return value in event: " << ievt
1680 <<
" for method \"" << methodsNoCuts[im]->GetName() <<
"\"" <<
Endl;
1683 else dvec[im] = retval;
1687 else { tpBkg->
AddRow( dvec ); theMat = overlapB; }
1690 for (
Int_t im=0; im<nmeth; im++) {
1691 for (
Int_t jm=im; jm<nmeth; jm++) {
1692 if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1694 if (im != jm) (*theMat)(jm,im)++;
1714 if (corrMatS != 0 && corrMatB != 0) {
1719 for (
Int_t im=0; im<nmeth; im++) {
1720 for (
Int_t jm=0; jm<nmeth; jm++) {
1721 mvaMatS(im,jm) = (*corrMatS)(im,jm);
1722 mvaMatB(im,jm) = (*corrMatB)(im,jm);
1727 std::vector<TString> theInputVars;
1730 for (
Int_t iv=0; iv<nvar; iv++) {
1732 for (
Int_t jm=0; jm<nmeth; jm++) {
1733 varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1734 varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1749 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Correlations between input variables and MVA response (signal):" <<
Endl;
1753 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Correlations between input variables and MVA response (background):" <<
Endl;
1760 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"The following \"overlap\" matrices contain the fraction of events for which " <<
Endl;
1761 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" <<
Endl;
1762 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"An event is signal-like, if its MVA output exceeds the following value:" <<
Endl;
1764 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"which correspond to the working point: eff(signal) = 1 - eff(background)" <<
Endl;
1767 if (nmeth != (
Int_t)methods->size())
1768 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Note: no correlations and overlap with cut method are provided at present" <<
Endl;
1800 TString hLine =
"--------------------------------------------------------------------------------------------------";
1801 Log() << kINFO <<
"Evaluation results ranked by smallest RMS on test sample:" <<
Endl;
1802 Log() << kINFO <<
"(\"Bias\" quotes the mean deviation of the regression from true target." <<
Endl;
1803 Log() << kINFO <<
" \"MutInf\" is the \"Mutual Information\" between regression and target." <<
Endl;
1804 Log() << kINFO <<
" Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" <<
Endl;
1805 Log() << kINFO <<
" tained when removing events deviating more than 2sigma from average.)" <<
Endl;
1806 Log() << kINFO << hLine <<
Endl;
1808 Log() << kINFO << hLine <<
Endl;
1810 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1812 if(theMethod==0)
continue;
1814 Log() << kINFO <<
Form(
"%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1816 (
const char*)mname[0][i],
1817 biastest[0][i], biastestT[0][i],
1818 rmstest[0][i], rmstestT[0][i],
1819 minftest[0][i], minftestT[0][i] )
1822 Log() << kINFO << hLine <<
Endl;
1824 Log() << kINFO <<
"Evaluation results ranked by smallest RMS on training sample:" <<
Endl;
1825 Log() << kINFO <<
"(overtraining check)" <<
Endl;
1826 Log() << kINFO << hLine <<
Endl;
1827 Log() << kINFO <<
"DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" <<
Endl;
1828 Log() << kINFO << hLine <<
Endl;
1830 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1832 if(theMethod==0)
continue;
1833 Log() << kINFO <<
Form(
"%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1835 (
const char*)mname[0][i],
1836 biastrain[0][i], biastrainT[0][i],
1837 rmstrain[0][i], rmstrainT[0][i],
1838 minftrain[0][i], minftrainT[0][i] )
1841 Log() << kINFO << hLine <<
Endl;
1843 }
else if (doMulticlass) {
1849 "-------------------------------------------------------------------------------------------------------";
1888 TString header1 =
Form(
"%-15s%-15s%-15s%-15s%-15s%-15s",
"Dataset",
"MVA Method",
"ROC AUC",
"Sig eff@B=0.01",
1889 "Sig eff@B=0.10",
"Sig eff@B=0.30");
1890 TString header2 =
Form(
"%-15s%-15s%-15s%-15s%-15s%-15s",
"Name:",
"/ Class:",
"test (train)",
"test (train)",
1891 "test (train)",
"test (train)");
1893 Log() << kINFO <<
"1-vs-rest performance metrics per class" <<
Endl;
1894 Log() << kINFO << hLine <<
Endl;
1896 Log() << kINFO <<
"Considers the listed class as signal and the other classes" <<
Endl;
1897 Log() << kINFO <<
"as background, reporting the resulting binary performance." <<
Endl;
1898 Log() << kINFO <<
"A score of 0.820 (0.850) means 0.820 was acheived on the" <<
Endl;
1899 Log() << kINFO <<
"test set and 0.850 on the training set." <<
Endl;
1902 Log() << kINFO << header1 <<
Endl;
1903 Log() << kINFO << header2 <<
Endl;
1904 for (
Int_t k = 0; k < 2; k++) {
1905 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
1907 mname[k][i].ReplaceAll(
"Variable_",
"");
1910 const TString datasetName = itrMap->first;
1911 const TString mvaName = mname[k][i];
1914 if (theMethod == 0) {
1920 Log() << kINFO << row <<
Endl;
1921 Log() << kINFO <<
"------------------------------" <<
Endl;
1924 for (
UInt_t iClass = 0; iClass < numClasses; ++iClass) {
1938 const TString rocaucCmp =
Form(
"%5.3f (%5.3f)", rocaucTest, rocaucTrain);
1939 const TString effB01Cmp =
Form(
"%5.3f (%5.3f)", effB01Test, effB01Train);
1940 const TString effB10Cmp =
Form(
"%5.3f (%5.3f)", effB10Test, effB10Train);
1941 const TString effB30Cmp =
Form(
"%5.3f (%5.3f)", effB30Test, effB30Train);
1942 row =
Form(
"%-15s%-15s%-15s%-15s%-15s%-15s",
"", className.
Data(), rocaucCmp.
Data(), effB01Cmp.
Data(),
1943 effB10Cmp.
Data(), effB30Cmp.
Data());
1944 Log() << kINFO << row <<
Endl;
1946 delete rocCurveTrain;
1947 delete rocCurveTest;
1952 Log() << kINFO << hLine <<
Endl;
1957 auto printMatrix = [](
TMatrixD const &matTraining,
TMatrixD const &matTesting, std::vector<TString> classnames,
1967 for (
UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1968 header +=
Form(
" %-14s", classnames[iCol].Data());
1969 headerInfo +=
Form(
" %-14s",
" test (train)");
1971 stream << kINFO << header <<
Endl;
1972 stream << kINFO << headerInfo <<
Endl;
1974 for (
UInt_t iRow = 0; iRow < numClasses; ++iRow) {
1975 stream << kINFO <<
Form(
" %-14s", classnames[iRow].Data());
1977 for (
UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1979 stream << kINFO <<
Form(
" %-14s",
"-");
1981 Double_t trainValue = matTraining[iRow][iCol];
1982 Double_t testValue = matTesting[iRow][iCol];
1983 TString entry =
Form(
"%-5.3f (%-5.3f)", testValue, trainValue);
1984 stream << kINFO <<
Form(
" %-14s", entry.
Data());
1987 stream << kINFO <<
Endl;
1992 Log() << kINFO <<
"Confusion matrices for all methods" <<
Endl;
1993 Log() << kINFO << hLine <<
Endl;
1995 Log() << kINFO <<
"Does a binary comparison between the two classes given by a " <<
Endl;
1996 Log() << kINFO <<
"particular row-column combination. In each case, the class " <<
Endl;
1997 Log() << kINFO <<
"given by the row is considered signal while the class given " <<
Endl;
1998 Log() << kINFO <<
"by the column index is considered background." <<
Endl;
2000 for (
UInt_t iMethod = 0; iMethod < methods->size(); ++iMethod) {
2002 if (theMethod ==
nullptr) {
2007 std::vector<TString> classnames;
2008 for (
UInt_t iCls = 0; iCls < numClasses; ++iCls) {
2012 <<
"=== Showing confusion matrix for method : " <<
Form(
"%-15s", (
const char *)mname[0][iMethod])
2014 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.01%)" <<
Endl;
2015 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
2016 printMatrix(multiclass_testConfusionEffB01[iMethod], multiclass_trainConfusionEffB01[iMethod], classnames,
2020 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.10%)" <<
Endl;
2021 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
2022 printMatrix(multiclass_testConfusionEffB10[iMethod], multiclass_trainConfusionEffB10[iMethod], classnames,
2026 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.30%)" <<
Endl;
2027 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
2028 printMatrix(multiclass_testConfusionEffB30[iMethod], multiclass_trainConfusionEffB30[iMethod], classnames,
2032 Log() << kINFO << hLine <<
Endl;
2038 Log().EnableOutput();
2041 TString hLine =
"------------------------------------------------------------------------------------------"
2042 "-------------------------";
2043 Log() << kINFO <<
"Evaluation results ranked by best signal efficiency and purity (area)" <<
Endl;
2044 Log() << kINFO << hLine <<
Endl;
2045 Log() << kINFO <<
"DataSet MVA " <<
Endl;
2046 Log() << kINFO <<
"Name: Method: ROC-integ" <<
Endl;
2051 Log() << kDEBUG << hLine <<
Endl;
2052 for (
Int_t k = 0; k < 2; k++) {
2053 if (k == 1 && nmeth_used[k] > 0) {
2054 Log() << kINFO << hLine <<
Endl;
2055 Log() << kINFO <<
"Input Variables: " <<
Endl << hLine <<
Endl;
2057 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
2058 TString datasetName = itrMap->first;
2059 TString methodName = mname[k][i];
2066 if (theMethod == 0) {
2072 std::vector<Bool_t> *mvaResType =
2076 if (mvaResType->size() != 0) {
2077 rocIntegral = GetROCIntegral(datasetName, methodName);
2080 if (
sep[k][i] < 0 || sig[k][i] < 0) {
2082 Log() << kINFO <<
Form(
"%-13s %-15s: %#1.3f", datasetName.
Data(), methodName.
Data(), effArea[k][i])
2094 Log() << kINFO <<
Form(
"%-13s %-15s: %#1.3f", datasetName.
Data(), methodName.
Data(), rocIntegral)
2108 Log() << kINFO << hLine <<
Endl;
2110 Log() << kINFO <<
"Testing efficiency compared to training efficiency (overtraining check)" <<
Endl;
2111 Log() << kINFO << hLine <<
Endl;
2113 <<
"DataSet MVA Signal efficiency: from test sample (from training sample) "
2115 Log() << kINFO <<
"Name: Method: @B=0.01 @B=0.10 @B=0.30 "
2117 Log() << kINFO << hLine <<
Endl;
2118 for (
Int_t k = 0; k < 2; k++) {
2119 if (k == 1 && nmeth_used[k] > 0) {
2120 Log() << kINFO << hLine <<
Endl;
2121 Log() << kINFO <<
"Input Variables: " <<
Endl << hLine <<
Endl;
2123 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
2124 if (k == 1) mname[k][i].ReplaceAll(
"Variable_",
"");
2126 if (theMethod == 0)
continue;
2128 Log() << kINFO <<
Form(
"%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
2130 trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
2134 Log() << kINFO << hLine <<
Endl;
2137 if (
gTools().CheckForSilentOption(GetOptions()))
Log().InhibitOutput();
2142 std::list<TString> datasets;
2143 for (
Int_t k=0; k<2; k++) {
2144 for (
Int_t i=0; i<nmeth_used[k]; i++) {
2146 if(theMethod==0)
continue;
2149 if(std::find(datasets.begin(), datasets.end(), theMethod->
fDataSetInfo.
GetName()) == datasets.end())
2168 fModelPersistence=
kFALSE;
2173 if(vitype==VIType::kShort)
2174 return EvaluateImportanceShort(loader,theMethod,methodTitle,theOption);
2175 else if(vitype==VIType::kAll)
2176 return EvaluateImportanceAll(loader,theMethod,methodTitle,theOption);
2177 else if(vitype==VIType::kRandom&&nbits>10)
2179 return EvaluateImportanceRandom(loader,
pow(2,nbits),theMethod,methodTitle,theOption);
2182 std::cerr<<
"Error in Variable Importance: Random mode require more that 10 variables in the dataset."<<std::endl;
2199 uint64_t range =
pow(2, nbits);
2202 std::vector<Double_t> importances(nbits);
2204 std::vector<Double_t> ROC(range);
2206 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2209 for (
x = 1;
x <range ;
x++) {
2211 std::bitset<VIBITS> xbitset(
x);
2212 if (
x == 0)
continue;
2218 for (
int index = 0; index < nbits; index++) {
2219 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2226 BookMethod(seedloader, theMethod, methodTitle, theOption);
2231 EvaluateAllMethods();
2234 ROC[
x] = GetROCIntegral(xbitset.to_string(), methodTitle);
2241 this->DeleteAllMethods();
2243 fMethodsMap.clear();
2248 for (
x = 0;
x <range ;
x++)
2251 for (uint32_t i = 0; i <
VIBITS; ++i) {
2254 std::bitset<VIBITS> ybitset(
y);
2260 importances[ny] = SROC - 0.5;
2266 importances[ny] += SROC - SSROC;
2272 std::cout<<
"--- Variable Importance Results (All)"<<std::endl;
2273 return GetImportance(nbits,importances,varNames);
2276static long int sum(
long int i)
2279 for(
long int n=0;
n<i;
n++) _sum+=
pow(2,
n);
2294 long int range =
sum(nbits);
2297 std::vector<Double_t> importances(nbits);
2298 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2304 std::bitset<VIBITS> xbitset(
x);
2305 if (
x == 0)
Log()<<kFATAL<<
"Error: need at least one variable.";
2312 for (
int index = 0; index < nbits; index++) {
2313 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2320 BookMethod(seedloader, theMethod, methodTitle, theOption);
2325 EvaluateAllMethods();
2328 SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2335 this->DeleteAllMethods();
2336 fMethodsMap.clear();
2340 for (uint32_t i = 0; i <
VIBITS; ++i) {
2343 std::bitset<VIBITS> ybitset(
y);
2349 importances[ny] = SROC - 0.5;
2356 for (
int index = 0; index < nbits; index++) {
2357 if (ybitset[index]) subseedloader->
AddVariable(varNames[index],
'F');
2364 BookMethod(subseedloader, theMethod, methodTitle, theOption);
2369 EvaluateAllMethods();
2372 SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2373 importances[ny] += SROC - SSROC;
2379 delete subseedloader;
2380 this->DeleteAllMethods();
2381 fMethodsMap.clear();
2384 std::cout<<
"--- Variable Importance Results (Short)"<<std::endl;
2385 return GetImportance(nbits,importances,varNames);
2401 long int range =
pow(2, nbits);
2404 std::vector<Double_t> importances(nbits);
2406 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2410 x = rangen -> Integer(range);
2412 std::bitset<32> xbitset(
x);
2413 if (
x == 0)
continue;
2420 for (
int index = 0; index < nbits; index++) {
2421 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2428 BookMethod(seedloader, theMethod, methodTitle, theOption);
2433 EvaluateAllMethods();
2436 SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2444 this->DeleteAllMethods();
2445 fMethodsMap.clear();
2449 for (uint32_t i = 0; i < 32; ++i) {
2452 std::bitset<32> ybitset(
y);
2458 importances[ny] = SROC - 0.5;
2459 importances_norm += importances[ny];
2467 for (
int index = 0; index < nbits; index++) {
2468 if (ybitset[index]) subseedloader->
AddVariable(varNames[index],
'F');
2475 BookMethod(subseedloader, theMethod, methodTitle, theOption);
2480 EvaluateAllMethods();
2483 SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2484 importances[ny] += SROC - SSROC;
2490 delete subseedloader;
2491 this->DeleteAllMethods();
2492 fMethodsMap.clear();
2496 std::cout<<
"--- Variable Importance Results (Random)"<<std::endl;
2497 return GetImportance(nbits,importances,varNames);
2504 TH1F *vih1 =
new TH1F(
"vih1",
"", nbits, 0, nbits);
2509 for (
int i = 0; i < nbits; i++) {
2510 normalization = normalization + importances[i];
2519 std::vector<Double_t> x_ie(nbits), y_ie(nbits);
2520 for (
Int_t i = 1; i < nbits + 1; i++) {
2521 x_ie[i - 1] = (i - 1) * 1.;
2522 roc = 100.0 * importances[i - 1] / normalization;
2524 std::cout<<
"--- "<<varNames[i-1]<<
" = "<<roc<<
" %"<<std::endl;
2528 TGraph *g_ie =
new TGraph(nbits + 2, &x_ie[0], &y_ie[0]);
double pow(double, double)
TMatrixT< Double_t > TMatrixD
char * Form(const char *fmt,...)
R__EXTERN TStyle * gStyle
R__EXTERN TSystem * gSystem
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title.
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title.
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb",...
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
A Graph is a graphics object made of two arrays X and Y with npoints each.
virtual void SetTitle(const char *title="")
Change (i.e.
1-D histogram with a float per channel (see TH1 documentation)
virtual void SetDirectory(TDirectory *dir)
By default, when a histogram is created, it is added to the list of histogram objects in the current ...
virtual void SetTitle(const char *title)
See GetStatOverflows for more information.
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
virtual void SetBarWidth(Float_t width=0.5)
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Service class for 2-Dim histogram classes.
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
TString fWeightFileDirPrefix
void SetDrawProgressBar(Bool_t d)
void SetUseColor(Bool_t uc)
class TMVA::Config::VariablePlotting fVariablePlotting
void SetConfigDescription(const char *d)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
void SetConfigName(const char *n)
virtual void ParseOptions()
options parser
const TString & GetOptions() const
void CheckForUnusedOptions() const
checks for unused options in option string
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
DataSetInfo & GetDataSetInfo()
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Class that contains all the data information.
UInt_t GetNVariables() const
virtual const char * GetName() const
Returns name of object.
const TMatrixD * CorrelationMatrix(const TString &className) const
UInt_t GetNClasses() const
const TString & GetSplitOptions() const
UInt_t GetNTargets() const
DataSet * GetDataSet() const
returns data set
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
std::vector< TString > GetListOfVariables() const
returns list of variables
ClassInfo * GetClassInfo(Int_t clNum) const
const TCut & GetCut(Int_t i) const
VariableInfo & GetVariableInfo(Int_t i)
Bool_t IsSignal(const Event *ev) const
DataSetManager * GetDataSetManager()
DataInputHandler & DataInput()
Class that contains all the data information.
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets,...
const Event * GetEvent() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Long64_t GetNTrainingEvents() const
void SetCurrentType(Types::ETreeType type) const
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
This is the main MVA steering class.
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Calculate the integral of the ROC curve, also known as the area under curve (AUC),...
Bool_t fCorrelations
verbosity level, controls granularity of logging
std::vector< IMethod * > MVector
void TrainAllMethods()
Iterates through all booked methods and calls training.
Bool_t Verbose(void) const
void WriteDataInformation(DataSetInfo &fDataSetInfo)
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponi...
Bool_t fVerbose
list of transformations to test
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Bool_t fROC
enable to calculate correlations
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
TString fVerboseLevel
verbose mode
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
virtual ~Factory()
Destructor.
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
MethodBase * BookMethodWeightfile(DataLoader *dataloader, TMVA::Types::EMVA methodType, const TString &weightfile)
Adds an already constructed method to be managed by this factory.
Bool_t fModelPersistence
the training type
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods and sees if they use parameter tuning and if so.
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Private method to generate a ROCCurve instance for a given method.
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Types::EAnalysisType fAnalysisType
jobname, used as extension in weight file names
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
void SetVerbose(Bool_t v=kTRUE)
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
void DeleteAllMethods(void)
Delete methods.
TString fTransformations
option string given by construction (presently only "V")
void Greetings()
Print welcome message.
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
Generate a collection of graphs, for all methods for a given class.
Interface for all concrete MVA method implementations.
virtual void PrintHelpMessage() const =0
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
virtual void MakeClass(const TString &classFileName=TString("")) const =0
Virtual base Class for all MVA method.
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
void SetSilentFile(Bool_t status)
void SetWeightFileDir(TString fileDir)
set directory of weight file
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
TString GetMethodTypeName() const
Bool_t DoMulticlass() const
virtual Double_t GetSignificance() const
compute significance of mean difference
const char * GetName() const
Types::EAnalysisType GetAnalysisType() const
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
void PrintHelpMessage() const
prints out method-specific help method
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void TestMulticlass()
test multiclass classification
void SetupMethod()
setup of methods
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
virtual void SetAnalysisType(Types::EAnalysisType type)
const TString & GetMethodName() const
Bool_t DoRegression() const
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
virtual Double_t GetTrainingEfficiency(const TString &)
DataSetInfo & DataInfo() const
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
virtual void TestClassification()
initialization
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
void ReadStateFromFile()
Function to write options and weights to file.
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
DataSetInfo & fDataSetInfo
Types::EMVA GetMethodType() const
void SetFile(TFile *file)
void SetModelPersistence(Bool_t status)
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Class for boosting a TMVA method.
void SetBoostedMethodName(TString methodName)
DataSetManager * fDataSetManager
Class for categorizing the phase space.
DataSetManager * fDataSetManager
ostringstream derivative to redirect and format output
void SetMinType(EMsgType minType)
void SetSource(const std::string &source)
static void InhibitOutput()
Double_t GetEffSForEffB(Double_t effB, const UInt_t num_points=41)
Calculate the signal efficiency (sensitivity) for a given background efficiency (sensitivity).
Double_t GetROCIntegral(const UInt_t points=41)
Calculates the ROC integral (AUC)
TGraph * GetROCCurve(const UInt_t points=100)
Returns a new TGraph containing the ROC curve.
Ranking for variables in method (implementation)
virtual void Print() const
get maximum length of variable names
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Singleton class for Global types used by TMVA.
static Types & Instance()
Return the single instance of "Types" if it exists already, or create it (Singleton)
const TString & GetLabel() const
A TMultiGraph is a collection of TGraph (or derived) objects.
TList * GetListOfGraphs() const
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
TH1F * GetHistogram()
Returns a pointer to the histogram used to draw the axis.
virtual void Draw(Option_t *chopt="")
Draw this multigraph with its current attributes.
TAxis * GetYaxis()
Get y axis of the graph.
TAxis * GetXaxis()
Get x axis of the graph.
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
virtual const char * GetName() const
Returns name of object.
@ kOverwrite
overwrite existing object with same name
virtual TLegend * BuildLegend(Double_t x1=0.3, Double_t y1=0.21, Double_t x2=0.3, Double_t y2=0.21, const char *title="", Option_t *option="")
Build a legend from the graphical objects in the pad.
virtual void SetGrid(Int_t valuex=1, Int_t valuey=1)
Principal Components Analysis (PCA)
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
const TMatrixD * GetCovarianceMatrix() const
virtual void MakePrincipals()
Perform the principal components analysis.
Random number generator class based on M.
void ToLower()
Change string to lower-case.
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
const char * Data() const
TString & ReplaceAll(const TString &s1, const TString &s2)
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
void SetTitleXOffset(Float_t offset=1)
virtual int MakeDirectory(const char *name)
Make a directory.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
std::string GetMethodName(TCppMethod_t)
TCppMethod_t GetMethod(TCppScope_t scope, TCppIndex_t imeth)
static constexpr double s
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
static long int sum(long int i)
const Int_t MinNoTrainingEvents