120 fTransformations (
"I" ),
122 fVerboseLevel ( kINFO ),
125 fSilentFile ( theTargetFile == nullptr ),
126 fJobName ( jobName ),
127 fAnalysisType (
Types::kClassification ),
128 fModelPersistence (
kTRUE)
159 DeclareOptionRef( color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)" );
160 DeclareOptionRef(
fTransformations,
"Transformations",
"List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
163 DeclareOptionRef( silent,
"Silent",
"Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
165 "DrawProgressBar",
"Draw progress bar to display training, testing and evaluation schedule (default: True)" );
168 "Option to save the trained model in xml file or using serialization");
172 "AnalysisType",
"Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
205 fTransformations (
"I" ),
209 fSilentFile (
kTRUE ),
210 fJobName ( jobName ),
211 fAnalysisType (
Types::kClassification ),
212 fModelPersistence (
kTRUE)
244 DeclareOptionRef( color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)" );
245 DeclareOptionRef(
fTransformations,
"Transformations",
"List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
248 DeclareOptionRef( silent,
"Silent",
"Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
250 "DrawProgressBar",
"Draw progress bar to display training, testing and evaluation schedule (default: True)" );
253 "Option to save the trained model in xml file or using serialization");
257 "AnalysisType",
"Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
// Walk fDefaultTrfs and delete each transformation object it points to.
301 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
302 for (;trfIt != fDefaultTrfs.end(); ++trfIt)
delete (*trfIt);
304 this->DeleteAllMethods();
319 std::map<TString,MVector*>::iterator itrMap;
321 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
323 MVector *methods=itrMap->second;
325 MVector::iterator itrMethod = methods->begin();
326 for (; itrMethod != methods->end(); ++itrMethod) {
327 Log() << kDEBUG <<
"Delete method: " << (*itrMethod)->GetName() <<
Endl;
367 if(fMethodsMap.find(datasetname)!=fMethodsMap.end())
369 if (
GetMethod( datasetname,methodTitle ) != 0) {
370 Log() << kFATAL <<
"Booking failed since method with title <"
371 << methodTitle <<
"> already exists "<<
"in with DataSet Name <"<< loader->
GetName()<<
"> "
377 Log() << kHEADER <<
"Booking method: " <<
gTools().
Color(
"bold") << methodTitle
385 "Number of times the classifier will be boosted" );
390 if(fModelPersistence)
396 if (fileDir[fileDir.
Length()-1] !=
'/') fileDir +=
"/";
408 Log() << kDEBUG <<
"Boost Number is " << boostNum <<
" > 0: train boosted classifier" <<
Endl;
412 Log() << kFATAL <<
"Method with type kBoost cannot be casted to MethodCategory. /Factory" <<
Endl;
419 methBoost->
SetFile(fgTargetFile);
424 if (method==0)
return 0;
430 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
436 methCat->
SetFile(fgTargetFile);
444 Log() << kWARNING <<
"Method " << method->
GetMethodTypeName() <<
" is not capable of handling " ;
469 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
472 fMethodsMap[datasetname]=mvector;
474 fMethodsMap[datasetname]->push_back( method );
506 if (method ==
nullptr)
return nullptr;
509 Log() << kERROR <<
"Cannot handle category methods for now." <<
Endl;
513 if(fModelPersistence) {
518 if (fileDir[fileDir.
Length() - 1] !=
'/')
539 if (HasMethod(datasetname, methodTitle) != 0) {
540 Log() << kFATAL <<
"Booking failed since method with title <"
541 << methodTitle <<
"> already exists "<<
"in with DataSet Name <"<< loader->
GetName()<<
"> "
548 if(fMethodsMap.count(datasetname) == 0) {
550 fMethodsMap[datasetname] = mvector;
553 fMethodsMap[datasetname]->push_back( method );
563 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
return 0;
565 MVector *methods=fMethodsMap.find(datasetname)->second;
567 MVector::const_iterator itrMethod;
569 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
581 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
return 0;
583 std::string methodName = methodTitle.
Data();
585 return ( 0 == methodName.compare(
m->GetName() ) );
589 Bool_t isMethodNameExisting = std::any_of( methods->begin(), methods->end(), isEqualToMethodName);
591 return isMethodNameExisting;
600 if(!RootBaseDir()->GetDirectory(fDataSetInfo.
GetName())) RootBaseDir()->mkdir(fDataSetInfo.
GetName());
603 RootBaseDir()->cd(fDataSetInfo.
GetName());
650 processTrfs = fTransformations;
653 std::vector<TMVA::TransformationHandler*> trfs;
657 std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
658 for (; trfsDefIt!=trfsDef.end(); ++trfsDefIt) {
663 Log() << kDEBUG <<
"current transformation string: '" << trfS.
Data() <<
"'" <<
Endl;
669 if (trfS.
BeginsWith(
'I')) identityTrHandler = trfs.back();
675 std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
677 for (;trfIt != trfs.end(); ++trfIt) {
679 (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.
GetName()));
680 (*trfIt)->CalcTransformations(inputEvents);
685 for (trfIt = trfs.begin(); trfIt != trfs.end(); ++trfIt)
delete *trfIt;
697 std::map<TString,MVector*>::iterator itrMap;
698 std::map<TString,Double_t> TunedParameters;
699 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
701 MVector *methods=itrMap->second;
703 MVector::iterator itrMethod;
706 for( itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod ) {
710 Log() << kFATAL <<
"Dynamic cast to MethodBase failed" <<
Endl;
711 return TunedParameters;
716 <<
" not trained (training tree has less entries ["
727 Log() << kINFO <<
"Optimization of tuning parameters finished for Method:"<<mva->
GetName() <<
Endl;
731 return TunedParameters;
759 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
760 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
764 if (!this->HasMethod(datasetname, theMethodName)) {
765 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data())
771 if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
772 Log() << kERROR <<
Form(
"Can only generate ROC curves for analysis type kClassification and kMulticlass.")
784 Log() << kERROR <<
Form(
"Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
// Fetch the per-event type flags from the classification results.
// NOTE(review): the dynamic_cast result is dereferenced without a null check;
// confirm `results` is guaranteed to be a ResultsClassification on this path.
794 std::vector<Bool_t> *mvaResTypes =
dynamic_cast<ResultsClassification *
>(results)->GetValueVectorTypes();
795 std::vector<Float_t> mvaResWeights;
798 mvaResWeights.reserve(eventCollection.size());
799 for (
auto ev : eventCollection) {
800 mvaResWeights.push_back(ev->GetWeight());
803 rocCurve =
new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);
806 std::vector<Float_t> mvaRes;
807 std::vector<Bool_t> mvaResTypes;
808 std::vector<Float_t> mvaResWeights;
// Fetch the raw multiclass response vectors (one inner vector per entry).
// NOTE(review): the dynamic_cast result is dereferenced without a null check;
// confirm `results` is guaranteed to be a ResultsMulticlass on this path.
810 std::vector<std::vector<Float_t>> *rawMvaRes =
dynamic_cast<ResultsMulticlass *
>(results)->GetValueVector();
815 mvaRes.reserve(rawMvaRes->size());
// Collect the response for class iClass from every inner score vector.
// Bind by const reference: `auto item` copied a whole std::vector<Float_t>
// on each iteration just to read one element.
816 for (
const auto &item : *rawMvaRes) {
817 mvaRes.push_back(item[iClass]);
821 mvaResTypes.reserve(eventCollection.size());
822 mvaResWeights.reserve(eventCollection.size());
823 for (
auto ev : eventCollection) {
824 mvaResTypes.push_back(ev->GetClass() == iClass);
825 mvaResWeights.push_back(ev->GetWeight());
828 rocCurve =
new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
844 return GetROCIntegral((
TString)loader->
GetName(), theMethodName, iClass);
857 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
858 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
862 if ( ! this->HasMethod(datasetname, theMethodName) ) {
863 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
868 if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
869 Log() << kERROR <<
Form(
"Can only generate ROC integral for analysis type kClassification. and kMulticlass.")
874 TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
876 Log() << kFATAL <<
Form(
"ROCCurve object was not created in Method = %s not found with Dataset = %s ",
877 theMethodName.
Data(), datasetname.
Data())
905 return GetROCCurve( (
TString)loader->
GetName(), theMethodName, setTitles, iClass );
924 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
925 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
929 if ( ! this->HasMethod(datasetname, theMethodName) ) {
930 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
935 if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
936 Log() << kERROR <<
Form(
"Can only generate ROC curves for analysis type kClassification and kMulticlass.") <<
Endl;
940 TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
944 Log() << kFATAL <<
Form(
"ROCCurve object was not created in Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
952 graph->GetYaxis()->SetTitle(
"Background rejection (Specificity)");
953 graph->GetXaxis()->SetTitle(
"Signal efficiency (Sensitivity)");
954 graph->SetTitle(
Form(
"Signal efficiency vs. Background rejection (%s)", theMethodName.
Data()));
974 return GetROCCurveAsMultiGraph((
TString)loader->
GetName(), iClass);
// Look up the method list booked for this dataset.
// NOTE(review): std::map::operator[] inserts a null entry when the key is
// absent, and `methods` is dereferenced right after; confirm a presence check
// on fMethodsMap precedes this line.
995 MVector *methods = fMethodsMap[datasetname.
Data()];
996 for (
auto * method_raw : *methods) {
998 if (method ==
nullptr) {
continue; }
1004 Log() << kERROR <<
Form(
"Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass, nClasses) <<
Endl;
1010 TGraph *
graph = this->GetROCCurve(datasetname, methodName,
false, iClass);
1011 graph->SetTitle(methodName);
1013 graph->SetLineWidth(2);
1014 graph->SetLineColor(line_color++);
1015 graph->SetFillColor(10);
// Error report for a class index that no method provides
// (spelling of "methods" corrected in the message).
1021 Log() << kERROR <<
Form(
"No methods have class %i defined.", iClass) <<
Endl;
1058 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
1059 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
1067 TMultiGraph *multigraph = this->GetROCCurveAsMultiGraph(datasetname, iClass);
1070 multigraph->
Draw(
"AL");
1075 TString titleString =
Form(
"Signal efficiency vs. Background rejection");
1077 titleString =
Form(
"%s (Class=%i)", titleString.
Data(), iClass);
1082 multigraph->
SetTitle( titleString );
1084 canvas->
BuildLegend(0.15, 0.15, 0.35, 0.3,
"MVA Method");
1100 if (fMethodsMap.empty()) {
1101 Log() << kINFO <<
"...nothing found to train" <<
Endl;
1107 Log() << kDEBUG <<
"Train all methods for "
1111 std::map<TString,MVector*>::iterator itrMap;
1113 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1115 MVector *methods=itrMap->second;
1116 MVector::iterator itrMethod;
1119 for( itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod ) {
1123 if(mva==0)
continue;
1126 Log() << kFATAL <<
"No input data for the training provided!" <<
Endl;
1130 Log() << kFATAL <<
"You want to do regression training without specifying a target." <<
Endl;
1133 Log() << kFATAL <<
"You want to do classification training, but specified less than two classes." <<
Endl;
1136 if(!IsSilentFile()) WriteDataInformation(mva->
fDataSetInfo);
1141 <<
" not trained (training tree has less entries ["
1151 Log() << kHEADER <<
"Training finished" <<
Endl <<
Endl;
1158 Log() << kINFO <<
"Ranking input variables (method specific)..." <<
Endl;
1159 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1164 const Ranking* ranking = (*itrMethod)->CreateRanking();
1165 if (ranking != 0) ranking->
Print();
1166 else Log() << kINFO <<
"No variable ranking supplied by classifier: "
1173 if (!IsSilentFile()) {
1174 for (
UInt_t i=0; i<methods->size(); i++) {
1178 m->fTrainHistory.SaveHistory(
m->GetMethodName());
1186 if (fModelPersistence) {
1188 Log() << kHEADER <<
"=== Destroy and recreate all methods via weight files for testing ===" <<
Endl <<
Endl;
1190 if(!IsSilentFile())RootBaseDir()->cd();
1193 for (
UInt_t i=0; i<methods->size(); i++) {
1200 TString weightfile =
m->GetWeightFileName();
1207 TString testvarName =
m->GetTestvarName();
1216 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
1222 TString wfileDir =
m->DataInfo().GetName();
1224 m->SetWeightFileDir(wfileDir);
1225 m->SetModelPersistence(fModelPersistence);
1226 m->SetSilentFile(IsSilentFile());
1227 m->SetAnalysisType(fAnalysisType);
1229 m->ReadStateFromFile();
1230 m->SetTestvarName(testvarName);
1249 if (fMethodsMap.empty()) {
1250 Log() << kINFO <<
"...nothing found to test" <<
Endl;
1253 std::map<TString,MVector*>::iterator itrMap;
1255 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1257 MVector *methods=itrMap->second;
1258 MVector::iterator itrMethod;
1261 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1270 : (analysisType ==
Types::kMulticlass ?
"Multiclass classification" :
"Classification"))
1281 if (methodTitle !=
"") {
1285 Log() << kWARNING <<
"<MakeClass> Could not find classifier \"" << methodTitle
1286 <<
"\" in list" <<
Endl;
1292 MVector *methods=fMethodsMap.find(datasetname)->second;
1293 MVector::const_iterator itrMethod;
1294 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1296 if(method==0)
continue;
1309 if (methodTitle !=
"") {
1313 Log() << kWARNING <<
"<PrintHelpMessage> Could not find classifier \"" << methodTitle
1314 <<
"\" in list" <<
Endl;
1320 MVector *methods=fMethodsMap.find(datasetname)->second;
1321 MVector::const_iterator itrMethod ;
1322 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1324 if(method==0)
continue;
1336 Log() << kINFO <<
"Evaluating all variables..." <<
Endl;
1342 this->BookMethod(loader,
"Variable",
s );
1354 if (fMethodsMap.empty()) {
1355 Log() << kINFO <<
"...nothing found to evaluate" <<
Endl;
1358 std::map<TString,MVector*>::iterator itrMap;
1360 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1362 MVector *methods=itrMap->second;
1372 Int_t nmeth_used[2] = {0,0};
1374 std::vector<std::vector<TString> > mname(2);
1375 std::vector<std::vector<Double_t> > sig(2),
sep(2), roc(2);
1376 std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
1377 std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
1378 std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
1380 std::vector<std::vector<Float_t> > multiclass_testEff;
1381 std::vector<std::vector<Float_t> > multiclass_trainEff;
1382 std::vector<std::vector<Float_t> > multiclass_testPur;
1383 std::vector<std::vector<Float_t> > multiclass_trainPur;
1385 std::vector<std::vector<Float_t> > train_history;
1388 std::vector<TMatrixD> multiclass_trainConfusionEffB01;
1389 std::vector<TMatrixD> multiclass_trainConfusionEffB10;
1390 std::vector<TMatrixD> multiclass_trainConfusionEffB30;
1391 std::vector<TMatrixD> multiclass_testConfusionEffB01;
1392 std::vector<TMatrixD> multiclass_testConfusionEffB10;
1393 std::vector<TMatrixD> multiclass_testConfusionEffB30;
1395 std::vector<std::vector<Double_t> > biastrain(1);
1396 std::vector<std::vector<Double_t> > biastest(1);
1397 std::vector<std::vector<Double_t> > devtrain(1);
1398 std::vector<std::vector<Double_t> > devtest(1);
1399 std::vector<std::vector<Double_t> > rmstrain(1);
1400 std::vector<std::vector<Double_t> > rmstest(1);
1401 std::vector<std::vector<Double_t> > minftrain(1);
1402 std::vector<std::vector<Double_t> > minftest(1);
1403 std::vector<std::vector<Double_t> > rhotrain(1);
1404 std::vector<std::vector<Double_t> > rhotest(1);
1407 std::vector<std::vector<Double_t> > biastrainT(1);
1408 std::vector<std::vector<Double_t> > biastestT(1);
1409 std::vector<std::vector<Double_t> > devtrainT(1);
1410 std::vector<std::vector<Double_t> > devtestT(1);
1411 std::vector<std::vector<Double_t> > rmstrainT(1);
1412 std::vector<std::vector<Double_t> > rmstestT(1);
1413 std::vector<std::vector<Double_t> > minftrainT(1);
1414 std::vector<std::vector<Double_t> > minftestT(1);
1423 for (MVector::iterator itrMethod =methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1426 if(theMethod==0)
continue;
1427 theMethod->
SetFile(fgTargetFile);
1432 doRegression =
kTRUE;
1439 Log() << kINFO <<
"TestRegression (testing)" <<
Endl;
1441 biastest[0] .push_back( bias );
1442 devtest[0] .push_back( dev );
1443 rmstest[0] .push_back( rms );
1444 minftest[0] .push_back( mInf );
1445 rhotest[0] .push_back( rho );
1446 biastestT[0] .push_back( biasT );
1447 devtestT[0] .push_back( devT );
1448 rmstestT[0] .push_back( rmsT );
1449 minftestT[0] .push_back( mInfT );
1451 Log() << kINFO <<
"TestRegression (training)" <<
Endl;
1453 biastrain[0] .push_back( bias );
1454 devtrain[0] .push_back( dev );
1455 rmstrain[0] .push_back( rms );
1456 minftrain[0] .push_back( mInf );
1457 rhotrain[0] .push_back( rho );
1458 biastrainT[0].push_back( biasT );
1459 devtrainT[0] .push_back( devT );
1460 rmstrainT[0] .push_back( rmsT );
1461 minftrainT[0].push_back( mInfT );
1465 if (!IsSilentFile()) {
1466 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1474 doMulticlass =
kTRUE;
1475 Log() << kINFO <<
"Evaluate multiclass classification method: " << theMethod->
GetMethodName() <<
Endl;
1494 if (!IsSilentFile()) {
1495 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1518 eff01err[isel].push_back(err);
1520 eff10err[isel].push_back(err);
1522 eff30err[isel].push_back(err);
1531 if (!IsSilentFile()) {
1532 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1540 std::vector<TString> vtemps = mname[0];
1541 std::vector< std::vector<Double_t> > vtmp;
1542 vtmp.push_back( devtest[0] );
1543 vtmp.push_back( devtrain[0] );
1544 vtmp.push_back( biastest[0] );
1545 vtmp.push_back( biastrain[0] );
1546 vtmp.push_back( rmstest[0] );
1547 vtmp.push_back( rmstrain[0] );
1548 vtmp.push_back( minftest[0] );
1549 vtmp.push_back( minftrain[0] );
1550 vtmp.push_back( rhotest[0] );
1551 vtmp.push_back( rhotrain[0] );
1552 vtmp.push_back( devtestT[0] );
1553 vtmp.push_back( devtrainT[0] );
1554 vtmp.push_back( biastestT[0] );
1555 vtmp.push_back( biastrainT[0]);
1556 vtmp.push_back( rmstestT[0] );
1557 vtmp.push_back( rmstrainT[0] );
1558 vtmp.push_back( minftestT[0] );
1559 vtmp.push_back( minftrainT[0]);
1562 devtest[0] = vtmp[0];
1563 devtrain[0] = vtmp[1];
1564 biastest[0] = vtmp[2];
1565 biastrain[0] = vtmp[3];
1566 rmstest[0] = vtmp[4];
1567 rmstrain[0] = vtmp[5];
1568 minftest[0] = vtmp[6];
1569 minftrain[0] = vtmp[7];
1570 rhotest[0] = vtmp[8];
1571 rhotrain[0] = vtmp[9];
1572 devtestT[0] = vtmp[10];
1573 devtrainT[0] = vtmp[11];
1574 biastestT[0] = vtmp[12];
1575 biastrainT[0] = vtmp[13];
1576 rmstestT[0] = vtmp[14];
1577 rmstrainT[0] = vtmp[15];
1578 minftestT[0] = vtmp[16];
1579 minftrainT[0] = vtmp[17];
1580 }
else if (doMulticlass) {
1588 for (
Int_t k=0; k<2; k++) {
1589 std::vector< std::vector<Double_t> > vtemp;
1590 vtemp.push_back( effArea[k] );
1591 vtemp.push_back( eff10[k] );
1592 vtemp.push_back( eff01[k] );
1593 vtemp.push_back( eff30[k] );
1594 vtemp.push_back( eff10err[k] );
1595 vtemp.push_back( eff01err[k] );
1596 vtemp.push_back( eff30err[k] );
1597 vtemp.push_back( trainEff10[k] );
1598 vtemp.push_back( trainEff01[k] );
1599 vtemp.push_back( trainEff30[k] );
1600 vtemp.push_back( sig[k] );
1601 vtemp.push_back(
sep[k] );
1602 vtemp.push_back( roc[k] );
1603 std::vector<TString> vtemps = mname[k];
1605 effArea[k] = vtemp[0];
1606 eff10[k] = vtemp[1];
1607 eff01[k] = vtemp[2];
1608 eff30[k] = vtemp[3];
1609 eff10err[k] = vtemp[4];
1610 eff01err[k] = vtemp[5];
1611 eff30err[k] = vtemp[6];
1612 trainEff10[k] = vtemp[7];
1613 trainEff01[k] = vtemp[8];
1614 trainEff30[k] = vtemp[9];
1630 const Int_t nmeth = methodsNoCuts.size();
1633 if (!doRegression && !doMulticlass ) {
1639 std::vector<Double_t> rvec;
1647 std::vector<TString>* theVars =
new std::vector<TString>;
1648 std::vector<ResultsClassification*> mvaRes;
1649 for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); ++itrMethod, ++ivar) {
1652 theVars->push_back(
m->GetTestvarName() );
1653 rvec.push_back(
m->GetSignalReferenceCut() );
1654 theVars->back().ReplaceAll(
"MVA_",
"" );
1674 for (
Int_t im=0; im<nmeth; im++) {
1678 Log() << kWARNING <<
"Found NaN return value in event: " << ievt
1679 <<
" for method \"" << methodsNoCuts[im]->GetName() <<
"\"" <<
Endl;
1682 else dvec[im] = retval;
1686 else { tpBkg->
AddRow( dvec ); theMat = overlapB; }
1689 for (
Int_t im=0; im<nmeth; im++) {
1690 for (
Int_t jm=im; jm<nmeth; jm++) {
1691 if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1693 if (im != jm) (*theMat)(jm,im)++;
1713 if (corrMatS != 0 && corrMatB != 0) {
1718 for (
Int_t im=0; im<nmeth; im++) {
1719 for (
Int_t jm=0; jm<nmeth; jm++) {
1720 mvaMatS(im,jm) = (*corrMatS)(im,jm);
1721 mvaMatB(im,jm) = (*corrMatB)(im,jm);
1726 std::vector<TString> theInputVars;
1729 for (
Int_t iv=0; iv<nvar; iv++) {
1731 for (
Int_t jm=0; jm<nmeth; jm++) {
1732 varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1733 varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1748 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Correlations between input variables and MVA response (signal):" <<
Endl;
1752 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Correlations between input variables and MVA response (background):" <<
Endl;
1759 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"The following \"overlap\" matrices contain the fraction of events for which " <<
Endl;
1760 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" <<
Endl;
1761 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"An event is signal-like, if its MVA output exceeds the following value:" <<
Endl;
1763 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"which correspond to the working point: eff(signal) = 1 - eff(background)" <<
Endl;
1766 if (nmeth != (
Int_t)methods->size())
1767 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Note: no correlations and overlap with cut method are provided at present" <<
Endl;
1799 TString hLine =
"--------------------------------------------------------------------------------------------------";
1800 Log() << kINFO <<
"Evaluation results ranked by smallest RMS on test sample:" <<
Endl;
1801 Log() << kINFO <<
"(\"Bias\" quotes the mean deviation of the regression from true target." <<
Endl;
1802 Log() << kINFO <<
" \"MutInf\" is the \"Mutual Information\" between regression and target." <<
Endl;
1803 Log() << kINFO <<
" Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" <<
Endl;
1804 Log() << kINFO <<
" tained when removing events deviating more than 2sigma from average.)" <<
Endl;
1805 Log() << kINFO << hLine <<
Endl;
1807 Log() << kINFO << hLine <<
Endl;
1809 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1811 if(theMethod==0)
continue;
1813 Log() << kINFO <<
Form(
"%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1815 (
const char*)mname[0][i],
1816 biastest[0][i], biastestT[0][i],
1817 rmstest[0][i], rmstestT[0][i],
1818 minftest[0][i], minftestT[0][i] )
1821 Log() << kINFO << hLine <<
Endl;
1823 Log() << kINFO <<
"Evaluation results ranked by smallest RMS on training sample:" <<
Endl;
1824 Log() << kINFO <<
"(overtraining check)" <<
Endl;
1825 Log() << kINFO << hLine <<
Endl;
1826 Log() << kINFO <<
"DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" <<
Endl;
1827 Log() << kINFO << hLine <<
Endl;
1829 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1831 if(theMethod==0)
continue;
1832 Log() << kINFO <<
Form(
"%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1834 (
const char*)mname[0][i],
1835 biastrain[0][i], biastrainT[0][i],
1836 rmstrain[0][i], rmstrainT[0][i],
1837 minftrain[0][i], minftrainT[0][i] )
1840 Log() << kINFO << hLine <<
Endl;
1842 }
else if (doMulticlass) {
1848 "-------------------------------------------------------------------------------------------------------";
1887 TString header1 =
Form(
"%-15s%-15s%-15s%-15s%-15s%-15s",
"Dataset",
"MVA Method",
"ROC AUC",
"Sig eff@B=0.01",
1888 "Sig eff@B=0.10",
"Sig eff@B=0.30");
1889 TString header2 =
Form(
"%-15s%-15s%-15s%-15s%-15s%-15s",
"Name:",
"/ Class:",
"test (train)",
"test (train)",
1890 "test (train)",
"test (train)");
1892 Log() << kINFO <<
"1-vs-rest performance metrics per class" <<
Endl;
1893 Log() << kINFO << hLine <<
Endl;
1895 Log() << kINFO <<
"Considers the listed class as signal and the other classes" <<
Endl;
1896 Log() << kINFO <<
"as background, reporting the resulting binary performance." <<
Endl;
// Legend line explaining the "test (train)" score format
// (spelling of "achieved" corrected in the message).
1897 Log() << kINFO <<
"A score of 0.820 (0.850) means 0.820 was achieved on the" <<
Endl;
1898 Log() << kINFO <<
"test set and 0.850 on the training set." <<
Endl;
1901 Log() << kINFO << header1 <<
Endl;
1902 Log() << kINFO << header2 <<
Endl;
1903 for (
Int_t k = 0; k < 2; k++) {
1904 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
1906 mname[k][i].ReplaceAll(
"Variable_",
"");
1909 const TString datasetName = itrMap->first;
1910 const TString mvaName = mname[k][i];
1913 if (theMethod == 0) {
1919 Log() << kINFO << row <<
Endl;
1920 Log() << kINFO <<
"------------------------------" <<
Endl;
1923 for (
UInt_t iClass = 0; iClass < numClasses; ++iClass) {
1937 const TString rocaucCmp =
Form(
"%5.3f (%5.3f)", rocaucTest, rocaucTrain);
1938 const TString effB01Cmp =
Form(
"%5.3f (%5.3f)", effB01Test, effB01Train);
1939 const TString effB10Cmp =
Form(
"%5.3f (%5.3f)", effB10Test, effB10Train);
1940 const TString effB30Cmp =
Form(
"%5.3f (%5.3f)", effB30Test, effB30Train);
1941 row =
Form(
"%-15s%-15s%-15s%-15s%-15s%-15s",
"", className.
Data(), rocaucCmp.
Data(), effB01Cmp.
Data(),
1942 effB10Cmp.
Data(), effB30Cmp.
Data());
1943 Log() << kINFO << row <<
Endl;
1945 delete rocCurveTrain;
1946 delete rocCurveTest;
1951 Log() << kINFO << hLine <<
Endl;
1956 auto printMatrix = [](
TMatrixD const &matTraining,
TMatrixD const &matTesting, std::vector<TString> classnames,
1966 for (
UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1967 header +=
Form(
" %-14s", classnames[iCol].Data());
1968 headerInfo +=
Form(
" %-14s",
" test (train)");
1970 stream << kINFO << header <<
Endl;
1971 stream << kINFO << headerInfo <<
Endl;
1973 for (
UInt_t iRow = 0; iRow < numClasses; ++iRow) {
1974 stream << kINFO <<
Form(
" %-14s", classnames[iRow].Data());
1976 for (
UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1978 stream << kINFO <<
Form(
" %-14s",
"-");
1980 Double_t trainValue = matTraining[iRow][iCol];
1981 Double_t testValue = matTesting[iRow][iCol];
1982 TString entry =
Form(
"%-5.3f (%-5.3f)", testValue, trainValue);
1983 stream << kINFO <<
Form(
" %-14s", entry.
Data());
1986 stream << kINFO <<
Endl;
1991 Log() << kINFO <<
"Confusion matrices for all methods" <<
Endl;
1992 Log() << kINFO << hLine <<
Endl;
1994 Log() << kINFO <<
"Does a binary comparison between the two classes given by a " <<
Endl;
1995 Log() << kINFO <<
"particular row-column combination. In each case, the class " <<
Endl;
1996 Log() << kINFO <<
"given by the row is considered signal while the class given " <<
Endl;
1997 Log() << kINFO <<
"by the column index is considered background." <<
Endl;
1999 for (
UInt_t iMethod = 0; iMethod < methods->size(); ++iMethod) {
2001 if (theMethod ==
nullptr) {
2006 std::vector<TString> classnames;
2007 for (
UInt_t iCls = 0; iCls < numClasses; ++iCls) {
2011 <<
"=== Showing confusion matrix for method : " <<
Form(
"%-15s", (
const char *)mname[0][iMethod])
2013 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.01%)" <<
Endl;
2014 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
2015 printMatrix(multiclass_testConfusionEffB01[iMethod], multiclass_trainConfusionEffB01[iMethod], classnames,
2019 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.10%)" <<
Endl;
2020 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
2021 printMatrix(multiclass_testConfusionEffB10[iMethod], multiclass_trainConfusionEffB10[iMethod], classnames,
2025 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.30%)" <<
Endl;
2026 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
2027 printMatrix(multiclass_testConfusionEffB30[iMethod], multiclass_trainConfusionEffB30[iMethod], classnames,
2031 Log() << kINFO << hLine <<
Endl;
2037 Log().EnableOutput();
2040 TString hLine =
"------------------------------------------------------------------------------------------"
2041 "-------------------------";
2042 Log() << kINFO <<
"Evaluation results ranked by best signal efficiency and purity (area)" <<
Endl;
2043 Log() << kINFO << hLine <<
Endl;
2044 Log() << kINFO <<
"DataSet MVA " <<
Endl;
2045 Log() << kINFO <<
"Name: Method: ROC-integ" <<
Endl;
2050 Log() << kDEBUG << hLine <<
Endl;
2051 for (
Int_t k = 0; k < 2; k++) {
2052 if (k == 1 && nmeth_used[k] > 0) {
2053 Log() << kINFO << hLine <<
Endl;
2054 Log() << kINFO <<
"Input Variables: " <<
Endl << hLine <<
Endl;
2056 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
2057 TString datasetName = itrMap->first;
2058 TString methodName = mname[k][i];
2065 if (theMethod == 0) {
2071 std::vector<Bool_t> *mvaResType =
2075 if (mvaResType->size() != 0) {
2076 rocIntegral = GetROCIntegral(datasetName, methodName);
2079 if (
sep[k][i] < 0 || sig[k][i] < 0) {
2081 Log() << kINFO <<
Form(
"%-13s %-15s: %#1.3f", datasetName.
Data(), methodName.
Data(), effArea[k][i])
2093 Log() << kINFO <<
Form(
"%-13s %-15s: %#1.3f", datasetName.
Data(), methodName.
Data(), rocIntegral)
2107 Log() << kINFO << hLine <<
Endl;
2109 Log() << kINFO <<
"Testing efficiency compared to training efficiency (overtraining check)" <<
Endl;
2110 Log() << kINFO << hLine <<
Endl;
2112 <<
"DataSet MVA Signal efficiency: from test sample (from training sample) "
2114 Log() << kINFO <<
"Name: Method: @B=0.01 @B=0.10 @B=0.30 "
2116 Log() << kINFO << hLine <<
Endl;
2117 for (
Int_t k = 0; k < 2; k++) {
2118 if (k == 1 && nmeth_used[k] > 0) {
2119 Log() << kINFO << hLine <<
Endl;
2120 Log() << kINFO <<
"Input Variables: " <<
Endl << hLine <<
Endl;
2122 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
2123 if (k == 1) mname[k][i].ReplaceAll(
"Variable_",
"");
2125 if (theMethod == 0)
continue;
2127 Log() << kINFO <<
Form(
"%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
2129 trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
2133 Log() << kINFO << hLine <<
Endl;
2136 if (
gTools().CheckForSilentOption(GetOptions()))
Log().InhibitOutput();
2141 std::list<TString> datasets;
2142 for (
Int_t k=0; k<2; k++) {
2143 for (
Int_t i=0; i<nmeth_used[k]; i++) {
2145 if(theMethod==0)
continue;
2148 if(std::find(datasets.begin(), datasets.end(), theMethod->
fDataSetInfo.
GetName()) == datasets.end())
2167 fModelPersistence=
kFALSE;
2172 if(vitype==VIType::kShort)
2173 return EvaluateImportanceShort(loader,theMethod,methodTitle,theOption);
2174 else if(vitype==VIType::kAll)
2175 return EvaluateImportanceAll(loader,theMethod,methodTitle,theOption);
2176 else if(vitype==VIType::kRandom&&nbits>10)
2178 return EvaluateImportanceRandom(loader,
pow(2,nbits),theMethod,methodTitle,theOption);
// Diagnostic for the random variable-importance mode; wording now matches the
// `vitype==VIType::kRandom&&nbits>10` guard (more than 10 variables needed).
2181 std::cerr<<
"Error in Variable Importance: Random mode requires more than 10 variables in the dataset."<<std::endl;
// Number of variable subsets, 2^nbits. Computed with an integer shift (the
// same `uint64_t(1) << i` idiom used for the bit masks elsewhere in this file)
// instead of a floating-point pow() converted back to an integer.
2198 uint64_t range =
uint64_t(1) << nbits;
2201 std::vector<Double_t> importances(nbits);
2203 std::vector<Double_t> ROC(range);
2205 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2208 for (
x = 1;
x <range ;
x++) {
2210 std::bitset<VIBITS> xbitset(
x);
2211 if (
x == 0)
continue;
2217 for (
int index = 0; index < nbits; index++) {
2218 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2225 BookMethod(seedloader, theMethod, methodTitle, theOption);
2230 EvaluateAllMethods();
2233 ROC[
x] = GetROCIntegral(xbitset.to_string(), methodTitle);
2240 this->DeleteAllMethods();
2242 fMethodsMap.clear();
2247 for (
x = 0;
x <range ;
x++)
2250 for (uint32_t i = 0; i <
VIBITS; ++i) {
2251 if (
x & (uint64_t(1) << i)) {
2253 std::bitset<VIBITS> ybitset(
y);
2259 importances[ny] = SROC - 0.5;
2265 importances[ny] += SROC - SSROC;
2271 std::cout<<
"--- Variable Importance Results (All)"<<std::endl;
2272 return GetImportance(nbits,importances,varNames);
2275static long int sum(
long int i)
2278 for(
long int n=0;
n<i;
n++) _sum+=
pow(2,
n);
2293 long int range =
sum(nbits);
2296 std::vector<Double_t> importances(nbits);
2297 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2303 std::bitset<VIBITS> xbitset(
x);
2304 if (
x == 0)
Log()<<kFATAL<<
"Error: need at least one variable.";
2311 for (
int index = 0; index < nbits; index++) {
2312 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2319 BookMethod(seedloader, theMethod, methodTitle, theOption);
2324 EvaluateAllMethods();
2327 SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2334 this->DeleteAllMethods();
2335 fMethodsMap.clear();
2339 for (uint32_t i = 0; i <
VIBITS; ++i) {
2341 y =
x & ~(uint64_t(1) << i);
2342 std::bitset<VIBITS> ybitset(
y);
2348 importances[ny] = SROC - 0.5;
2355 for (
int index = 0; index < nbits; index++) {
2356 if (ybitset[index]) subseedloader->
AddVariable(varNames[index],
'F');
2363 BookMethod(subseedloader, theMethod, methodTitle, theOption);
2368 EvaluateAllMethods();
2371 SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2372 importances[ny] += SROC - SSROC;
2378 delete subseedloader;
2379 this->DeleteAllMethods();
2380 fMethodsMap.clear();
2383 std::cout<<
"--- Variable Importance Results (Short)"<<std::endl;
2384 return GetImportance(nbits,importances,varNames);
2400 long int range =
pow(2, nbits);
2403 std::vector<Double_t> importances(nbits);
2405 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2409 x = rangen -> Integer(range);
2411 std::bitset<32> xbitset(
x);
2412 if (
x == 0)
continue;
2419 for (
int index = 0; index < nbits; index++) {
2420 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2427 BookMethod(seedloader, theMethod, methodTitle, theOption);
2432 EvaluateAllMethods();
2435 SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2443 this->DeleteAllMethods();
2444 fMethodsMap.clear();
2448 for (uint32_t i = 0; i < 32; ++i) {
2449 if (
x & (uint64_t(1) << i)) {
2451 std::bitset<32> ybitset(
y);
2457 importances[ny] = SROC - 0.5;
2458 importances_norm += importances[ny];
2466 for (
int index = 0; index < nbits; index++) {
2467 if (ybitset[index]) subseedloader->
AddVariable(varNames[index],
'F');
2474 BookMethod(subseedloader, theMethod, methodTitle, theOption);
2479 EvaluateAllMethods();
2482 SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2483 importances[ny] += SROC - SSROC;
2489 delete subseedloader;
2490 this->DeleteAllMethods();
2491 fMethodsMap.clear();
2495 std::cout<<
"--- Variable Importance Results (Random)"<<std::endl;
2496 return GetImportance(nbits,importances,varNames);
2503 TH1F *vih1 =
new TH1F(
"vih1",
"", nbits, 0, nbits);
2508 for (
int i = 0; i < nbits; i++) {
2509 normalization = normalization + importances[i];
2518 std::vector<Double_t> x_ie(nbits), y_ie(nbits);
2519 for (
Int_t i = 1; i < nbits + 1; i++) {
2520 x_ie[i - 1] = (i - 1) * 1.;
2521 roc = 100.0 * importances[i - 1] / normalization;
2523 std::cout<<
"--- "<<varNames[i-1]<<
" = "<<roc<<
" %"<<std::endl;
2527 TGraph *g_ie =
new TGraph(nbits + 2, &x_ie[0], &y_ie[0]);
double pow(double, double)
TMatrixT< Double_t > TMatrixD
char * Form(const char *fmt,...)
R__EXTERN TStyle * gStyle
R__EXTERN TSystem * gSystem
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title.
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title.
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb",...
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
A TGraph is an object made of two arrays X and Y with npoints each.
virtual void SetTitle(const char *title="")
Change (i.e.
1-D histogram with a float per channel (see TH1 documentation)
virtual void SetDirectory(TDirectory *dir)
By default, when a histogram is created, it is added to the list of histogram objects in the current ...
virtual void SetTitle(const char *title)
See GetStatOverflows for more information.
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
virtual void SetBarWidth(Float_t width=0.5)
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Service class for 2-Dim histogram classes.
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
TString fWeightFileDirPrefix
void SetDrawProgressBar(Bool_t d)
void SetUseColor(Bool_t uc)
class TMVA::Config::VariablePlotting fVariablePlotting
void SetConfigDescription(const char *d)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
void SetConfigName(const char *n)
virtual void ParseOptions()
options parser
const TString & GetOptions() const
void CheckForUnusedOptions() const
checks for unused options in option string
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
DataSetInfo & GetDataSetInfo()
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Class that contains all the data information.
UInt_t GetNVariables() const
virtual const char * GetName() const
Returns name of object.
const TMatrixD * CorrelationMatrix(const TString &className) const
UInt_t GetNClasses() const
const TString & GetSplitOptions() const
UInt_t GetNTargets() const
DataSet * GetDataSet() const
returns data set
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
std::vector< TString > GetListOfVariables() const
returns list of variables
ClassInfo * GetClassInfo(Int_t clNum) const
const TCut & GetCut(Int_t i) const
VariableInfo & GetVariableInfo(Int_t i)
Bool_t IsSignal(const Event *ev) const
DataSetManager * GetDataSetManager()
DataInputHandler & DataInput()
Class that contains all the data information.
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
TTree * GetTree(Types::ETreeType type)
create the test/training tree with all the variables, the weights, the classes, the targets,...
const Event * GetEvent() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Long64_t GetNTrainingEvents() const
void SetCurrentType(Types::ETreeType type) const
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
This is the main MVA steering class.
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Calculate the integral of the ROC curve, also known as the area under curve (AUC),...
Bool_t fCorrelations
verbosity level, controls granularity of logging
std::vector< IMethod * > MVector
void TrainAllMethods()
Iterates through all booked methods and calls training.
Bool_t Verbose(void) const
void WriteDataInformation(DataSetInfo &fDataSetInfo)
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponi...
Bool_t fVerbose
list of transformations to test
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Bool_t fROC
enable to calculate correlations
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
TString fVerboseLevel
verbose mode
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
virtual ~Factory()
Destructor.
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
MethodBase * BookMethodWeightfile(DataLoader *dataloader, TMVA::Types::EMVA methodType, const TString &weightfile)
Adds an already constructed method to be managed by this factory.
Bool_t fModelPersistence
the training type
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods, checks whether they use parameter tuning and, if so, performs it.
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Private method to generate a ROCCurve instance for a given method.
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Types::EAnalysisType fAnalysisType
jobname, used as extension in weight file names
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
void SetVerbose(Bool_t v=kTRUE)
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
void DeleteAllMethods(void)
Delete methods.
TString fTransformations
option string given by construction (presently only "V")
void Greetings()
Print welcome message.
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
Generate a collection of graphs, for all methods for a given class.
Interface for all concrete MVA method implementations.
virtual void PrintHelpMessage() const =0
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
virtual void MakeClass(const TString &classFileName=TString("")) const =0
Virtual base Class for all MVA method.
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
void SetSilentFile(Bool_t status)
void SetWeightFileDir(TString fileDir)
set directory of weight file
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
TString GetMethodTypeName() const
Bool_t DoMulticlass() const
virtual Double_t GetSignificance() const
compute significance of mean difference
const char * GetName() const
Types::EAnalysisType GetAnalysisType() const
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
void PrintHelpMessage() const
prints out method-specific help method
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void TestMulticlass()
test multiclass classification
void SetupMethod()
setup of methods
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
virtual void SetAnalysisType(Types::EAnalysisType type)
const TString & GetMethodName() const
Bool_t DoRegression() const
void ProcessSetup()
process all options; the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
virtual Double_t GetTrainingEfficiency(const TString &)
DataSetInfo & DataInfo() const
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
virtual void TestClassification()
initialization
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
void ReadStateFromFile()
Function to read options and weights from file.
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
DataSetInfo & fDataSetInfo
Types::EMVA GetMethodType() const
void SetFile(TFile *file)
void SetModelPersistence(Bool_t status)
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as an overall quality measure of the classification
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Class for boosting a TMVA method.
void SetBoostedMethodName(TString methodName)
DataSetManager * fDataSetManager
Class for categorizing the phase space.
DataSetManager * fDataSetManager
ostringstream derivative to redirect and format output
void SetMinType(EMsgType minType)
void SetSource(const std::string &source)
static void InhibitOutput()
Double_t GetEffSForEffB(Double_t effB, const UInt_t num_points=41)
Calculate the signal efficiency (sensitivity) for a given background efficiency (sensitivity).
Double_t GetROCIntegral(const UInt_t points=41)
Calculates the ROC integral (AUC)
TGraph * GetROCCurve(const UInt_t points=100)
Returns a new TGraph containing the ROC curve.
Ranking for variables in method (implementation)
virtual void Print() const
get maximum length of variable names
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Singleton class for Global types used by TMVA.
static Types & Instance()
Returns the single instance of "Types" if it already exists, or creates it (Singleton)
const TString & GetLabel() const
A TMultiGraph is a collection of TGraph (or derived) objects.
TList * GetListOfGraphs() const
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
TH1F * GetHistogram()
Returns a pointer to the histogram used to draw the axis.
virtual void Draw(Option_t *chopt="")
Draw this multigraph with its current attributes.
TAxis * GetYaxis()
Get y axis of the graph.
TAxis * GetXaxis()
Get x axis of the graph.
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
virtual const char * GetName() const
Returns name of object.
@ kOverwrite
overwrite existing object with same name
virtual TLegend * BuildLegend(Double_t x1=0.3, Double_t y1=0.21, Double_t x2=0.3, Double_t y2=0.21, const char *title="", Option_t *option="")
Build a legend from the graphical objects in the pad.
virtual void SetGrid(Int_t valuex=1, Int_t valuey=1)
Principal Components Analysis (PCA)
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
const TMatrixD * GetCovarianceMatrix() const
virtual void MakePrincipals()
Perform the principal components analysis.
Random number generator class based on M.
void ToLower()
Change string to lower-case.
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
const char * Data() const
TString & ReplaceAll(const TString &s1, const TString &s2)
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
void SetTitleXOffset(Float_t offset=1)
virtual int MakeDirectory(const char *name)
Make a directory.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
RPY_EXPORTED TCppMethod_t GetMethod(TCppScope_t scope, TCppIndex_t imeth)
static constexpr double s
void GetMethodName(TString &name, TKey *mkey)
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
static long int sum(long int i)
const Int_t MinNoTrainingEvents