121 fTransformations (
"I" ),
123 fVerboseLevel ( kINFO ),
127 fJobName ( jobName ),
128 fAnalysisType (
Types::kClassification ),
129 fModelPersistence (
kTRUE)
159 DeclareOptionRef( color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)" );
160 DeclareOptionRef(
fTransformations,
"Transformations",
"List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
163 DeclareOptionRef( silent,
"Silent",
"Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
165 "DrawProgressBar",
"Draw progress bar to display training, testing and evaluation schedule (default: True)" );
168 "Option to save the trained model in xml file or using serialization");
172 "AnalysisType",
"Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
205 fTransformations (
"I" ),
209 fSilentFile (
kTRUE ),
210 fJobName ( jobName ),
211 fAnalysisType (
Types::kClassification ),
212 fModelPersistence (
kTRUE)
243 DeclareOptionRef( color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)" );
244 DeclareOptionRef(
fTransformations,
"Transformations",
"List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
247 DeclareOptionRef( silent,
"Silent",
"Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
249 "DrawProgressBar",
"Draw progress bar to display training, testing and evaluation schedule (default: True)" );
252 "Option to save the trained model in xml file or using serialization");
256 "AnalysisType",
"Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
306 return fModelPersistence;
314 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
315 for (;trfIt != fDefaultTrfs.end(); ++trfIt)
delete (*trfIt);
317 this->DeleteAllMethods();
332 std::map<TString,MVector*>::iterator itrMap;
334 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
336 MVector *methods=itrMap->second;
338 MVector::iterator itrMethod = methods->begin();
339 for (; itrMethod != methods->end(); ++itrMethod) {
340 Log() << kDEBUG <<
"Delete method: " << (*itrMethod)->GetName() <<
Endl;
380 if(fMethodsMap.find(datasetname)!=fMethodsMap.end())
382 if (
GetMethod( datasetname,methodTitle ) != 0) {
383 Log() << kFATAL <<
"Booking failed since method with title <"
384 << methodTitle <<
"> already exists "<<
"in with DataSet Name <"<< loader->
GetName()<<
"> "
390 Log() << kHEADER <<
"Booking method: " <<
gTools().
Color(
"bold") << methodTitle
398 "Number of times the classifier will be boosted" );
402 if(fModelPersistence)
415 Log() << kDEBUG <<
"Boost Number is " << boostNum <<
" > 0: train boosted classifier" <<
Endl;
419 Log() << kFATAL <<
"Method with type kBoost cannot be casted to MethodCategory. /Factory" <<
Endl;
421 if (fModelPersistence)
426 methBoost->
SetFile(fgTargetFile);
431 if (method==0)
return 0;
437 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
442 methCat->
SetFile(fgTargetFile);
450 Log() << kWARNING <<
"Method " << method->
GetMethodTypeName() <<
" is not capable of handling " ;
475 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
478 fMethodsMap[datasetname]=mvector;
480 fMethodsMap[datasetname]->push_back( method );
512 if (method ==
nullptr)
return nullptr;
515 Log() << kERROR <<
"Cannot handle category methods for now." <<
Endl;
519 if(fModelPersistence) {
539 if (HasMethod(datasetname, methodTitle) != 0) {
540 Log() << kFATAL <<
"Booking failed since method with title <"
541 << methodTitle <<
"> already exists "<<
"in with DataSet Name <"<< loader->
GetName()<<
"> "
548 if(fMethodsMap.count(datasetname) == 0) {
550 fMethodsMap[datasetname] = mvector;
553 fMethodsMap[datasetname]->push_back( method );
563 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
return 0;
565 MVector *methods=fMethodsMap.find(datasetname)->second;
567 MVector::const_iterator itrMethod;
569 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
581 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
return 0;
583 std::string methodName = methodTitle.
Data();
585 return ( 0 == methodName.compare(
m->GetName() ) );
589 Bool_t isMethodNameExisting = std::any_of( methods->begin(), methods->end(), isEqualToMethodName);
591 return isMethodNameExisting;
600 if(!RootBaseDir()->GetDirectory(fDataSetInfo.
GetName())) RootBaseDir()->mkdir(fDataSetInfo.
GetName());
603 RootBaseDir()->cd(fDataSetInfo.
GetName());
650 processTrfs = fTransformations;
653 std::vector<TMVA::TransformationHandler*> trfs;
657 std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
658 for (; trfsDefIt!=trfsDef.end(); ++trfsDefIt) {
663 Log() << kDEBUG <<
"current transformation string: '" << trfS.
Data() <<
"'" <<
Endl;
669 if (trfS.
BeginsWith(
'I')) identityTrHandler = trfs.back();
675 std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
677 for (;trfIt != trfs.end(); ++trfIt) {
679 (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.
GetName()));
680 (*trfIt)->CalcTransformations(inputEvents);
685 for (trfIt = trfs.begin(); trfIt != trfs.end(); ++trfIt)
delete *trfIt;
697 std::map<TString,MVector*>::iterator itrMap;
698 std::map<TString,Double_t> TunedParameters;
699 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
701 MVector *methods=itrMap->second;
703 MVector::iterator itrMethod;
706 for( itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod ) {
710 Log() << kFATAL <<
"Dynamic cast to MethodBase failed" <<
Endl;
711 return TunedParameters;
716 <<
" not trained (training tree has less entries ["
727 Log() << kINFO <<
"Optimization of tuning parameters finished for Method:"<<mva->
GetName() <<
Endl;
731 return TunedParameters;
759 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
760 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
764 if (!this->HasMethod(datasetname, theMethodName)) {
765 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data())
771 if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
772 Log() << kERROR <<
Form(
"Can only generate ROC curves for analysis type kClassification and kMulticlass.")
784 Log() << kERROR <<
Form(
"Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
794 std::vector<Bool_t> *mvaResTypes =
dynamic_cast<ResultsClassification *
>(results)->GetValueVectorTypes();
795 std::vector<Float_t> mvaResWeights;
798 mvaResWeights.reserve(eventCollection.size());
799 for (
auto ev : eventCollection) {
800 mvaResWeights.push_back(ev->GetWeight());
803 rocCurve =
new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);
806 std::vector<Float_t> mvaRes;
807 std::vector<Bool_t> mvaResTypes;
808 std::vector<Float_t> mvaResWeights;
810 std::vector<std::vector<Float_t>> *rawMvaRes =
dynamic_cast<ResultsMulticlass *
>(results)->GetValueVector();
815 mvaRes.reserve(rawMvaRes->size());
816 for (
auto item : *rawMvaRes) {
817 mvaRes.push_back(item[iClass]);
821 mvaResTypes.reserve(eventCollection.size());
822 mvaResWeights.reserve(eventCollection.size());
823 for (
auto ev : eventCollection) {
824 mvaResTypes.push_back(ev->GetClass() == iClass);
825 mvaResWeights.push_back(ev->GetWeight());
828 rocCurve =
new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
844 return GetROCIntegral((
TString)loader->
GetName(), theMethodName, iClass);
857 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
858 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
862 if ( ! this->HasMethod(datasetname, theMethodName) ) {
863 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
868 if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
869 Log() << kERROR <<
Form(
"Can only generate ROC integral for analysis type kClassification. and kMulticlass.")
874 TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
876 Log() << kFATAL <<
Form(
"ROCCurve object was not created in Method = %s not found with Dataset = %s ",
877 theMethodName.
Data(), datasetname.
Data())
905 return GetROCCurve( (
TString)loader->
GetName(), theMethodName, setTitles, iClass );
924 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
925 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
929 if ( ! this->HasMethod(datasetname, theMethodName) ) {
930 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
935 if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
936 Log() << kERROR <<
Form(
"Can only generate ROC curves for analysis type kClassification and kMulticlass.") <<
Endl;
940 TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
944 Log() << kFATAL <<
Form(
"ROCCurve object was not created in Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
952 graph->GetYaxis()->SetTitle(
"Background rejection (Specificity)");
953 graph->GetXaxis()->SetTitle(
"Signal efficiency (Sensitivity)");
954 graph->SetTitle(
Form(
"Signal efficiency vs. Background rejection (%s)", theMethodName.
Data()));
974 return GetROCCurveAsMultiGraph((
TString)loader->
GetName(), iClass);
995 MVector *methods = fMethodsMap[datasetname.
Data()];
996 for (
auto * method_raw : *methods) {
998 if (method ==
nullptr) {
continue; }
1004 Log() << kERROR <<
Form(
"Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass, nClasses) <<
Endl;
1010 TGraph *
graph = this->GetROCCurve(datasetname, methodName,
false, iClass);
1011 graph->SetTitle(methodName);
1013 graph->SetLineWidth(2);
1014 graph->SetLineColor(line_color++);
1015 graph->SetFillColor(10);
1021 Log() << kERROR <<
Form(
"No metohds have class %i defined.", iClass) <<
Endl;
1058 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
1059 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
1067 TMultiGraph *multigraph = this->GetROCCurveAsMultiGraph(datasetname, iClass);
1070 multigraph->
Draw(
"AL");
1075 TString titleString =
Form(
"Signal efficiency vs. Background rejection");
1077 titleString =
Form(
"%s (Class=%i)", titleString.
Data(), iClass);
1082 multigraph->
SetTitle( titleString );
1084 canvas->
BuildLegend(0.15, 0.15, 0.35, 0.3,
"MVA Method");
1100 if (fMethodsMap.empty()) {
1101 Log() << kINFO <<
"...nothing found to train" <<
Endl;
1107 Log() << kDEBUG <<
"Train all methods for "
1111 std::map<TString,MVector*>::iterator itrMap;
1113 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1115 MVector *methods=itrMap->second;
1116 MVector::iterator itrMethod;
1119 for( itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod ) {
1123 if(mva==0)
continue;
1126 Log() << kFATAL <<
"No input data for the training provided!" <<
Endl;
1130 Log() << kFATAL <<
"You want to do regression training without specifying a target." <<
Endl;
1133 Log() << kFATAL <<
"You want to do classification training, but specified less than two classes." <<
Endl;
1136 if(!IsSilentFile()) WriteDataInformation(mva->
fDataSetInfo);
1141 <<
" not trained (training tree has less entries ["
1151 Log() << kHEADER <<
"Training finished" <<
Endl <<
Endl;
1158 Log() << kINFO <<
"Ranking input variables (method specific)..." <<
Endl;
1159 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1164 const Ranking* ranking = (*itrMethod)->CreateRanking();
1165 if (ranking != 0) ranking->
Print();
1166 else Log() << kINFO <<
"No variable ranking supplied by classifier: "
1176 if (fModelPersistence) {
1178 Log() << kHEADER <<
"=== Destroy and recreate all methods via weight files for testing ===" <<
Endl <<
Endl;
1180 if(!IsSilentFile())RootBaseDir()->cd();
1183 for (
UInt_t i=0; i<methods->size(); i++) {
1189 TString weightfile =
m->GetWeightFileName();
1195 TString testvarName =
m->GetTestvarName();
1203 if( !methCat )
Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
1209 TString fFileDir=
m->DataInfo().GetName();
1211 m->SetWeightFileDir(fFileDir);
1212 m->SetModelPersistence(fModelPersistence);
1213 m->SetSilentFile(IsSilentFile());
1214 m->SetAnalysisType(fAnalysisType);
1216 m->ReadStateFromFile();
1217 m->SetTestvarName(testvarName);
1236 if (fMethodsMap.empty()) {
1237 Log() << kINFO <<
"...nothing found to test" <<
Endl;
1240 std::map<TString,MVector*>::iterator itrMap;
1242 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1244 MVector *methods=itrMap->second;
1245 MVector::iterator itrMethod;
1248 for( itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod ) {
1251 if(mva==0)
continue;
1265 if (methodTitle !=
"") {
1269 Log() << kWARNING <<
"<MakeClass> Could not find classifier \"" << methodTitle
1270 <<
"\" in list" <<
Endl;
1276 MVector *methods=fMethodsMap.find(datasetname)->second;
1277 MVector::const_iterator itrMethod;
1278 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1280 if(method==0)
continue;
1293 if (methodTitle !=
"") {
1297 Log() << kWARNING <<
"<PrintHelpMessage> Could not find classifier \"" << methodTitle
1298 <<
"\" in list" <<
Endl;
1304 MVector *methods=fMethodsMap.find(datasetname)->second;
1305 MVector::const_iterator itrMethod ;
1306 for (itrMethod = methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1308 if(method==0)
continue;
1320 Log() << kINFO <<
"Evaluating all variables..." <<
Endl;
1326 this->BookMethod(loader,
"Variable",
s );
1338 if (fMethodsMap.empty()) {
1339 Log() << kINFO <<
"...nothing found to evaluate" <<
Endl;
1342 std::map<TString,MVector*>::iterator itrMap;
1344 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();++itrMap)
1346 MVector *methods=itrMap->second;
1356 Int_t nmeth_used[2] = {0,0};
1358 std::vector<std::vector<TString> > mname(2);
1359 std::vector<std::vector<Double_t> > sig(2),
sep(2), roc(2);
1360 std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
1361 std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
1362 std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
1364 std::vector<std::vector<Float_t> > multiclass_testEff;
1365 std::vector<std::vector<Float_t> > multiclass_trainEff;
1366 std::vector<std::vector<Float_t> > multiclass_testPur;
1367 std::vector<std::vector<Float_t> > multiclass_trainPur;
1370 std::vector<TMatrixD> multiclass_trainConfusionEffB01;
1371 std::vector<TMatrixD> multiclass_trainConfusionEffB10;
1372 std::vector<TMatrixD> multiclass_trainConfusionEffB30;
1373 std::vector<TMatrixD> multiclass_testConfusionEffB01;
1374 std::vector<TMatrixD> multiclass_testConfusionEffB10;
1375 std::vector<TMatrixD> multiclass_testConfusionEffB30;
1377 std::vector<std::vector<Double_t> > biastrain(1);
1378 std::vector<std::vector<Double_t> > biastest(1);
1379 std::vector<std::vector<Double_t> > devtrain(1);
1380 std::vector<std::vector<Double_t> > devtest(1);
1381 std::vector<std::vector<Double_t> > rmstrain(1);
1382 std::vector<std::vector<Double_t> > rmstest(1);
1383 std::vector<std::vector<Double_t> > minftrain(1);
1384 std::vector<std::vector<Double_t> > minftest(1);
1385 std::vector<std::vector<Double_t> > rhotrain(1);
1386 std::vector<std::vector<Double_t> > rhotest(1);
1389 std::vector<std::vector<Double_t> > biastrainT(1);
1390 std::vector<std::vector<Double_t> > biastestT(1);
1391 std::vector<std::vector<Double_t> > devtrainT(1);
1392 std::vector<std::vector<Double_t> > devtestT(1);
1393 std::vector<std::vector<Double_t> > rmstrainT(1);
1394 std::vector<std::vector<Double_t> > rmstestT(1);
1395 std::vector<std::vector<Double_t> > minftrainT(1);
1396 std::vector<std::vector<Double_t> > minftestT(1);
1405 for (MVector::iterator itrMethod =methods->begin(); itrMethod != methods->end(); ++itrMethod) {
1408 if(theMethod==0)
continue;
1409 theMethod->
SetFile(fgTargetFile);
1414 doRegression =
kTRUE;
1421 Log() << kINFO <<
"TestRegression (testing)" <<
Endl;
1423 biastest[0] .push_back( bias );
1424 devtest[0] .push_back( dev );
1425 rmstest[0] .push_back( rms );
1426 minftest[0] .push_back( mInf );
1427 rhotest[0] .push_back( rho );
1428 biastestT[0] .push_back( biasT );
1429 devtestT[0] .push_back( devT );
1430 rmstestT[0] .push_back( rmsT );
1431 minftestT[0] .push_back( mInfT );
1433 Log() << kINFO <<
"TestRegression (training)" <<
Endl;
1435 biastrain[0] .push_back( bias );
1436 devtrain[0] .push_back( dev );
1437 rmstrain[0] .push_back( rms );
1438 minftrain[0] .push_back( mInf );
1439 rhotrain[0] .push_back( rho );
1440 biastrainT[0].push_back( biasT );
1441 devtrainT[0] .push_back( devT );
1442 rmstrainT[0] .push_back( rmsT );
1443 minftrainT[0].push_back( mInfT );
1449 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1457 doMulticlass =
kTRUE;
1458 Log() << kINFO <<
"Evaluate multiclass classification method: " << theMethod->
GetMethodName() <<
Endl;
1477 if (not IsSilentFile()) {
1478 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1501 eff01err[isel].push_back(err);
1503 eff10err[isel].push_back(err);
1505 eff30err[isel].push_back(err);
1514 if (!IsSilentFile()) {
1515 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1523 std::vector<TString> vtemps = mname[0];
1524 std::vector< std::vector<Double_t> > vtmp;
1525 vtmp.push_back( devtest[0] );
1526 vtmp.push_back( devtrain[0] );
1527 vtmp.push_back( biastest[0] );
1528 vtmp.push_back( biastrain[0] );
1529 vtmp.push_back( rmstest[0] );
1530 vtmp.push_back( rmstrain[0] );
1531 vtmp.push_back( minftest[0] );
1532 vtmp.push_back( minftrain[0] );
1533 vtmp.push_back( rhotest[0] );
1534 vtmp.push_back( rhotrain[0] );
1535 vtmp.push_back( devtestT[0] );
1536 vtmp.push_back( devtrainT[0] );
1537 vtmp.push_back( biastestT[0] );
1538 vtmp.push_back( biastrainT[0]);
1539 vtmp.push_back( rmstestT[0] );
1540 vtmp.push_back( rmstrainT[0] );
1541 vtmp.push_back( minftestT[0] );
1542 vtmp.push_back( minftrainT[0]);
1545 devtest[0] = vtmp[0];
1546 devtrain[0] = vtmp[1];
1547 biastest[0] = vtmp[2];
1548 biastrain[0] = vtmp[3];
1549 rmstest[0] = vtmp[4];
1550 rmstrain[0] = vtmp[5];
1551 minftest[0] = vtmp[6];
1552 minftrain[0] = vtmp[7];
1553 rhotest[0] = vtmp[8];
1554 rhotrain[0] = vtmp[9];
1555 devtestT[0] = vtmp[10];
1556 devtrainT[0] = vtmp[11];
1557 biastestT[0] = vtmp[12];
1558 biastrainT[0] = vtmp[13];
1559 rmstestT[0] = vtmp[14];
1560 rmstrainT[0] = vtmp[15];
1561 minftestT[0] = vtmp[16];
1562 minftrainT[0] = vtmp[17];
1563 }
else if (doMulticlass) {
1571 for (
Int_t k=0; k<2; k++) {
1572 std::vector< std::vector<Double_t> > vtemp;
1573 vtemp.push_back( effArea[k] );
1574 vtemp.push_back( eff10[k] );
1575 vtemp.push_back( eff01[k] );
1576 vtemp.push_back( eff30[k] );
1577 vtemp.push_back( eff10err[k] );
1578 vtemp.push_back( eff01err[k] );
1579 vtemp.push_back( eff30err[k] );
1580 vtemp.push_back( trainEff10[k] );
1581 vtemp.push_back( trainEff01[k] );
1582 vtemp.push_back( trainEff30[k] );
1583 vtemp.push_back( sig[k] );
1584 vtemp.push_back(
sep[k] );
1585 vtemp.push_back( roc[k] );
1586 std::vector<TString> vtemps = mname[k];
1588 effArea[k] = vtemp[0];
1589 eff10[k] = vtemp[1];
1590 eff01[k] = vtemp[2];
1591 eff30[k] = vtemp[3];
1592 eff10err[k] = vtemp[4];
1593 eff01err[k] = vtemp[5];
1594 eff30err[k] = vtemp[6];
1595 trainEff10[k] = vtemp[7];
1596 trainEff01[k] = vtemp[8];
1597 trainEff30[k] = vtemp[9];
1613 const Int_t nmeth = methodsNoCuts.size();
1616 if (!doRegression && !doMulticlass ) {
1622 std::vector<Double_t> rvec;
1630 std::vector<TString>* theVars =
new std::vector<TString>;
1631 std::vector<ResultsClassification*> mvaRes;
1632 for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); ++itrMethod, ++ivar) {
1635 theVars->push_back(
m->GetTestvarName() );
1636 rvec.push_back(
m->GetSignalReferenceCut() );
1637 theVars->back().ReplaceAll(
"MVA_",
"" );
1657 for (
Int_t im=0; im<nmeth; im++) {
1661 Log() << kWARNING <<
"Found NaN return value in event: " << ievt
1662 <<
" for method \"" << methodsNoCuts[im]->GetName() <<
"\"" <<
Endl;
1665 else dvec[im] = retval;
1669 else { tpBkg->
AddRow( dvec ); theMat = overlapB; }
1672 for (
Int_t im=0; im<nmeth; im++) {
1673 for (
Int_t jm=im; jm<nmeth; jm++) {
1674 if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1676 if (im != jm) (*theMat)(jm,im)++;
1696 if (corrMatS != 0 && corrMatB != 0) {
1701 for (
Int_t im=0; im<nmeth; im++) {
1702 for (
Int_t jm=0; jm<nmeth; jm++) {
1703 mvaMatS(im,jm) = (*corrMatS)(im,jm);
1704 mvaMatB(im,jm) = (*corrMatB)(im,jm);
1709 std::vector<TString> theInputVars;
1712 for (
Int_t iv=0; iv<nvar; iv++) {
1714 for (
Int_t jm=0; jm<nmeth; jm++) {
1715 varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1716 varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1731 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Correlations between input variables and MVA response (signal):" <<
Endl;
1735 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Correlations between input variables and MVA response (background):" <<
Endl;
1742 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"The following \"overlap\" matrices contain the fraction of events for which " <<
Endl;
1743 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" <<
Endl;
1744 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"An event is signal-like, if its MVA output exceeds the following value:" <<
Endl;
1746 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"which correspond to the working point: eff(signal) = 1 - eff(background)" <<
Endl;
1749 if (nmeth != (
Int_t)methods->size())
1750 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Note: no correlations and overlap with cut method are provided at present" <<
Endl;
1782 TString hLine =
"--------------------------------------------------------------------------------------------------";
1783 Log() << kINFO <<
"Evaluation results ranked by smallest RMS on test sample:" <<
Endl;
1784 Log() << kINFO <<
"(\"Bias\" quotes the mean deviation of the regression from true target." <<
Endl;
1785 Log() << kINFO <<
" \"MutInf\" is the \"Mutual Information\" between regression and target." <<
Endl;
1786 Log() << kINFO <<
" Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" <<
Endl;
1787 Log() << kINFO <<
" tained when removing events deviating more than 2sigma from average.)" <<
Endl;
1788 Log() << kINFO << hLine <<
Endl;
1790 Log() << kINFO << hLine <<
Endl;
1792 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1794 if(theMethod==0)
continue;
1796 Log() << kINFO <<
Form(
"%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1798 (
const char*)mname[0][i],
1799 biastest[0][i], biastestT[0][i],
1800 rmstest[0][i], rmstestT[0][i],
1801 minftest[0][i], minftestT[0][i] )
1804 Log() << kINFO << hLine <<
Endl;
1806 Log() << kINFO <<
"Evaluation results ranked by smallest RMS on training sample:" <<
Endl;
1807 Log() << kINFO <<
"(overtraining check)" <<
Endl;
1808 Log() << kINFO << hLine <<
Endl;
1809 Log() << kINFO <<
"DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" <<
Endl;
1810 Log() << kINFO << hLine <<
Endl;
1812 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1814 if(theMethod==0)
continue;
1815 Log() << kINFO <<
Form(
"%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1817 (
const char*)mname[0][i],
1818 biastrain[0][i], biastrainT[0][i],
1819 rmstrain[0][i], rmstrainT[0][i],
1820 minftrain[0][i], minftrainT[0][i] )
1823 Log() << kINFO << hLine <<
Endl;
1825 }
else if (doMulticlass) {
1831 "-------------------------------------------------------------------------------------------------------";
1870 TString header1 =
Form(
"%-15s%-15s%-15s%-15s%-15s%-15s",
"Dataset",
"MVA Method",
"ROC AUC",
"Sig eff@B=0.01",
1871 "Sig eff@B=0.10",
"Sig eff@B=0.30");
1872 TString header2 =
Form(
"%-15s%-15s%-15s%-15s%-15s%-15s",
"Name:",
"/ Class:",
"test (train)",
"test (train)",
1873 "test (train)",
"test (train)");
1875 Log() << kINFO <<
"1-vs-rest performance metrics per class" <<
Endl;
1876 Log() << kINFO << hLine <<
Endl;
1878 Log() << kINFO <<
"Considers the listed class as signal and the other classes" <<
Endl;
1879 Log() << kINFO <<
"as background, reporting the resulting binary performance." <<
Endl;
1880 Log() << kINFO <<
"A score of 0.820 (0.850) means 0.820 was acheived on the" <<
Endl;
1881 Log() << kINFO <<
"test set and 0.850 on the training set." <<
Endl;
1884 Log() << kINFO << header1 <<
Endl;
1885 Log() << kINFO << header2 <<
Endl;
1886 for (
Int_t k = 0; k < 2; k++) {
1887 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
1889 mname[k][i].ReplaceAll(
"Variable_",
"");
1892 const TString datasetName = itrMap->first;
1893 const TString mvaName = mname[k][i];
1896 if (theMethod == 0) {
1902 Log() << kINFO << row <<
Endl;
1903 Log() << kINFO <<
"------------------------------" <<
Endl;
1906 for (
UInt_t iClass = 0; iClass < numClasses; ++iClass) {
1920 const TString rocaucCmp =
Form(
"%5.3f (%5.3f)", rocaucTest, rocaucTrain);
1921 const TString effB01Cmp =
Form(
"%5.3f (%5.3f)", effB01Test, effB01Train);
1922 const TString effB10Cmp =
Form(
"%5.3f (%5.3f)", effB10Test, effB10Train);
1923 const TString effB30Cmp =
Form(
"%5.3f (%5.3f)", effB30Test, effB30Train);
1924 row =
Form(
"%-15s%-15s%-15s%-15s%-15s%-15s",
"", className.
Data(), rocaucCmp.
Data(), effB01Cmp.
Data(),
1925 effB10Cmp.
Data(), effB30Cmp.
Data());
1926 Log() << kINFO << row <<
Endl;
1928 delete rocCurveTrain;
1929 delete rocCurveTest;
1934 Log() << kINFO << hLine <<
Endl;
1939 auto printMatrix = [](
TMatrixD const &matTraining,
TMatrixD const &matTesting, std::vector<TString> classnames,
1949 for (
UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1950 header +=
Form(
" %-14s", classnames[iCol].Data());
1951 headerInfo +=
Form(
" %-14s",
" test (train)");
1953 stream << kINFO << header <<
Endl;
1954 stream << kINFO << headerInfo <<
Endl;
1956 for (
UInt_t iRow = 0; iRow < numClasses; ++iRow) {
1957 stream << kINFO <<
Form(
" %-14s", classnames[iRow].Data());
1959 for (
UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1961 stream << kINFO <<
Form(
" %-14s",
"-");
1963 Double_t trainValue = matTraining[iRow][iCol];
1964 Double_t testValue = matTesting[iRow][iCol];
1965 TString entry =
Form(
"%-5.3f (%-5.3f)", testValue, trainValue);
1966 stream << kINFO <<
Form(
" %-14s", entry.
Data());
1969 stream << kINFO <<
Endl;
1974 Log() << kINFO <<
"Confusion matrices for all methods" <<
Endl;
1975 Log() << kINFO << hLine <<
Endl;
1977 Log() << kINFO <<
"Does a binary comparison between the two classes given by a " <<
Endl;
1978 Log() << kINFO <<
"particular row-column combination. In each case, the class " <<
Endl;
1979 Log() << kINFO <<
"given by the row is considered signal while the class given " <<
Endl;
1980 Log() << kINFO <<
"by the column index is considered background." <<
Endl;
1982 for (
UInt_t iMethod = 0; iMethod < methods->size(); ++iMethod) {
1984 if (theMethod ==
nullptr) {
1989 std::vector<TString> classnames;
1990 for (
UInt_t iCls = 0; iCls < numClasses; ++iCls) {
1994 <<
"=== Showing confusion matrix for method : " <<
Form(
"%-15s", (
const char *)mname[0][iMethod])
1996 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.01%)" <<
Endl;
1997 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
1998 printMatrix(multiclass_testConfusionEffB01[iMethod], multiclass_trainConfusionEffB01[iMethod], classnames,
2002 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.10%)" <<
Endl;
2003 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
2004 printMatrix(multiclass_testConfusionEffB10[iMethod], multiclass_trainConfusionEffB10[iMethod], classnames,
2008 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.30%)" <<
Endl;
2009 Log() << kINFO <<
"---------------------------------------------------" <<
Endl;
2010 printMatrix(multiclass_testConfusionEffB30[iMethod], multiclass_trainConfusionEffB30[iMethod], classnames,
2014 Log() << kINFO << hLine <<
Endl;
2020 Log().EnableOutput();
2023 TString hLine =
"------------------------------------------------------------------------------------------"
2024 "-------------------------";
2025 Log() << kINFO <<
"Evaluation results ranked by best signal efficiency and purity (area)" <<
Endl;
2026 Log() << kINFO << hLine <<
Endl;
2027 Log() << kINFO <<
"DataSet MVA " <<
Endl;
2028 Log() << kINFO <<
"Name: Method: ROC-integ" <<
Endl;
2033 Log() << kDEBUG << hLine <<
Endl;
2034 for (
Int_t k = 0; k < 2; k++) {
2035 if (k == 1 && nmeth_used[k] > 0) {
2036 Log() << kINFO << hLine <<
Endl;
2037 Log() << kINFO <<
"Input Variables: " <<
Endl << hLine <<
Endl;
2039 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
2040 TString datasetName = itrMap->first;
2041 TString methodName = mname[k][i];
2048 if (theMethod == 0) {
2054 std::vector<Bool_t> *mvaResType =
2058 if (mvaResType->size() != 0) {
2059 rocIntegral = GetROCIntegral(datasetName, methodName);
2062 if (
sep[k][i] < 0 || sig[k][i] < 0) {
2064 Log() << kINFO <<
Form(
"%-13s %-15s: %#1.3f", datasetName.
Data(), methodName.
Data(), effArea[k][i])
2076 Log() << kINFO <<
Form(
"%-13s %-15s: %#1.3f", datasetName.
Data(), methodName.
Data(), rocIntegral)
2090 Log() << kINFO << hLine <<
Endl;
2092 Log() << kINFO <<
"Testing efficiency compared to training efficiency (overtraining check)" <<
Endl;
2093 Log() << kINFO << hLine <<
Endl;
2095 <<
"DataSet MVA Signal efficiency: from test sample (from training sample) "
2097 Log() << kINFO <<
"Name: Method: @B=0.01 @B=0.10 @B=0.30 "
2099 Log() << kINFO << hLine <<
Endl;
2100 for (
Int_t k = 0; k < 2; k++) {
2101 if (k == 1 && nmeth_used[k] > 0) {
2102 Log() << kINFO << hLine <<
Endl;
2103 Log() << kINFO <<
"Input Variables: " <<
Endl << hLine <<
Endl;
2105 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
2106 if (k == 1) mname[k][i].ReplaceAll(
"Variable_",
"");
2108 if (theMethod == 0)
continue;
2110 Log() << kINFO <<
Form(
"%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
2112 trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
2116 Log() << kINFO << hLine <<
Endl;
2119 if (
gTools().CheckForSilentOption(GetOptions()))
Log().InhibitOutput();
2124 std::list<TString> datasets;
2125 for (
Int_t k=0; k<2; k++) {
2126 for (
Int_t i=0; i<nmeth_used[k]; i++) {
2128 if(theMethod==0)
continue;
2131 if(std::find(datasets.begin(), datasets.end(), theMethod->
fDataSetInfo.
GetName()) == datasets.end())
2150 fModelPersistence=
kFALSE;
2155 if(vitype==VIType::kShort)
2156 return EvaluateImportanceShort(loader,theMethod,methodTitle,theOption);
2157 else if(vitype==VIType::kAll)
2158 return EvaluateImportanceAll(loader,theMethod,methodTitle,theOption);
2159 else if(vitype==VIType::kRandom&&nbits>10)
2161 return EvaluateImportanceRandom(loader,
pow(2,nbits),theMethod,methodTitle,theOption);
2164 std::cerr<<
"Error in Variable Importance: Random mode require more that 10 variables in the dataset."<<std::endl;
2181 uint64_t range =
pow(2, nbits);
2184 std::vector<Double_t> importances(nbits);
2186 std::vector<Double_t> ROC(range);
2188 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2191 for (
x = 1;
x <range ;
x++) {
2193 std::bitset<VIBITS> xbitset(
x);
2194 if (
x == 0)
continue;
2200 for (
int index = 0; index < nbits; index++) {
2201 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2208 BookMethod(seedloader, theMethod, methodTitle, theOption);
2213 EvaluateAllMethods();
2216 ROC[
x] = GetROCIntegral(xbitset.to_string(), methodTitle);
2223 this->DeleteAllMethods();
2225 fMethodsMap.clear();
2230 for (
x = 0;
x <range ;
x++)
2233 for (uint32_t i = 0; i <
VIBITS; ++i) {
2236 std::bitset<VIBITS> ybitset(
y);
2242 importances[ny] = SROC - 0.5;
2248 importances[ny] += SROC - SSROC;
2254 std::cout<<
"--- Variable Importance Results (All)"<<std::endl;
2255 return GetImportance(nbits,importances,varNames);
2258static long int sum(
long int i)
2261 for(
long int n=0;
n<i;
n++) _sum+=
pow(2,
n);
2276 long int range =
sum(nbits);
2279 std::vector<Double_t> importances(nbits);
2280 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2286 std::bitset<VIBITS> xbitset(
x);
2287 if (
x == 0)
Log()<<kFATAL<<
"Error: need at least one variable.";
2294 for (
int index = 0; index < nbits; index++) {
2295 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2302 BookMethod(seedloader, theMethod, methodTitle, theOption);
2307 EvaluateAllMethods();
2310 SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2317 this->DeleteAllMethods();
2318 fMethodsMap.clear();
2322 for (uint32_t i = 0; i <
VIBITS; ++i) {
2325 std::bitset<VIBITS> ybitset(
y);
2331 importances[ny] = SROC - 0.5;
2338 for (
int index = 0; index < nbits; index++) {
2339 if (ybitset[index]) subseedloader->
AddVariable(varNames[index],
'F');
2346 BookMethod(subseedloader, theMethod, methodTitle, theOption);
2351 EvaluateAllMethods();
2354 SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2355 importances[ny] += SROC - SSROC;
2361 delete subseedloader;
2362 this->DeleteAllMethods();
2363 fMethodsMap.clear();
2366 std::cout<<
"--- Variable Importance Results (Short)"<<std::endl;
2367 return GetImportance(nbits,importances,varNames);
2383 long int range =
pow(2, nbits);
2386 std::vector<Double_t> importances(nbits);
2388 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2392 x = rangen -> Integer(range);
2394 std::bitset<32> xbitset(
x);
2395 if (
x == 0)
continue;
2402 for (
int index = 0; index < nbits; index++) {
2403 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2410 BookMethod(seedloader, theMethod, methodTitle, theOption);
2415 EvaluateAllMethods();
2418 SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2426 this->DeleteAllMethods();
2427 fMethodsMap.clear();
2431 for (uint32_t i = 0; i < 32; ++i) {
2434 std::bitset<32> ybitset(
y);
2440 importances[ny] = SROC - 0.5;
2441 importances_norm += importances[ny];
2449 for (
int index = 0; index < nbits; index++) {
2450 if (ybitset[index]) subseedloader->
AddVariable(varNames[index],
'F');
2457 BookMethod(subseedloader, theMethod, methodTitle, theOption);
2462 EvaluateAllMethods();
2465 SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2466 importances[ny] += SROC - SSROC;
2472 delete subseedloader;
2473 this->DeleteAllMethods();
2474 fMethodsMap.clear();
2478 std::cout<<
"--- Variable Importance Results (Random)"<<std::endl;
2479 return GetImportance(nbits,importances,varNames);
2486 TH1F *vih1 =
new TH1F(
"vih1",
"", nbits, 0, nbits);
2491 for (
int i = 0; i < nbits; i++) {
2492 normalization = normalization + importances[i];
2502 for (
Int_t i = 1; i < nbits + 1; i++) {
2503 x_ie[i - 1] = (i - 1) * 1.;
2504 roc = 100.0 * importances[i - 1] / normalization;
2506 std::cout<<
"--- "<<varNames[i-1]<<
" = "<<roc<<
" %"<<std::endl;
double pow(double, double)
TMatrixT< Double_t > TMatrixD
char * Form(const char *fmt,...)
R__EXTERN TStyle * gStyle
R__EXTERN TSystem * gSystem
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title Offset is a correction factor with respect to the "s...
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title The size is expressed in per cent of the pad width.
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb",...
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
A Graph is a graphics object made of two arrays X and Y with npoints each.
virtual void SetTitle(const char *title="")
Change (i.e.
1-D histogram with a float per channel (see TH1 documentation)}
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
virtual void SetTitle(const char *title)
See GetStatOverflows for more information.
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
virtual void SetBarWidth(Float_t width=0.5)
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Service class for 2-Dim histogram classes.
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
void SetDrawProgressBar(Bool_t d)
void SetUseColor(Bool_t uc)
class TMVA::Config::VariablePlotting fVariablePlotting
void SetConfigDescription(const char *d)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
void SetConfigName(const char *n)
virtual void ParseOptions()
options parser
const TString & GetOptions() const
void CheckForUnusedOptions() const
checks for unused options in option string
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
DataSetInfo & GetDataSetInfo()
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Class that contains all the data information.
UInt_t GetNVariables() const
virtual const char * GetName() const
Returns name of object.
const TMatrixD * CorrelationMatrix(const TString &className) const
UInt_t GetNClasses() const
const TString & GetSplitOptions() const
UInt_t GetNTargets() const
DataSet * GetDataSet() const
returns data set
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
std::vector< TString > GetListOfVariables() const
returns list of variables
ClassInfo * GetClassInfo(Int_t clNum) const
const TCut & GetCut(Int_t i) const
VariableInfo & GetVariableInfo(Int_t i)
Bool_t IsSignal(const Event *ev) const
DataSetManager * GetDataSetManager()
DataInputHandler & DataInput()
Class that contains all the data information.
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets,...
const Event * GetEvent() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Long64_t GetNTrainingEvents() const
void SetCurrentType(Types::ETreeType type) const
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
This is the main MVA steering class.
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Calculate the integral of the ROC curve, also known as the area under curve (AUC),...
Bool_t fCorrelations
verbosity level, controls granularity of logging
std::vector< IMethod * > MVector
void TrainAllMethods()
Iterates through all booked methods and calls training.
Bool_t Verbose(void) const
void WriteDataInformation(DataSetInfo &fDataSetInfo)
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponding dataset.
Bool_t fVerbose
list of transformations to test
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Bool_t fROC
flag to enable calculation of correlations
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
TString fVerboseLevel
verbose mode
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
virtual const char * GetName() const
Returns name of object.
virtual ~Factory()
Destructor.
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
Bool_t IsModelPersistence()
MethodBase * BookMethodWeightfile(DataLoader *dataloader, TMVA::Types::EMVA methodType, const TString &weightfile)
Adds an already constructed method to be managed by this factory.
Bool_t fModelPersistence
the training type
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods and sees if they use parameter tuning and if so.
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Private method to generate a ROCCurve instance for a given method.
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Types::EAnalysisType fAnalysisType
jobname, used as extension in weight file names
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
void SetVerbose(Bool_t v=kTRUE)
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
void DeleteAllMethods(void)
Delete methods.
TString fTransformations
option string given by construction (presently only "V")
void Greetings()
Print welcome message.
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
Generate a collection of graphs, for all methods for a given class.
Interface for all concrete MVA method implementations.
virtual void PrintHelpMessage() const =0
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
virtual void MakeClass(const TString &classFileName=TString("")) const =0
Virtual base Class for all MVA method.
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
void SetSilentFile(Bool_t status)
void SetWeightFileDir(TString fileDir)
set directory of weight file
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
TString GetMethodTypeName() const
Bool_t DoMulticlass() const
virtual Double_t GetSignificance() const
compute significance of mean difference
const char * GetName() const
Types::EAnalysisType GetAnalysisType() const
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
void PrintHelpMessage() const
prints out method-specific help method
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void TestMulticlass()
test multiclass classification
void SetupMethod()
setup of methods
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp. rejection) versus signal efficiency plots
virtual void SetAnalysisType(Types::EAnalysisType type)
const TString & GetMethodName() const
Bool_t DoRegression() const
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
virtual Double_t GetTrainingEfficiency(const TString &)
DataSetInfo & DataInfo() const
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
virtual void TestClassification()
initialization
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
void ReadStateFromFile()
Function to write options and weights to file.
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
DataSetInfo & fDataSetInfo
Types::EMVA GetMethodType() const
void SetFile(TFile *file)
void SetModelPersistence(Bool_t status)
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Class for boosting a TMVA method.
void SetBoostedMethodName(TString methodName)
DataSetManager * fDataSetManager
Class for categorizing the phase space.
DataSetManager * fDataSetManager
ostringstream derivative to redirect and format output
void SetMinType(EMsgType minType)
void SetSource(const std::string &source)
static void InhibitOutput()
Double_t GetEffSForEffB(Double_t effB, const UInt_t num_points=41)
Calculate the signal efficiency (sensitivity) for a given background efficiency (sensitivity).
Double_t GetROCIntegral(const UInt_t points=41)
Calculates the ROC integral (AUC)
TGraph * GetROCCurve(const UInt_t points=100)
Returns a new TGraph containing the ROC curve.
Ranking for variables in method (implementation)
virtual void Print() const
get maximum length of variable names
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Singleton class for Global types used by TMVA.
static Types & Instance()
returns the single instance of "Types" if it exists already, or creates it (Singleton)
const TString & GetLabel() const
A TMultiGraph is a collection of TGraph (or derived) objects.
TList * GetListOfGraphs() const
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
TH1F * GetHistogram()
Returns a pointer to the histogram used to draw the axis.
virtual void Draw(Option_t *chopt="")
Draw this multigraph with its current attributes.
TAxis * GetYaxis()
Get y axis of the graph.
TAxis * GetXaxis()
Get x axis of the graph.
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
virtual const char * GetName() const
Returns name of object.
@ kOverwrite
overwrite existing object with same name
virtual TLegend * BuildLegend(Double_t x1=0.3, Double_t y1=0.21, Double_t x2=0.3, Double_t y2=0.21, const char *title="", Option_t *option="")
Build a legend from the graphical objects in the pad.
virtual void SetGrid(Int_t valuex=1, Int_t valuey=1)
Principal Components Analysis (PCA)
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
const TMatrixD * GetCovarianceMatrix() const
virtual void MakePrincipals()
Perform the principal components analysis.
Random number generator class based on M.
void ToLower()
Change string to lower-case.
int CompareTo(const char *cs, ECaseCompare cmp=kExact) const
Compare a string to char *cs2.
const char * Data() const
TString & ReplaceAll(const TString &s1, const TString &s2)
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
void SetTitleXOffset(Float_t offset=1)
virtual int MakeDirectory(const char *name)
Make a directory.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
std::string GetMethodName(TCppMethod_t)
TCppMethod_t GetMethod(TCppScope_t scope, TCppIndex_t imeth)
static constexpr double s
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
static long int sum(long int i)
const Int_t MinNoTrainingEvents