#define READXML kTRUE
// ...
   fTransformations  ( "I" ),
   // ...
   fJobName          ( jobName ),
   fAnalysisType     ( Types::kClassification ),
   fModelPersistence ( kTRUE )
   DeclareOptionRef( color, "Color",
                     "Flag for coloured screen output (default: True, if in batch mode: False)" );
   DeclareOptionRef( fTransformations, "Transformations",
                     "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
   // ...
   DeclareOptionRef( silent, "Silent",
                     "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
   DeclareOptionRef( drawProgressBar, "DrawProgressBar",
                     "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
   // ...
   DeclareOptionRef( fModelPersistence, "ModelPersistence",
                     "Option to save the trained model in an XML file or using serialization" );
   // ...
   DeclareOptionRef( analysisType, "AnalysisType",
                     "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
   DeclareOptionRef( color, "Color",
                     "Flag for coloured screen output (default: True, if in batch mode: False)" );
   DeclareOptionRef( fTransformations, "Transformations",
                     "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
   // ...
   DeclareOptionRef( silent, "Silent",
                     "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
   DeclareOptionRef( drawProgressBar, "DrawProgressBar",
                     "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
   // ...
   DeclareOptionRef( fModelPersistence, "ModelPersistence",
                     "Option to save the trained model in an XML file or using serialization" );
   // ...
   DeclareOptionRef( analysisType, "AnalysisType",
                     "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
   std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
   for (; trfIt != fDefaultTrfs.end(); trfIt++) delete (*trfIt);
   std::map<TString,MVector*>::iterator itrMap;
   // ...
      MVector *methods = itrMap->second;
      // ...
      MVector::iterator itrMethod = methods->begin();
      for (; itrMethod != methods->end(); itrMethod++) {
         Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
   if (GetMethod( datasetname, methodTitle ) != 0) {
      Log() << kFATAL << "Booking failed since method with title <" << methodTitle
            << "> already exists with DataSet Name <" << loader->GetName() << "> "
   // ...
   DeclareOptionRef( boostNum, "Boost_num",
                     "Number of times the classifier will be boosted" );
      Log() << kDEBUG << "Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;
      // ...
         Log() << kFATAL << "Method with type kBoost cannot be cast to MethodCategory. /Factory" << Endl;
   // ...
   if (method == 0) return 0;
   // ...
      Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl;
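
The Boost_num option handled above is what routes a booking through the kBoost branch. A hedged sketch of a call that exercises it — the method choice and option values are illustrative:

   // Sketch only: 'factory' and 'dataloader' are assumed to exist already.
   factory.BookMethod( dataloader, TMVA::Types::kFisher, "BoostedFisher",
                       "H:!V:Boost_Num=20:Boost_Transform=log" );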
   MVector::const_iterator itrMethod;
   // ...
   for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
   std::vector<TMVA::TransformationHandler*> trfs;
   // ...
   std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
   for (; trfsDefIt != trfsDef.end(); trfsDefIt++) {
      // ...
      Log() << kDEBUG << "current transformation string: '" << trfS.Data() << "'" << Endl;
      // ...
      if (trfS.BeginsWith('I')) identityTrHandler = trfs.back();
   }
   // ...
   std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
   for (; trfIt != trfs.end(); trfIt++) {
      // ...
      (*trfIt)->CalcTransformations(inputEvents);
   }
   // ...
   for (trfIt = trfs.begin(); trfIt != trfs.end(); trfIt++) delete *trfIt;
   std::map<TString,MVector*>::iterator itrMap;
   std::map<TString,Double_t> TunedParameters;
   // ...
      MVector *methods = itrMap->second;
      // ...
      MVector::iterator itrMethod;
      // ...
      for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
         // ...
            Log() << kFATAL << "Dynamic cast to MethodBase failed" << Endl;
            return TunedParameters;
         // ...
               << " not trained (training tree has less entries ["
         // ...
         Log() << kINFO << "Optimization of tuning parameters finished for Method:" << mva->GetName() << Endl;
      }
   // ...
   return TunedParameters;
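
OptimizeAllMethods() simply loops over the booked methods, as shown above, and forwards to each method's OptimizeTuningParameters(). A hedged usage sketch with the documented defaults:

   // Sketch only: returns the tuned parameters per method.
   std::map<TString, Double_t> tuned =
      factory.OptimizeAllMethods( "ROCIntegral", "FitGA" );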
      MVector::iterator itrMethod = methods->begin();
      // ...
      while (itrMethod != methods->end()) {
         // ...
         std::vector<Bool_t> *mvaResType = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
         // ...
         if (!fROCCurve)
            Log() << kFATAL << Form("ROCCurve object was not created in Method = %s not found with Dataset = %s ",
                                    theMethodName.Data(), datasetname.Data()) << Endl;
      MVector::iterator itrMethod = methods->begin();
      // ...
      while (itrMethod != methods->end()) {
         // ...
         std::vector<Bool_t> *mvaResType = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
         // ...
         if (!fROCCurve)
            Log() << kFATAL << Form("ROCCurve object was not created in Method = %s not found with Dataset = %s ",
                                    theMethodName.Data(), datasetname.Data()) << Endl;
         // ...
         fGraph->GetYaxis()->SetTitle("Background Rejection");
         fGraph->GetXaxis()->SetTitle("Signal Efficiency");
         fGraph->SetTitle(Form("Background Rejection vs. Signal Efficiency (%s)", method->GetMethodName().Data()));
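
The graph configured above is what GetROCCurve() hands back to the caller. A hedged retrieval sketch — the method title "BDT" is illustrative:

   // Sketch only: draw the ROC curve and query its integral for one method.
   TGraph  *roc = factory.GetROCCurve( dataloader, "BDT" );
   if (roc) roc->Draw( "AL" );
   Double_t auc = factory.GetROCIntegral( dataloader, "BDT" );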
   TLegend *fLegend = new TLegend(0.15, 0.15, 0.35, 0.3, "MVA Method");
   // ...
   MVector::iterator itr = methods->begin();
   // ...
   while (itr != methods->end()) {
      // ...
      std::vector<Float_t> *mvaRes =
         dynamic_cast<ResultsClassification *>(results)->GetValueVector();
      std::vector<Bool_t> *mvaResType =
         dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
      // ...
      fGraph->GetYaxis()->SetTitle("Background Rejection");
      fGraph->GetXaxis()->SetTitle("Signal Efficiency");
      fGraph->SetTitle("Background Rejection vs. Signal Efficiency");
      // ...
      fGraph->SetLineWidth(2);
      fGraph->SetLineColor(++line_color);
   Log() << kDEBUG << "Train all methods for " /* ... */ << Endl;
   // ...
   std::map<TString,MVector*>::iterator itrMap;
   // ...
      MVector *methods = itrMap->second;
      MVector::iterator itrMethod;
      // ...
      for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
         // ...
            Log() << kFATAL << "No input data for the training provided!" << Endl;
         // ...
            Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;
         // ...
            Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;
         // ...
               << " not trained (training tree has less entries ["
         // ...
         Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
         for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
            // ...
            const Ranking* ranking = (*itrMethod)->CreateRanking();
            if (ranking != 0) ranking->Print();
            else Log() << kINFO << "No variable ranking supplied by classifier: " /* ... */ << Endl;
         }
   // ...
   Log() << kHEADER << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;
   // ...
   for (UInt_t i=0; i<methods->size(); i++) {
      // ...
                                          dataSetInfo, weightfile ) );
      // ...
      if (!methCat)
         Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl;
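
TrainAllMethods() is normally the first step of the standard three-call sequence; the destroy-and-recreate-from-weight-files pass above prepares the testing stage. A hedged sketch of the usual order:

   // Sketch only: the canonical sequence after booking all methods.
   factory.TrainAllMethods();      // train every booked method
   factory.TestAllMethods();       // apply them to the test sample
   factory.EvaluateAllMethods();   // compare and summarise performance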
   std::map<TString,MVector*>::iterator itrMap;
   // ...
      MVector *methods = itrMap->second;
      MVector::iterator itrMethod;
      // ...
      for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
   // ...
   if (methodTitle != "") {
      // ...
      Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle
            << "\" in list" << Endl;
      MVector::const_iterator itrMethod;
      for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
         // ...
         if (method == 0) continue;
   // ...
   if (methodTitle != "") {
      // ...
      Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle
            << "\" in list" << Endl;
   // ...
      MVector::const_iterator itrMethod;
      for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
         // ...
         if (method == 0) continue;
   Log() << kINFO << "Evaluating all variables..." << Endl;
   // ...
   if (options.Contains("V")) s += ":V";
   // ...
   Log() << kINFO << "...nothing found to evaluate" << Endl;
   // ...
   std::map<TString,MVector*>::iterator itrMap;
   // ...
      MVector *methods = itrMap->second;
   Int_t nmeth_used[2] = {0,0};
   // ...
   std::vector<std::vector<TString> >  mname(2);
   std::vector<std::vector<Double_t> > sig(2), sep(2), roc(2);
   std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
   std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
   std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
   // ...
   std::vector<std::vector<Float_t> > multiclass_testEff;
   std::vector<std::vector<Float_t> > multiclass_trainEff;
   std::vector<std::vector<Float_t> > multiclass_testPur;
   std::vector<std::vector<Float_t> > multiclass_trainPur;
   // ...
   std::vector<std::vector<Double_t> > biastrain(1);
   std::vector<std::vector<Double_t> > biastest(1);
   std::vector<std::vector<Double_t> > devtrain(1);
   std::vector<std::vector<Double_t> > devtest(1);
   std::vector<std::vector<Double_t> > rmstrain(1);
   std::vector<std::vector<Double_t> > rmstest(1);
   std::vector<std::vector<Double_t> > minftrain(1);
   std::vector<std::vector<Double_t> > minftest(1);
   std::vector<std::vector<Double_t> > rhotrain(1);
   std::vector<std::vector<Double_t> > rhotest(1);
   // ...
   std::vector<std::vector<Double_t> > biastrainT(1);
   std::vector<std::vector<Double_t> > biastestT(1);
   std::vector<std::vector<Double_t> > devtrainT(1);
   std::vector<std::vector<Double_t> > devtestT(1);
   std::vector<std::vector<Double_t> > rmstrainT(1);
   std::vector<std::vector<Double_t> > rmstestT(1);
   std::vector<std::vector<Double_t> > minftrainT(1);
   std::vector<std::vector<Double_t> > minftestT(1);
   for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
      // ...
      if (theMethod == 0) continue;
      // ...
         doRegression = kTRUE;
         // ...
         biastest[0]  .push_back( bias );
         devtest[0]   .push_back( dev );
         rmstest[0]   .push_back( rms );
         minftest[0]  .push_back( mInf );
         rhotest[0]   .push_back( rho );
         biastestT[0] .push_back( biasT );
         devtestT[0]  .push_back( devT );
         rmstestT[0]  .push_back( rmsT );
         minftestT[0] .push_back( mInfT );
         // ...
         biastrain[0] .push_back( bias );
         devtrain[0]  .push_back( dev );
         rmstrain[0]  .push_back( rms );
         minftrain[0] .push_back( mInf );
         rhotrain[0]  .push_back( rho );
         biastrainT[0].push_back( biasT );
         devtrainT[0] .push_back( devT );
         rmstrainT[0] .push_back( rmsT );
         minftrainT[0].push_back( mInfT );
         // ...
         Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
      // ...
         doMulticlass = kTRUE;
         // ...
         Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
      // ...
         eff01err[isel].push_back( err );
         // ...
         eff10err[isel].push_back( err );
         // ...
         eff30err[isel].push_back( err );
         // ...
         Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
      std::vector<TString> vtemps = mname[0];
      std::vector< std::vector<Double_t> > vtmp;
      vtmp.push_back( devtest[0]   );
      vtmp.push_back( devtrain[0]  );
      vtmp.push_back( biastest[0]  );
      vtmp.push_back( biastrain[0] );
      vtmp.push_back( rmstest[0]   );
      vtmp.push_back( rmstrain[0]  );
      vtmp.push_back( minftest[0]  );
      vtmp.push_back( minftrain[0] );
      vtmp.push_back( rhotest[0]   );
      vtmp.push_back( rhotrain[0]  );
      vtmp.push_back( devtestT[0]  );
      vtmp.push_back( devtrainT[0] );
      vtmp.push_back( biastestT[0] );
      vtmp.push_back( biastrainT[0]);
      vtmp.push_back( rmstestT[0]  );
      vtmp.push_back( rmstrainT[0] );
      vtmp.push_back( minftestT[0] );
      vtmp.push_back( minftrainT[0]);
      // ...
      devtest[0]    = vtmp[0];
      devtrain[0]   = vtmp[1];
      biastest[0]   = vtmp[2];
      biastrain[0]  = vtmp[3];
      rmstest[0]    = vtmp[4];
      rmstrain[0]   = vtmp[5];
      minftest[0]   = vtmp[6];
      minftrain[0]  = vtmp[7];
      rhotest[0]    = vtmp[8];
      rhotrain[0]   = vtmp[9];
      devtestT[0]   = vtmp[10];
      devtrainT[0]  = vtmp[11];
      biastestT[0]  = vtmp[12];
      biastrainT[0] = vtmp[13];
      rmstestT[0]   = vtmp[14];
      rmstrainT[0]  = vtmp[15];
      minftestT[0]  = vtmp[16];
      minftrainT[0] = vtmp[17];
   else if (doMulticlass) {
      // ...
      for (Int_t k=0; k<2; k++) {
         std::vector< std::vector<Double_t> > vtemp;
         vtemp.push_back( effArea[k]    );
         vtemp.push_back( eff10[k]      );
         vtemp.push_back( eff01[k]      );
         vtemp.push_back( eff30[k]      );
         vtemp.push_back( eff10err[k]   );
         vtemp.push_back( eff01err[k]   );
         vtemp.push_back( eff30err[k]   );
         vtemp.push_back( trainEff10[k] );
         vtemp.push_back( trainEff01[k] );
         vtemp.push_back( trainEff30[k] );
         vtemp.push_back( sig[k]        );
         vtemp.push_back( sep[k]        );
         vtemp.push_back( roc[k]        );
         std::vector<TString> vtemps = mname[k];
         // ...
         effArea[k]    = vtemp[0];
         eff10[k]      = vtemp[1];
         eff01[k]      = vtemp[2];
         eff30[k]      = vtemp[3];
         eff10err[k]   = vtemp[4];
         eff01err[k]   = vtemp[5];
         eff30err[k]   = vtemp[6];
         trainEff10[k] = vtemp[7];
         trainEff01[k] = vtemp[8];
         trainEff30[k] = vtemp[9];
   const Int_t nmeth = methodsNoCuts.size();
   // ...
   if (!doRegression && !doMulticlass) {
      // ...
      std::vector<Double_t> rvec;
      // ...
      std::vector<TString>* theVars = new std::vector<TString>;
      std::vector<ResultsClassification*> mvaRes;
      for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); itrMethod++, ivar++) {
         // ...
         theVars->back().ReplaceAll( "MVA_", "" );
      // ...
         for (Int_t im=0; im<nmeth; im++) {
            // ...
               Log() << kWARNING << "Found NaN return value in event: " << ievt
                     << " for method \"" << methodsNoCuts[im]->GetName() << "\"" << Endl;
            // ...
            else dvec[im] = retval;
         }
         // ...
         else { tpBkg->AddRow( dvec ); theMat = overlapB; }
         // ...
         for (Int_t im=0; im<nmeth; im++) {
            for (Int_t jm=im; jm<nmeth; jm++) {
               if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
                  // ...
                  if (im != jm) (*theMat)(jm,im)++;
      // ...
      if (corrMatS != 0 && corrMatB != 0) {
         // ...
         for (Int_t im=0; im<nmeth; im++) {
            for (Int_t jm=0; jm<nmeth; jm++) {
               mvaMatS(im,jm) = (*corrMatS)(im,jm);
               mvaMatB(im,jm) = (*corrMatB)(im,jm);
            }
         }
         // ...
         std::vector<TString> theInputVars;
         // ...
         for (Int_t iv=0; iv<nvar; iv++) {
            // ...
            for (Int_t jm=0; jm<nmeth; jm++) {
               varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
               varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
            }
         }
         // ...
         Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
               << "Correlations between input variables and MVA response (background):" << Endl;
      // ...
      if (nmeth != (Int_t)methods->size())
      TString hLine = "--------------------------------------------------------------------------------------------------";
      Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
      Log() << kINFO << "(\"Bias\" quotes the mean deviation of the regression from true target." << Endl;
      Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
      Log() << kINFO << " Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" << Endl;
      Log() << kINFO << " tained when removing events deviating more than 2sigma from average.)" << Endl;
      // ...
      for (Int_t i=0; i<nmeth_used[0]; i++) {
         // ...
         if (theMethod == 0) continue;
         Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
                                // ...
                                (const char*)mname[0][i],
                                biastest[0][i], biastestT[0][i],
                                rmstest[0][i], rmstestT[0][i],
                                minftest[0][i], minftestT[0][i] )
      // ...
      Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;
      // ...
      Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
      // ...
      for (Int_t i=0; i<nmeth_used[0]; i++) {
         // ...
         if (theMethod == 0) continue;
         Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
                                (const char*)mname[0][i],
                                biastrain[0][i], biastrainT[0][i],
                                rmstrain[0][i], rmstrainT[0][i],
                                minftrain[0][i], minftrainT[0][i] )
   else if (doMulticlass) {
      // ...
      TString hLine = "-------------------------------------------------------------------------------------------------------";
      Log() << kINFO << "Evaluation results ranked by best signal efficiency times signal purity " << Endl;
      // ...
      for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
         // ...
         if (theMethod == 0) continue;
         // ...
         TString header = "DataSet Name MVA Method ";
         // ...
         for (Int_t i=0; i<nmeth_used[0]; i++) {
            // ...
            res += Form("%#1.3f ", (multiclass_testEff[i][icls])*(multiclass_testPur[i][icls]));
   // ...
      TString hLine = "-------------------------------------------------------------------------------------------------------------------";
      Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
      // ...
      for (Int_t k=0; k<2; k++) {
         if (k == 1 && nmeth_used[k] > 0) {
            // ...
            Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
         }
         for (Int_t i=0; i<nmeth_used[k]; i++) {
            if (k == 1) mname[k][i].ReplaceAll( "Variable_", "" );
            // ...
            if (theMethod == 0) continue;
            // ...
            std::vector<Bool_t> *mvaResType = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
            // ...
            if (mvaResType->size() != 0) {
            // ...
            if (sep[k][i] < 0 || sig[k][i] < 0) {
               // ...
                        itrMap->first.Data(),
                        (const char*)mname[k][i],
                        effArea[k][i]) << Endl;
               // ...
                        itrMap->first.Data(),
                        (const char*)mname[k][i],
                        fROCalcValue) << Endl;
            if (fROCCurve) delete fROCCurve;
   // ...
   Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
   // ...
   Log() << kINFO << "DataSet MVA Signal efficiency: from test sample (from training sample) " << Endl;
   Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 " << Endl;
   // ...
   for (Int_t k=0; k<2; k++) {
      if (k == 1 && nmeth_used[k] > 0) {
         // ...
         Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
      }
      for (Int_t i=0; i<nmeth_used[k]; i++) {
         if (k == 1) mname[k][i].ReplaceAll( "Variable_", "" );
         // ...
         if (theMethod == 0) continue;
         // ...
         Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
                                (const char*)mname[k][i],
                                eff01[k][i], trainEff01[k][i],
                                eff10[k][i], trainEff10[k][i],
                                eff30[k][i], trainEff30[k][i]) << Endl;
   // ...
   std::list<TString> datasets;
   for (Int_t k=0; k<2; k++) {
      for (Int_t i=0; i<nmeth_used[k]; i++) {
         // ...
         if (theMethod == 0) continue;
         // ...
         if (std::find(datasets.begin(), datasets.end(), theMethod->fDataSetInfo.GetName()) == datasets.end())
   std::cerr << "Error in Variable Importance: Random mode requires more than 10 variables in the dataset." << std::endl;
   // ...
   uint64_t range = pow(2, nbits);
   // ...
   std::vector<Double_t> importances(nbits);
   // ...
   std::vector<Double_t> ROC(range);
   // ...
   for (int i = 0; i < nbits; i++) importances[i] = 0;
   // ...
   for (x = 1; x < range; x++) {
      // ...
      std::bitset<VIBITS> xbitset(x);
      if (x == 0) continue;
      // ...
      for (int index = 0; index < nbits; index++) {
         if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
      }
      // ...
      BookMethod(seedloader, theMethod, methodTitle, theOption);
   // ...
   for (x = 0; x < range; x++)
      // ...
      for (uint32_t i = 0; i < VIBITS; ++i) {
         // ...
         std::bitset<VIBITS> ybitset(y);
         // ...
         importances[ny] = SROC - 0.5;
         // ...
         importances[ny] += SROC - SSROC;
   // ...
   std::cout << "--- Variable Importance Results (All)" << std::endl;
static long int sum(long int i)
{
   // ...
   for (long int n = 0; n < i; n++) _sum += pow(2, n);
   // ...
}
// ...
   long int range = sum(nbits);
   // ...
   std::vector<Double_t> importances(nbits);
   for (int i = 0; i < nbits; i++) importances[i] = 0;
   // ...
   std::bitset<VIBITS> xbitset(x);
   if (x == 0) Log() << kFATAL << "Error: need at least one variable.";
   // ...
   for (int index = 0; index < nbits; index++) {
      if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
   }
   // ...
   BookMethod(seedloader, theMethod, methodTitle, theOption);
   // ...
   for (uint32_t i = 0; i < VIBITS; ++i) {
      // ...
      std::bitset<VIBITS> ybitset(y);
      // ...
      importances[ny] = SROC - 0.5;
      // ...
      for (int index = 0; index < nbits; index++) {
         if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
      }
      // ...
      BookMethod(subseedloader, theMethod, methodTitle, theOption);
      // ...
      importances[ny] += SROC - SSROC;
      // ...
      delete subseedloader;
   }
   // ...
   std::cout << "--- Variable Importance Results (Short)" << std::endl;
   long int range = pow(2, nbits);
   // ...
   std::vector<Double_t> importances(nbits);
   // ...
   for (int i = 0; i < nbits; i++) importances[i] = 0;
   // ...
   x = rangen->Integer(range);
   // ...
   std::bitset<32> xbitset(x);
   if (x == 0) continue;
   // ...
   for (int index = 0; index < nbits; index++) {
      if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
   }
   // ...
   BookMethod(seedloader, theMethod, methodTitle, theOption);
   // ...
   for (uint32_t i = 0; i < 32; ++i) {
      // ...
      std::bitset<32> ybitset(y);
      // ...
      importances[ny] = SROC - 0.5;
      importances_norm += importances[ny];
      // ...
      for (int index = 0; index < nbits; index++) {
         if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
      }
      // ...
      BookMethod(subseedloader, theMethod, methodTitle, theOption);
      // ...
      importances[ny] += SROC - SSROC;
      // ...
      delete subseedloader;
   }
   // ...
   std::cout << "--- Variable Importance Results (Random)" << std::endl;
   TH1F *vih1 = new TH1F("vih1", "", nbits, 0, nbits);
   // ...
   for (int i = 0; i < nbits; i++) {
      normalization = normalization + importances[i];
   }
   // ...
   for (Int_t i = 1; i < nbits + 1; i++) {
      x_ie[i - 1] = (i - 1) * 1.;
      roc = 100.0 * importances[i - 1] / normalization;
      // ...
      std::cout << "--- " << varNames[i-1] << " = " << roc << " %" << std::endl;
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton; creates the instance if needed
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title. Offset is a correction factor with respect to the "s...
void SetModelPersistence(Bool_t status)
virtual const char * GetName() const
Returns name of object.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName)
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
UInt_t GetNVariables() const
DataSetManager * fDataSetManager
static long int sum(long int i)
Principal Components Analysis (PCA)
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Random number generator class based on M.
MsgLogger & Endl(MsgLogger &ml)
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
This class displays a legend box (TPaveText) containing several legend entries.
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
ROOT output file.
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
void EvaluateAllVariables(DataLoader *loader, TString options="")
iterates over all MVA input variables and evaluates them
Bool_t fROC
enable calculation of correlations
virtual void SetDirectory(TDirectory *dir)
By default, when a histogram is created, it is added to the list of histogram objects in the current ...
TString & ReplaceAll(const TString &s1, const TString &s2)
R__EXTERN TStyle * gStyle
static Types & Instance()
returns the single instance of "Types" if it exists already, or creates it (Singleton)
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
virtual void Draw(Option_t *option="")
Draw this legend with its current attributes.
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
std::vector< TString > GetListOfVariables() const
returns list of variables
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Bool_t Verbose(void) const
virtual int MakeDirectory(const char *name)
Make a directory.
DataSetInfo & DefaultDataSetInfo()
virtual void MakeClass(const TString &classFileName=TString("")) const =0
TString fTransformations
option string given by construction (presently only "V")
1-D histogram with a float per channel (see TH1 documentation)
void ToLower()
Change string to lower-case.
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory and return a pointer to the created directory.
void TrainAllMethods()
iterates through all booked methods and calls training
virtual void TestMulticlass()
test multiclass classification
virtual void SetTitle(const char *title="")
Set graph title.
UInt_t GetNClasses() const
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
void WriteDataInformation(DataSetInfo &fDataSetInfo)
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
const TString & GetLabel() const
void SetSilentFile(Bool_t status)
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as an overall quality measure of the classification ...
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
overwrite existing object with same name
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
virtual void SetBarWidth(Float_t width=0.5)
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
static void InhibitOutput()
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
DataSetInfo & fDataSetInfo
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
virtual void SetGrid(Int_t valuex=1, Int_t valuey=1)
Bool_t fModelPersistence
the training type
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
void ReadStateFromFile()
Function to read options and weights from file.
double pow(double, double)
void PrintHelpMessage() const
prints out method-specific help message
std::vector< std::vector< double > > Data
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
DataSetInfo & DataInfo() const
Bool_t DoRegression() const
void SetMinType(EMsgType minType)
void SetDrawProgressBar(Bool_t d)
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Bool_t IsModelPersistence()
std::map< TString, MVector * > fMethodsMap
Bool_t fSilentFile
enable calculation of ROC values
Long64_t GetNTrainingEvents() const
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
TMatrixT< Double_t > TMatrixD
Bool_t DoMulticlass() const
const Int_t MinNoTrainingEvents
virtual ~Factory()
destructor (deletes fATreeEvent)
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
iterates through all booked methods and sees if they use parameter tuning and, if so, runs it.
TDirectory * RootBaseDir()
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
UInt_t GetNTargets() const
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
std::string GetMethodName(TCppMethod_t)
DataSetManager * GetDataSetManager()
Service class for 2-Dim histogram classes.
Double_t GetROCIntegral()
ROC Integral (AUC)
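For reference, the quantity meant here is the area under the background-rejection versus signal-efficiency curve; a sketch of the standard definition (not copied from this listing), in LaTeX:

   \mathrm{ROC\;integral} = \int_{0}^{1} \bigl( 1 - \varepsilon_{B}(\varepsilon_{S}) \bigr) \, d\varepsilon_{S}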
R__EXTERN TSystem * gSystem
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
const char * GetName() const
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
ClassInfo * GetClassInfo(Int_t clNum) const
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
const TMatrixD * CorrelationMatrix(const TString &className) const
Bool_t fCorrelations
verbose mode
void EvaluateAllMethods(void)
iterates over all MVAs that have been booked, and calls their evaluation methods
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content. See convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
const TMatrixD * GetCovarianceMatrix() const
const TString & GetMethodName() const
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb"...
void Greetings()
print welcome message; options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg ...
virtual void MakePrincipals()
Perform the principal components analysis.
void SetBoostedMethodName(TString methodName)
void SetVerbose(Bool_t v=kTRUE)
virtual Double_t GetSignificance() const
compute significance of mean difference: significance = |<S> - <B>| / sqrt(RMS_S^2 + RMS_B^2)
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
TString GetWeightFileName() const
retrieve weight file name
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title. The size is expressed in per cent of the pad width.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
virtual void Print() const
get maximum length of variable names
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
TLegendEntry * AddEntry(const TObject *obj, const char *label="", Option_t *option="lpf")
Add a new entry to this legend.
virtual void PrintHelpMessage() const =0
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
void SetFile(TFile *file)
static void DestroyInstance()
static function: destroy TMVA instance
void SetCurrentType(Types::ETreeType type) const
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
VariableInfo & GetVariableInfo(Int_t i)
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
void AddPreDefVal(const T &)
virtual const char * GetName() const
Returns name of object.
void ProcessSetup()
process all options; the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
const TString & GetOptions() const
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
void SetUseColor(Bool_t uc)
void SetConfigName(const char *n)
void SetSource(const std::string &source)
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t fLegend=kTRUE)
void SetTitleXOffset(Float_t offset=1)
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of the classifier; iterates over methods and tests.
TGraph * GetROCCurve(const UInt_t points=100)
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample ...
DataSetManager * fDataSetManager
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
const TString & GetSplitOptions() const
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
standard constructor; jobname: this name will appear in all weight file names produced by the MVAs th...
DataInputHandler & DataInput()
TString GetMethodTypeName() const
const TCut & GetCut(Int_t i) const
void SetWeightFileDir(TString fileDir)
set directory of weight file
TString fJobName
used in constructor without file
Double_t GetSignalReferenceCut() const
A Graph is a graphics object made of two arrays X and Y with npoints each.
void DeleteAllMethods(void)
delete methods
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
virtual Double_t GetTrainingEfficiency(const TString &)
Bool_t IsSignal(const Event *ev) const
Types::EAnalysisType GetAnalysisType() const
Types::EAnalysisType fAnalysisType
jobname, used as extension in weight file names
std::vector< IMethod * > MVector
virtual const char * GetName() const
Returns name of object.
static void EnableOutput()
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
const TString & GetTestvarName() const
IMethod * GetMethod(const TString &datasetname, const TString &title) const
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
void SetTestvarName(const TString &v="")
DataSet * GetDataSet() const
returns data set
Types::EMVA GetMethodType() const
void CheckForUnusedOptions() const
checks for unused options in option string
virtual void TestClassification()
initialization
const Event * GetEvent() const
virtual void SetAnalysisType(Types::EAnalysisType type)
void SetConfigDescription(const char *d)
Bool_t fVerbose
List of transformations to test.
const char * Data() const