// Factory.cxx (TMVA): compile-time flag and constructor member initialisation
#define READXML kTRUE
...
fTransformations  ( "I" ),
...
fJobName          ( jobName ),
fAnalysisType     ( Types::kClassification ),
fModelPersistence ( kTRUE )
// Factory constructor: option declarations
DeclareOptionRef( color, "Color",
                  "Flag for coloured screen output (default: True, if in batch mode: False)" );
DeclareOptionRef( fTransformations, "Transformations",
                  "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
DeclareOptionRef( silent, "Silent",
                  "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
DeclareOptionRef( drawProgressBar, "DrawProgressBar",
                  "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
DeclareOptionRef( fModelPersistence, "ModelPersistence",
                  "Option to save the trained model in xml file or using serialization" );
DeclareOptionRef( analysisType, "AnalysisType",
                  "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
// The second Factory constructor (without an output file) declares the same options:
DeclareOptionRef( color, "Color",
                  "Flag for coloured screen output (default: True, if in batch mode: False)" );
DeclareOptionRef( fTransformations, "Transformations",
                  "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
DeclareOptionRef( silent, "Silent",
                  "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
DeclareOptionRef( drawProgressBar, "DrawProgressBar",
                  "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
DeclareOptionRef( fModelPersistence, "ModelPersistence",
                  "Option to save the trained model in xml file or using serialization" );
DeclareOptionRef( analysisType, "AnalysisType",
                  "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
// Factory destructor: delete the default transformations
std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
for (; trfIt != fDefaultTrfs.end(); trfIt++) delete (*trfIt);
// Factory::DeleteAllMethods()
std::map<TString,MVector*>::iterator itrMap;
...
MVector *methods = itrMap->second;
...
MVector::iterator itrMethod = methods->begin();
for (; itrMethod != methods->end(); itrMethod++) {
   Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
// Factory::BookMethod(): refuse to book a second method with the same title
if (GetMethod( datasetname, methodTitle ) != 0) {
   Log() << kFATAL << "Booking failed since method with title <" << methodTitle
         << "> already exists in DataSet <" << loader->GetName() << "> "
...
DeclareOptionRef( boostNum = 0, "Boost_num",
                  "Number of times the classifier will be boosted" );
Log() << kDEBUG << "Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;
...
Log() << kFATAL << "Method with type kBoost cannot be cast to MethodCategory. /Factory" << Endl;
...
if (method == 0) return 0;
...
Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl;
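A sketch of the booking call these checks guard; the method type, title and option string are illustrative placeholders:

   // Hypothetical booking: a BDT on a DataLoader named "dataset".
   TMVA::DataLoader* loader = new TMVA::DataLoader( "dataset" );
   factory.BookMethod( loader, TMVA::Types::kBDT, "BDT",
                       "NTrees=850:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5" );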
MVector::const_iterator itrMethod;
...
for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
// Factory::WriteDataInformation(): build and test the requested transformations
std::vector<TMVA::TransformationHandler*> trfs;
...
std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
for (; trfsDefIt != trfsDef.end(); trfsDefIt++) {
   ...
   Log() << kDEBUG << "current transformation string: '" << trfS.Data() << "'" << Endl;
   ...
   if (trfS.BeginsWith('I')) identityTrHandler = trfs.back();
}
...
std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
for (; trfIt != trfs.end(); trfIt++) {
   ...
   (*trfIt)->CalcTransformations(inputEvents);
}
...
for (trfIt = trfs.begin(); trfIt != trfs.end(); trfIt++) delete *trfIt;
// Factory::OptimizeAllMethods()
std::map<TString,MVector*>::iterator itrMap;
std::map<TString,Double_t> TunedParameters;
...
MVector *methods = itrMap->second;
...
MVector::iterator itrMethod;
...
for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
   ...
   Log() << kFATAL << "Dynamic cast to MethodBase failed" << Endl;
   return TunedParameters;
   ...
   << " not trained (training tree has less entries ["
   ...
   Log() << kINFO << "Optimization of tuning parameters finished for Method:" << mva->GetName() << Endl;
}
...
return TunedParameters;
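Typical invocation, sketched with the default figure-of-merit and fitter arguments:

   // Hypothetical call: tune all booked methods before training.
   std::map<TString,Double_t> tunedPars = factory.OptimizeAllMethods( "ROCIntegral", "FitGA" );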
// Factory::GetROCIntegral()
MVector::iterator itrMethod = methods->begin();
while (itrMethod != methods->end()) {
   ...
   std::vector<Bool_t> *mvaResType =
      dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
   ...
   if (!fROCCurve)
      Log() << kFATAL
            << Form( "ROCCurve object was not created for Method = %s with Dataset = %s ",
                     theMethodName.Data(), datasetname.Data() ) << Endl;
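Sketch of the corresponding user call; loader and method title are placeholders:

   // Hypothetical call: ROC integral (AUC) of the booked "BDT" method.
   Double_t auc = factory.GetROCIntegral( loader, "BDT" );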
// Factory::GetROCCurve() for a single method
MVector::iterator itrMethod = methods->begin();
while (itrMethod != methods->end()) {
   ...
   std::vector<Bool_t> *mvaResType =
      dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
   ...
   if (!fROCCurve)
      Log() << kFATAL
            << Form( "ROCCurve object was not created for Method = %s with Dataset = %s ",
                     theMethodName.Data(), datasetname.Data() ) << Endl;
   ...
   fGraph->GetYaxis()->SetTitle( "Background Rejection" );
   fGraph->GetXaxis()->SetTitle( "Signal Efficiency" );
   fGraph->SetTitle( Form( "Background Rejection vs. Signal Efficiency (%s)",
                           method->GetMethodName().Data() ) );
// GetROCCurve() over all booked methods: one graph per method plus a legend
TLegend *fLegend = new TLegend( 0.15, 0.15, 0.35, 0.3, "MVA Method" );
...
MVector::iterator itr = methods->begin();
while (itr != methods->end()) {
   ...
   std::vector<Float_t> *mvaRes = ...
   std::vector<Bool_t> *mvaResType = ...
   ...
   fGraph->GetYaxis()->SetTitle( "Background Rejection" );
   fGraph->GetXaxis()->SetTitle( "Signal Efficiency" );
   fGraph->SetTitle( "Background Rejection vs. Signal Efficiency" );
   ...
   fGraph->SetLineWidth(2);
   fGraph->SetLineColor(++line_color);
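How the returned graph is typically drawn; the canvas setup is illustrative:

   // Hypothetical usage: draw the ROC curve of one booked method.
   TCanvas* c1 = new TCanvas( "c1", "ROC", 600, 600 );
   TGraph* rocGraph = factory.GetROCCurve( loader, "BDT" );
   if (rocGraph) rocGraph->Draw( "AL" );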
// Factory::TrainAllMethods()
Log() << kDEBUG << "Train all methods for "
...
std::map<TString,MVector*>::iterator itrMap;
...
MVector *methods = itrMap->second;
MVector::iterator itrMethod;
...
for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
   ...
   Log() << kFATAL << "No input data for the training provided!" << Endl;
   ...
   Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;
   ...
   Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;
   ...
   << " not trained (training tree has less entries ["
   ...
}
...
Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
   ...
   const Ranking* ranking = (*itrMethod)->CreateRanking();
   if (ranking != 0) ranking->Print();
   else Log() << kINFO << "No variable ranking supplied by classifier: "
   ...
}
...
Log() << kHEADER << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;
// with model persistence enabled, each method is recreated from its weight file
for (UInt_t i=0; i<methods->size(); i++) {
   ...
   dataSetInfo, weightfile ) );
   ...
   if (!methCat)
      Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl;
// Factory::TestAllMethods()
std::map<TString,MVector*>::iterator itrMap;
...
MVector *methods = itrMap->second;
MVector::iterator itrMethod;
...
for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
   ...
   (analysisType == Types::kMulticlass ? "Multiclass classification" : "Classification"))
   << " performance" << Endl << Endl;
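The canonical processing sequence built from these methods, as used in the TMVA tutorials:

   // Standard workflow: train, test, then evaluate every booked method.
   factory.TrainAllMethods();
   factory.TestAllMethods();
   factory.EvaluateAllMethods();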
if (methodTitle != "") {
   ...
   Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle
         << "\" in list" << Endl;
   ...
}
MVector::const_iterator itrMethod;
for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
   ...
   if (method == 0) continue;
if (methodTitle != "") {
   ...
   Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle
         << "\" in list" << Endl;
   ...
}
MVector::const_iterator itrMethod;
for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
   ...
   if (method == 0) continue;
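Hedged usage sketch; the dataset and method names are placeholders:

   // Hypothetical calls: emit a standalone response class and print
   // the method-specific help for one booked classifier.
   factory.MakeClass( "dataset", "BDT" );
   factory.PrintHelpMessage( "dataset", "BDT" );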
// Factory::EvaluateAllVariables(): treat each input variable as a trivial MVA
Log() << kINFO << "Evaluating all variables..." << Endl;
...
if (options.Contains("V")) s += ":V";
...
// Factory::EvaluateAllMethods()
Log() << kINFO << "...nothing found to evaluate" << Endl;
std::map<TString,MVector*>::iterator itrMap;
...
MVector *methods = itrMap->second;
...
// bookkeeping for the evaluation result tables
Int_t nmeth_used[2] = {0,0};
...
std::vector<std::vector<TString> >  mname(2);
std::vector<std::vector<Double_t> > sig(2), sep(2), roc(2);
std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
...
std::vector<std::vector<Float_t> > multiclass_testEff;
std::vector<std::vector<Float_t> > multiclass_trainEff;
std::vector<std::vector<Float_t> > multiclass_testPur;
std::vector<std::vector<Float_t> > multiclass_trainPur;
...
std::vector<std::vector<Double_t> > biastrain(1);
std::vector<std::vector<Double_t> > biastest(1);
std::vector<std::vector<Double_t> > devtrain(1);
std::vector<std::vector<Double_t> > devtest(1);
std::vector<std::vector<Double_t> > rmstrain(1);
std::vector<std::vector<Double_t> > rmstest(1);
std::vector<std::vector<Double_t> > minftrain(1);
std::vector<std::vector<Double_t> > minftest(1);
std::vector<std::vector<Double_t> > rhotrain(1);
std::vector<std::vector<Double_t> > rhotest(1);

// "_T": the same regression quantities on the 2-sigma truncated sample
std::vector<std::vector<Double_t> > biastrainT(1);
std::vector<std::vector<Double_t> > biastestT(1);
std::vector<std::vector<Double_t> > devtrainT(1);
std::vector<std::vector<Double_t> > devtestT(1);
std::vector<std::vector<Double_t> > rmstrainT(1);
std::vector<std::vector<Double_t> > rmstestT(1);
std::vector<std::vector<Double_t> > minftrainT(1);
std::vector<std::vector<Double_t> > minftestT(1);
...
for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
   ...
   if (theMethod == 0) continue;
   ...
   doRegression = kTRUE;
   ...
   biastest[0] .push_back( bias );
   devtest[0]  .push_back( dev );
   rmstest[0]  .push_back( rms );
   minftest[0] .push_back( mInf );
   rhotest[0]  .push_back( rho );
   biastestT[0].push_back( biasT );
   devtestT[0] .push_back( devT );
   rmstestT[0] .push_back( rmsT );
   minftestT[0].push_back( mInfT );
   ...
   biastrain[0] .push_back( bias );
   devtrain[0]  .push_back( dev );
   rmstrain[0]  .push_back( rms );
   minftrain[0] .push_back( mInf );
   rhotrain[0]  .push_back( rho );
   biastrainT[0].push_back( biasT );
   devtrainT[0] .push_back( devT );
   rmstrainT[0] .push_back( rmsT );
   minftrainT[0].push_back( mInfT );
   ...
   Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
   ...
   doMulticlass = kTRUE;
   ...
   Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
   ...
   eff01err[isel].push_back( err );
   ...
   eff10err[isel].push_back( err );
   ...
   eff30err[isel].push_back( err );
   ...
   Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
std::vector<TString> vtemps = mname[0];
std::vector< std::vector<Double_t> > vtmp;
vtmp.push_back( devtest[0] );
vtmp.push_back( devtrain[0] );
vtmp.push_back( biastest[0] );
vtmp.push_back( biastrain[0] );
vtmp.push_back( rmstest[0] );
vtmp.push_back( rmstrain[0] );
vtmp.push_back( minftest[0] );
vtmp.push_back( minftrain[0] );
vtmp.push_back( rhotest[0] );
vtmp.push_back( rhotrain[0] );
vtmp.push_back( devtestT[0] );
vtmp.push_back( devtrainT[0] );
vtmp.push_back( biastestT[0] );
vtmp.push_back( biastrainT[0] );
vtmp.push_back( rmstestT[0] );
vtmp.push_back( rmstrainT[0] );
vtmp.push_back( minftestT[0] );
vtmp.push_back( minftrainT[0] );
...
devtest[0]    = vtmp[0];
devtrain[0]   = vtmp[1];
biastest[0]   = vtmp[2];
biastrain[0]  = vtmp[3];
rmstest[0]    = vtmp[4];
rmstrain[0]   = vtmp[5];
minftest[0]   = vtmp[6];
minftrain[0]  = vtmp[7];
rhotest[0]    = vtmp[8];
rhotrain[0]   = vtmp[9];
devtestT[0]   = vtmp[10];
devtrainT[0]  = vtmp[11];
biastestT[0]  = vtmp[12];
biastrainT[0] = vtmp[13];
rmstestT[0]   = vtmp[14];
rmstrainT[0]  = vtmp[15];
minftestT[0]  = vtmp[16];
minftrainT[0] = vtmp[17];
else if (doMulticlass) {
...
for (Int_t k=0; k<2; k++) {
   std::vector< std::vector<Double_t> > vtemp;
   vtemp.push_back( effArea[k] );
   vtemp.push_back( eff10[k] );
   vtemp.push_back( eff01[k] );
   vtemp.push_back( eff30[k] );
   vtemp.push_back( eff10err[k] );
   vtemp.push_back( eff01err[k] );
   vtemp.push_back( eff30err[k] );
   vtemp.push_back( trainEff10[k] );
   vtemp.push_back( trainEff01[k] );
   vtemp.push_back( trainEff30[k] );
   vtemp.push_back( sig[k] );
   vtemp.push_back( sep[k] );
   vtemp.push_back( roc[k] );
   std::vector<TString> vtemps = mname[k];
   ...
   effArea[k]    = vtemp[0];
   eff10[k]      = vtemp[1];
   eff01[k]      = vtemp[2];
   eff30[k]      = vtemp[3];
   eff10err[k]   = vtemp[4];
   eff01err[k]   = vtemp[5];
   eff30err[k]   = vtemp[6];
   trainEff10[k] = vtemp[7];
   trainEff01[k] = vtemp[8];
   trainEff30[k] = vtemp[9];
const Int_t nmeth = methodsNoCuts.size();
...
if (!doRegression && !doMulticlass) {
   ...
   std::vector<Double_t> rvec;
   ...
   std::vector<TString>* theVars = new std::vector<TString>;
   std::vector<ResultsClassification*> mvaRes;
   for (MVector::iterator itrMethod = methodsNoCuts.begin();
        itrMethod != methodsNoCuts.end(); itrMethod++, ivar++) {
      ...
      theVars->back().ReplaceAll( "MVA_", "" );
      ...
   }
   for (Int_t im=0; im<nmeth; im++) {
      ...
      Log() << kWARNING << "Found NaN return value in event: " << ievt
            << " for method \"" << methodsNoCuts[im]->GetName() << "\"" << Endl;
      else dvec[im] = retval;
   }
   ...
   else { tpBkg->AddRow( dvec ); theMat = overlapB; }
   ...
   for (Int_t im=0; im<nmeth; im++) {
      for (Int_t jm=im; jm<nmeth; jm++) {
         if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
            ...
            if (im != jm) (*theMat)(jm,im)++;
   ...
   if (corrMatS != 0 && corrMatB != 0) {
      ...
      for (Int_t im=0; im<nmeth; im++) {
         for (Int_t jm=0; jm<nmeth; jm++) {
            mvaMatS(im,jm) = (*corrMatS)(im,jm);
            mvaMatB(im,jm) = (*corrMatB)(im,jm);
         }
      }
      ...
      std::vector<TString> theInputVars;
      ...
      for (Int_t iv=0; iv<nvar; iv++) {
         ...
         for (Int_t jm=0; jm<nmeth; jm++) {
            varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
            varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
Log() << kINFO << Form( "Dataset[%s] : ", method->fDataSetInfo.GetName() )
      << "Correlations between input variables and MVA response (background):" << Endl;
...
Log() << kINFO << Form( "Dataset[%s] : ", method->fDataSetInfo.GetName() )
      << "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
Log() << kINFO << Form( "Dataset[%s] : ", method->fDataSetInfo.GetName() )
      << "the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" << Endl;
Log() << kINFO << Form( "Dataset[%s] : ", method->fDataSetInfo.GetName() )
      << "An event is signal-like, if its MVA output exceeds the following value:" << Endl;
...
Log() << kINFO << Form( "Dataset[%s] : ", method->fDataSetInfo.GetName() )
      << "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;
...
if (nmeth != (Int_t)methods->size())
   Log() << kINFO << Form( "Dataset[%s] : ", method->fDataSetInfo.GetName() )
         << "Note: no correlations and overlap with cut method are provided at present" << Endl;
TString hLine = "--------------------------------------------------------------------------------------------------";
Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
Log() << kINFO << "(\"Bias\" quotes the mean deviation of the regression from true target." << Endl;
Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
Log() << kINFO << " Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" << Endl;
Log() << kINFO << " tained when removing events deviating more than 2sigma from average.)" << Endl;
...
for (Int_t i=0; i<nmeth_used[0]; i++) {
   ...
   if (theMethod == 0) continue;
   Log() << kINFO << Form( "%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
                           ...
                           (const char*)mname[0][i],
                           biastest[0][i], biastestT[0][i],
                           rmstest[0][i],  rmstestT[0][i],
                           minftest[0][i], minftestT[0][i] )
   ...
}
Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;
...
Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
...
for (Int_t i=0; i<nmeth_used[0]; i++) {
   ...
   if (theMethod == 0) continue;
   Log() << kINFO << Form( "%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
                           ...
                           (const char*)mname[0][i],
                           biastrain[0][i], biastrainT[0][i],
                           rmstrain[0][i],  rmstrainT[0][i],
                           minftrain[0][i], minftrainT[0][i] )
else if (doMulticlass) {
   ...
   TString hLine = "-------------------------------------------------------------------------------------------------------";
   Log() << kINFO << "Evaluation results ranked by best signal efficiency times signal purity " << Endl;
   ...
   for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
      ...
      if (theMethod == 0) continue;
      ...
      TString header = "DataSet Name MVA Method ";
      ...
      for (Int_t i=0; i<nmeth_used[0]; i++) {
         ...
         res += Form( "%#1.3f ", (multiclass_testEff[i][icls])*(multiclass_testPur[i][icls]) );
TString hLine = "-------------------------------------------------------------------------------------------------------------------";
Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
...
for (Int_t k=0; k<2; k++) {
   if (k == 1 && nmeth_used[k] > 0) {
      ...
      Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
   }
   for (Int_t i=0; i<nmeth_used[k]; i++) {
      if (k == 1) mname[k][i].ReplaceAll( "Variable_", "" );
      ...
      if (theMethod == 0) continue;
      ...
      std::vector<Bool_t> *mvaResType =
         dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
      ...
      if (mvaResType->size() != 0) {
         ...
      if (sep[k][i] < 0 || sig[k][i] < 0) {
         ...
                 itrMap->first.Data(),
                 (const char*)mname[k][i],
                 effArea[k][i] ) << Endl;
         ...
                 itrMap->first.Data(),
                 (const char*)mname[k][i],
                 fROCalcValue ) << Endl;
      ...
      if (fROCCurve) delete fROCCurve;
Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
...
Log() << kINFO << "DataSet MVA Signal efficiency: from test sample (from training sample) " << Endl;
Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 " << Endl;
...
for (Int_t k=0; k<2; k++) {
   if (k == 1 && nmeth_used[k] > 0) {
      ...
      Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
   }
   for (Int_t i=0; i<nmeth_used[k]; i++) {
      if (k == 1) mname[k][i].ReplaceAll( "Variable_", "" );
      ...
      if (theMethod == 0) continue;
      ...
      Log() << kINFO << Form( "%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
                              ...
                              (const char*)mname[k][i],
                              eff01[k][i], trainEff01[k][i],
                              eff10[k][i], trainEff10[k][i],
                              eff30[k][i], trainEff30[k][i] ) << Endl;
std::list<TString> datasets;
for (Int_t k=0; k<2; k++) {
   for (Int_t i=0; i<nmeth_used[k]; i++) {
      ...
      if (theMethod == 0) continue;
      ...
      if (std::find(datasets.begin(), datasets.end(), theMethod->fDataSetInfo.GetName()) == datasets.end())
// Factory::EvaluateImportance(): dispatch on the variable-importance mode
std::cerr << "Error in Variable Importance: Random mode requires more than 10 variables in the dataset." << std::endl;
...
// Factory::EvaluateImportanceAll(): scan all 2^nbits - 1 variable subsets ("seeds")
uint64_t range = pow(2, nbits);
...
std::vector<Double_t> importances(nbits);
...
std::vector<Double_t> ROC(range);
...
for (int i = 0; i < nbits; i++) importances[i] = 0;
...
for (x = 1; x < range; x++) {
   ...
   std::bitset<VIBITS> xbitset(x);
   if (x == 0) continue;   // a seed must select at least one variable
   ...
   for (int index = 0; index < nbits; index++) {
      if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
   }
   ...
   BookMethod(seedloader, theMethod, methodTitle, theOption);
   ...
}
for (x = 0; x < range; x++)
   ...
for (uint32_t i = 0; i < VIBITS; ++i) {
   ...
   std::bitset<VIBITS> ybitset(y);
   ...
   importances[ny] = SROC - 0.5;
   ...
   importances[ny] += SROC - SSROC;
   ...
}
std::cout << "--- Variable Importance Results (All)" << std::endl;
// helper used by the "Short" mode: sum of the first i powers of two
static long int sum(long int i)
{
   long int _sum = 0;
   for (long int n = 0; n < i; n++) _sum += pow(2, n);
   return _sum;
}

// Factory::EvaluateImportanceShort()
long int range = sum(nbits);
...
std::vector<Double_t> importances(nbits);
for (int i = 0; i < nbits; i++) importances[i] = 0;
...
std::bitset<VIBITS> xbitset(x);
if (x == 0) Log() << kFATAL << "Error: need at least one variable.";
...
for (int index = 0; index < nbits; index++) {
   if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
}
...
BookMethod(seedloader, theMethod, methodTitle, theOption);
...
for (uint32_t i = 0; i < VIBITS; ++i) {
   ...
   std::bitset<VIBITS> ybitset(y);
   ...
   importances[ny] = SROC - 0.5;
   ...
   for (int index = 0; index < nbits; index++) {
      if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
   }
   ...
   BookMethod(subseedloader, theMethod, methodTitle, theOption);
   ...
   importances[ny] += SROC - SSROC;
   ...
   delete subseedloader;
}
...
std::cout << "--- Variable Importance Results (Short)" << std::endl;
// Factory::EvaluateImportanceRandom(): sample random seeds instead of the full scan
long int range = pow(2, nbits);
...
std::vector<Double_t> importances(nbits);
...
for (int i = 0; i < nbits; i++) importances[i] = 0;
...
x = rangen->Integer(range);
...
std::bitset<32> xbitset(x);
if (x == 0) continue;   // random draw selected no variable: skip
...
for (int index = 0; index < nbits; index++) {
   if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
}
...
BookMethod(seedloader, theMethod, methodTitle, theOption);
...
for (uint32_t i = 0; i < 32; ++i) {
   ...
   std::bitset<32> ybitset(y);
   ...
   importances[ny] = SROC - 0.5;
   importances_norm += importances[ny];
   ...
   for (int index = 0; index < nbits; index++) {
      if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
   }
   ...
   BookMethod(subseedloader, theMethod, methodTitle, theOption);
   ...
   importances[ny] += SROC - SSROC;
   ...
   delete subseedloader;
}
...
std::cout << "--- Variable Importance Results (Random)" << std::endl;
// Factory::GetImportance(): normalise and fill the summary histogram
TH1F *vih1 = new TH1F( "vih1", "", nbits, 0, nbits );
...
for (int i = 0; i < nbits; i++) {
   normalization = normalization + importances[i];
}
...
for (Int_t i = 1; i < nbits + 1; i++) {
   x_ie[i - 1] = (i - 1) * 1.;
   roc = 100.0 * importances[i - 1] / normalization;
   ...
   std::cout << "--- " << varNames[i - 1] << " = " << roc << " %" << std::endl;