88 TFile* TMVA::Factory::fgTargetFile = 0;
92 #define RECREATE_METHODS kTRUE
103 : Configurable ( theOption ),
104 fDataSetManager (
NULL ),
105 fDataInputHandler ( new DataInputHandler ),
106 fTransformations (
"I" ),
108 fJobName ( jobName ),
109 fDataAssignType ( kAssignEvents ),
110 fATreeEvent (
NULL ),
111 fAnalysisType ( Types::kClassification )
113 fgTargetFile = theTargetFile;
116 fDataSetManager =
new DataSetManager( *fDataInputHandler );
120 if (
gTools().CheckForSilentOption( GetOptions() ))
Log().InhibitOutput();
124 SetConfigDescription(
"Configuration options for Factory running" );
125 SetConfigName( GetName() );
139 DeclareOptionRef( fVerbose,
"V",
"Verbose flag" );
140 DeclareOptionRef( color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)" );
141 DeclareOptionRef( fTransformations,
"Transformations",
"List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
142 DeclareOptionRef( silent,
"Silent",
"Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
143 DeclareOptionRef( drawProgressBar,
144 "DrawProgressBar",
"Draw progress bar to display training, testing and evaluation schedule (default: True)" );
147 DeclareOptionRef( analysisType,
148 "AnalysisType",
"Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
149 AddPreDefVal(
TString(
"Classification"));
150 AddPreDefVal(
TString(
"Regression"));
151 AddPreDefVal(
TString(
"Multiclass"));
155 CheckForUnusedOptions();
165 if ( analysisType ==
"classification" ) fAnalysisType = Types::kClassification;
166 else if( analysisType ==
"regression" ) fAnalysisType = Types::kRegression;
167 else if( analysisType ==
"multiclass" ) fAnalysisType = Types::kMulticlass;
168 else if( analysisType ==
"auto" ) fAnalysisType = Types::kNoAnalysisType;
190 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
191 for (;trfIt != fDefaultTrfs.end(); trfIt++)
delete (*trfIt);
193 this->DeleteAllMethods();
194 delete fDataInputHandler;
198 delete fDataSetManager;
212 MVector::iterator itrMethod = fMethods.begin();
213 for (; itrMethod != fMethods.end(); itrMethod++) {
214 Log() <<
kDEBUG <<
"Delete method: " << (*itrMethod)->GetName() <<
Endl;
231 return fDataSetManager->AddDataSetInfo(dsi);
238 DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName);
240 if (dsi!=0)
return *dsi;
242 return fDataSetManager->AddDataSetInfo(*(
new DataSetInfo(dsiName)));
255 assignTree->
Branch(
"type", &fATreeType,
"ATreeType/I" );
256 assignTree->
Branch(
"weight", &fATreeWeight,
"ATreeWeight/F" );
258 std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
259 std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
260 std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();
262 if (!fATreeEvent) fATreeEvent =
new Float_t[vars.size()+tgts.size()+spec.size()];
264 for (
UInt_t ivar=0; ivar<vars.size(); ivar++) {
265 TString vname = vars[ivar].GetExpression();
266 assignTree->
Branch( vname, &(fATreeEvent[ivar]), vname +
"/F" );
269 for (
UInt_t itgt=0; itgt<tgts.size(); itgt++) {
270 TString vname = tgts[itgt].GetExpression();
271 assignTree->
Branch( vname, &(fATreeEvent[vars.size()+itgt]), vname +
"/F" );
274 for (
UInt_t ispc=0; ispc<spec.size(); ispc++) {
275 TString vname = spec[ispc].GetExpression();
276 assignTree->
Branch( vname, &(fATreeEvent[vars.size()+tgts.size()+ispc]), vname +
"/F" );
334 const std::vector<Double_t>& event,
Double_t weight )
336 ClassInfo* theClass = DefaultDataSetInfo().AddClass(className);
345 if (clIndex>=fTrainAssignTree.size()) {
346 fTrainAssignTree.resize(clIndex+1, 0);
347 fTestAssignTree.resize(clIndex+1, 0);
350 if (fTrainAssignTree[clIndex]==0) {
351 fTrainAssignTree[clIndex] = CreateEventAssignTrees(
Form(
"TrainAssignTree_%s", className.
Data()) );
352 fTestAssignTree[clIndex] = CreateEventAssignTrees(
Form(
"TestAssignTree_%s", className.
Data()) );
355 fATreeType = clIndex;
356 fATreeWeight = weight;
357 for (
UInt_t ivar=0; ivar<
event.size(); ivar++) fATreeEvent[ivar] = event[ivar];
360 else fTestAssignTree[clIndex]->Fill();
369 return fTrainAssignTree[clIndex]!=0;
377 UInt_t size = fTrainAssignTree.size();
378 for(
UInt_t i=0; i<size; i++) {
379 if(!UserAssignEvents(i))
continue;
380 const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
381 SetWeightExpression(
"weight", className );
399 Log() <<
kFATAL <<
"<AddTree> cannot interpret tree type: \"" << treetype
400 <<
"\" should be \"Training\" or \"Test\" or \"Training and Testing\"" <<
Endl;
402 AddTree( tree, className, weight, cut, tt );
411 Log() <<
kFATAL <<
"Tree does not exist (empty pointer)." <<
Endl;
413 DefaultDataSetInfo().AddClass( className );
419 Log() <<
kINFO <<
"Add Tree " << tree->
GetName() <<
" of type " << className
421 DataInput().AddTree( tree, className, weight, cut, tt );
429 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
438 TTree* signalTree =
new TTree(
"TreeS",
"Tree (S)" );
441 Log() <<
kINFO <<
"Create TTree objects from ASCII input files ... \n- Signal file : \""
445 AddTree( signalTree,
"Signal", weight,
TCut(
""), treetype );
452 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
460 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
468 TTree* bkgTree =
new TTree(
"TreeB",
"Tree (B)" );
471 Log() <<
kINFO <<
"Create TTree objects from ASCII input files ... \n- Background file : \""
475 AddTree( bkgTree,
"Background", weight,
TCut(
""), treetype );
482 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
489 AddTree( tree,
"Signal", weight );
496 AddTree( tree,
"Background", weight );
522 DataInput().AddTree( datFileS,
"Signal", signalWeight );
523 DataInput().AddTree( datFileB,
"Background", backgroundWeight );
543 DefaultDataSetInfo().AddVariable( expression, title, unit, min, max, type );
552 DefaultDataSetInfo().AddVariable( expression,
"",
"", min, max, type );
564 DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );
573 DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
581 return AddDataSet(
"Default" );
589 for (std::vector<TString>::iterator it=theVariables->begin();
590 it!=theVariables->end(); it++) AddVariable(*it);
597 DefaultDataSetInfo().SetWeightExpression(variable,
"Signal");
604 DefaultDataSetInfo().SetWeightExpression(variable,
"Background");
613 SetSignalWeightExpression(variable);
614 SetBackgroundWeightExpression(variable);
616 else DefaultDataSetInfo().SetWeightExpression( variable, className );
622 SetCut(
TCut(cut), className );
629 DefaultDataSetInfo().SetCut( cut, className );
636 AddCut(
TCut(cut), className );
643 DefaultDataSetInfo().AddCut( cut, className );
653 SetInputTreesFromEventAssignTrees();
657 DefaultDataSetInfo().SetSplitOptions(
Form(
"nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
658 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.
Data()) );
667 SetInputTreesFromEventAssignTrees();
671 DefaultDataSetInfo().SetSplitOptions(
Form(
"nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
672 Ntrain, Ntrain, Ntest, Ntest) );
681 SetInputTreesFromEventAssignTrees();
683 DefaultDataSetInfo().PrintClasses();
685 DefaultDataSetInfo().SetSplitOptions( opt );
694 SetInputTreesFromEventAssignTrees();
696 Log() <<
kINFO <<
"Preparing trees for training and testing..." <<
Endl;
697 AddCut( sigcut,
"Signal" );
698 AddCut( bkgcut,
"Background" );
700 DefaultDataSetInfo().SetSplitOptions( splitOpt );
709 if( DefaultDataSetInfo().GetNClasses()==2
710 && DefaultDataSetInfo().GetClassInfo(
"Signal") !=
NULL
711 && DefaultDataSetInfo().GetClassInfo(
"Background") !=
NULL
714 }
else if( DefaultDataSetInfo().GetNClasses() >= 2 ){
717 Log() <<
kFATAL <<
"No analysis type for " << DefaultDataSetInfo().GetNClasses() <<
" classes and "
718 << DefaultDataSetInfo().GetNTargets() <<
" regression targets." <<
Endl;
724 Log() <<
kFATAL <<
"Booking failed since method with title <"
725 << methodTitle <<
"> already exists"
736 "Number of times the classifier will be boosted" );
746 DefaultDataSetInfo(),
751 Log() <<
"Boost Number is " << boostNum <<
" > 0: train boosted classifier" <<
Endl;
755 DefaultDataSetInfo(),
759 Log() <<
kFATAL <<
"Method with type kBoost cannot be casted to MethodCategory. /Factory" <<
Endl;
766 if (method==0)
return 0;
772 Log() <<
kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
778 DefaultDataSetInfo().GetNClasses(),
779 DefaultDataSetInfo().GetNTargets() )) {
782 Log() <<
"regression with " << DefaultDataSetInfo().GetNTargets() <<
" targets." <<
Endl;
785 Log() <<
"multiclass classification with " << DefaultDataSetInfo().GetNClasses() <<
" classes." <<
Endl;
788 Log() <<
"classification with " << DefaultDataSetInfo().GetNClasses() <<
" classes." <<
Endl;
802 fMethods.push_back( method );
823 MVector::const_iterator itrMethod = fMethods.begin();
824 MVector::const_iterator itrMethodEnd = fMethods.end();
826 for (; itrMethod != itrMethodEnd; itrMethod++) {
841 DefaultDataSetInfo().GetDataSet();
849 for (
UInt_t cls = 0; cls < DefaultDataSetInfo().GetNClasses() ; cls++) {
850 m = DefaultDataSetInfo().CorrelationMatrix(DefaultDataSetInfo().GetClassInfo(cls)->GetName());
851 h = DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
TString(
"CorrelationMatrix")+DefaultDataSetInfo().GetClassInfo(cls)->GetName(),
852 "Correlation Matrix ("+ DefaultDataSetInfo().GetClassInfo(cls)->GetName() +
TString(
")"));
860 m = DefaultDataSetInfo().CorrelationMatrix(
"Signal" );
861 h = DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
"CorrelationMatrixS",
"Correlation Matrix (signal)");
867 m = DefaultDataSetInfo().CorrelationMatrix(
"Background" );
868 h = DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
"CorrelationMatrixB",
"Correlation Matrix (background)");
874 m = DefaultDataSetInfo().CorrelationMatrix(
"Regression" );
875 h = DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
"CorrelationMatrix",
"Correlation Matrix");
887 processTrfs = fTransformations;
890 std::vector<TMVA::TransformationHandler*> trfs;
894 std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
895 for (; trfsDefIt!=trfsDef.end(); trfsDefIt++) {
900 Log() <<
kINFO <<
"current transformation string: '" << trfS.
Data() <<
"'" <<
Endl;
902 DefaultDataSetInfo(),
906 if (trfS.
BeginsWith(
'I')) identityTrHandler = trfs.back();
909 const std::vector<Event*>& inputEvents = DefaultDataSetInfo().GetDataSet()->GetEventCollection();
912 std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
914 for (;trfIt != trfs.end(); trfIt++) {
916 (*trfIt)->SetRootDir(RootBaseDir());
917 (*trfIt)->CalcTransformations(inputEvents);
922 for (trfIt = trfs.begin(); trfIt != trfs.end(); trfIt++)
delete *trfIt;
934 MVector::iterator itrMethod;
937 for( itrMethod = fMethods.begin(); itrMethod != fMethods.end(); ++itrMethod ) {
941 Log() <<
kFATAL <<
"Dynamic cast to MethodBase failed" <<
Endl;
947 <<
" not trained (training tree has less entries ["
949 <<
"] than required [" << MinNoTrainingEvents <<
"]" <<
Endl;
958 Log() <<
kINFO <<
"Optimization of tuning paremters finished for Method:"<<mva->
GetName() <<
Endl;
967 if(fDataInputHandler->GetEntries() <=1) {
968 Log() <<
kFATAL <<
"No input data for the training provided!" <<
Endl;
972 Log() <<
kFATAL <<
"You want to do regression training without specifying a target." <<
Endl;
974 && DefaultDataSetInfo().GetNClasses() < 2 )
975 Log() <<
kFATAL <<
"You want to do classification training, but specified less than two classes." <<
Endl;
980 WriteDataInformation();
983 if (fMethods.empty()) {
990 Log() <<
kINFO <<
"Train all methods for "
994 MVector::iterator itrMethod;
997 for( itrMethod = fMethods.begin(); itrMethod != fMethods.end(); ++itrMethod ) {
1000 if(mva==0)
continue;
1004 <<
" not trained (training tree has less entries ["
1006 <<
"] than required [" << MinNoTrainingEvents <<
"]" <<
Endl;
1021 Log() <<
kINFO <<
"Ranking input variables (method specific)..." <<
Endl;
1022 for (itrMethod = fMethods.begin(); itrMethod != fMethods.end(); itrMethod++) {
1027 const Ranking* ranking = (*itrMethod)->CreateRanking();
1028 if (ranking != 0) ranking->
Print();
1029 else Log() <<
kINFO <<
"No variable ranking supplied by classifier: "
1041 Log() <<
kINFO <<
"=== Destroy and recreate all methods via weight files for testing ===" << Endl <<
Endl;
1043 RootBaseDir()->cd();
1046 for (
UInt_t i=0; i<fMethods.size(); i++) {
1064 dataSetInfo, weightfile ) );
1067 if( !methCat )
Log() <<
kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
1090 if (fMethods.empty()) {
1097 MVector::iterator itrMethod = fMethods.begin();
1098 MVector::iterator itrMethodEnd = fMethods.end();
1099 for (; itrMethod != itrMethodEnd; itrMethod++) {
1102 if(mva==0)
continue;
1106 (analysisType ==
Types::kMulticlass ?
"Multiclass classification" :
"Classification")) <<
" performance" <<
Endl;
1117 if (methodTitle !=
"") {
1121 Log() <<
kWARNING <<
"<MakeClass> Could not find classifier \"" << methodTitle
1122 <<
"\" in list" <<
Endl;
1128 MVector::const_iterator itrMethod = fMethods.begin();
1129 MVector::const_iterator itrMethodEnd = fMethods.end();
1130 for (; itrMethod != itrMethodEnd; itrMethod++) {
1132 if(method==0)
continue;
1145 if (methodTitle !=
"") {
1149 Log() <<
kWARNING <<
"<PrintHelpMessage> Could not find classifier \"" << methodTitle
1150 <<
"\" in list" <<
Endl;
1156 MVector::const_iterator itrMethod = fMethods.begin();
1157 MVector::const_iterator itrMethodEnd = fMethods.end();
1158 for (; itrMethod != itrMethodEnd; itrMethod++) {
1160 if(method==0)
continue;
1172 Log() <<
kINFO <<
"Evaluating all variables..." <<
Endl;
1175 for (
UInt_t i=0; i<DefaultDataSetInfo().GetNVariables(); i++) {
1176 TString s = DefaultDataSetInfo().GetVariableInfo(i).GetLabel();
1177 if (options.
Contains(
"V")) s +=
":V";
1178 this->BookMethod(
"Variable", s );
1190 if (fMethods.empty()) {
1191 Log() <<
kINFO <<
"...nothing found to evaluate" <<
Endl;
1203 Int_t nmeth_used[2] = {0,0};
1205 std::vector<std::vector<TString> > mname(2);
1206 std::vector<std::vector<Double_t> > sig(2),
sep(2), roc(2);
1207 std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
1208 std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
1209 std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
1211 std::vector<std::vector<Float_t> > multiclass_testEff;
1212 std::vector<std::vector<Float_t> > multiclass_trainEff;
1213 std::vector<std::vector<Float_t> > multiclass_testPur;
1214 std::vector<std::vector<Float_t> > multiclass_trainPur;
1216 std::vector<std::vector<Double_t> > biastrain(1);
1217 std::vector<std::vector<Double_t> > biastest(1);
1218 std::vector<std::vector<Double_t> > devtrain(1);
1219 std::vector<std::vector<Double_t> > devtest(1);
1220 std::vector<std::vector<Double_t> > rmstrain(1);
1221 std::vector<std::vector<Double_t> > rmstest(1);
1222 std::vector<std::vector<Double_t> > minftrain(1);
1223 std::vector<std::vector<Double_t> > minftest(1);
1224 std::vector<std::vector<Double_t> > rhotrain(1);
1225 std::vector<std::vector<Double_t> > rhotest(1);
1228 std::vector<std::vector<Double_t> > biastrainT(1);
1229 std::vector<std::vector<Double_t> > biastestT(1);
1230 std::vector<std::vector<Double_t> > devtrainT(1);
1231 std::vector<std::vector<Double_t> > devtestT(1);
1232 std::vector<std::vector<Double_t> > rmstrainT(1);
1233 std::vector<std::vector<Double_t> > rmstestT(1);
1234 std::vector<std::vector<Double_t> > minftrainT(1);
1235 std::vector<std::vector<Double_t> > minftestT(1);
1244 MVector::iterator itrMethod = fMethods.begin();
1245 MVector::iterator itrMethodEnd = fMethods.end();
1246 for (; itrMethod != itrMethodEnd; itrMethod++) {
1249 if(theMethod==0)
continue;
1253 doRegression =
kTRUE;
1261 biastest[0] .push_back( bias );
1262 devtest[0] .push_back( dev );
1263 rmstest[0] .push_back( rms );
1264 minftest[0] .push_back( mInf );
1265 rhotest[0] .push_back( rho );
1266 biastestT[0] .push_back( biasT );
1267 devtestT[0] .push_back( devT );
1268 rmstestT[0] .push_back( rmsT );
1269 minftestT[0] .push_back( mInfT );
1272 biastrain[0] .push_back( bias );
1273 devtrain[0] .push_back( dev );
1274 rmstrain[0] .push_back( rms );
1275 minftrain[0] .push_back( mInf );
1276 rhotrain[0] .push_back( rho );
1277 biastrainT[0].push_back( biasT );
1278 devtrainT[0] .push_back( devT );
1279 rmstrainT[0] .push_back( rmsT );
1280 minftrainT[0].push_back( mInfT );
1285 Log() <<
kINFO <<
"Write evaluation histograms to file" <<
Endl;
1290 doMulticlass =
kTRUE;
1292 Log() <<
kINFO <<
"Write evaluation histograms to file" <<
Endl;
1318 eff01err[isel].push_back( err );
1320 eff10err[isel].push_back( err );
1322 eff30err[isel].push_back( err );
1331 Log() <<
kINFO <<
"Write evaluation histograms to file" <<
Endl;
1338 std::vector<TString> vtemps = mname[0];
1339 std::vector< std::vector<Double_t> > vtmp;
1340 vtmp.push_back( devtest[0] );
1341 vtmp.push_back( devtrain[0] );
1342 vtmp.push_back( biastest[0] );
1343 vtmp.push_back( biastrain[0] );
1344 vtmp.push_back( rmstest[0] );
1345 vtmp.push_back( rmstrain[0] );
1346 vtmp.push_back( minftest[0] );
1347 vtmp.push_back( minftrain[0] );
1348 vtmp.push_back( rhotest[0] );
1349 vtmp.push_back( rhotrain[0] );
1350 vtmp.push_back( devtestT[0] );
1351 vtmp.push_back( devtrainT[0] );
1352 vtmp.push_back( biastestT[0] );
1353 vtmp.push_back( biastrainT[0]);
1354 vtmp.push_back( rmstestT[0] );
1355 vtmp.push_back( rmstrainT[0] );
1356 vtmp.push_back( minftestT[0] );
1357 vtmp.push_back( minftrainT[0]);
1360 devtest[0] = vtmp[0];
1361 devtrain[0] = vtmp[1];
1362 biastest[0] = vtmp[2];
1363 biastrain[0] = vtmp[3];
1364 rmstest[0] = vtmp[4];
1365 rmstrain[0] = vtmp[5];
1366 minftest[0] = vtmp[6];
1367 minftrain[0] = vtmp[7];
1368 rhotest[0] = vtmp[8];
1369 rhotrain[0] = vtmp[9];
1370 devtestT[0] = vtmp[10];
1371 devtrainT[0] = vtmp[11];
1372 biastestT[0] = vtmp[12];
1373 biastrainT[0] = vtmp[13];
1374 rmstestT[0] = vtmp[14];
1375 rmstrainT[0] = vtmp[15];
1376 minftestT[0] = vtmp[16];
1377 minftrainT[0] = vtmp[17];
1379 else if (doMulticlass) {
1385 for (
Int_t k=0; k<2; k++) {
1386 std::vector< std::vector<Double_t> > vtemp;
1387 vtemp.push_back( effArea[k] );
1388 vtemp.push_back( eff10[k] );
1389 vtemp.push_back( eff01[k] );
1390 vtemp.push_back( eff30[k] );
1391 vtemp.push_back( eff10err[k] );
1392 vtemp.push_back( eff01err[k] );
1393 vtemp.push_back( eff30err[k] );
1394 vtemp.push_back( trainEff10[k] );
1395 vtemp.push_back( trainEff01[k] );
1396 vtemp.push_back( trainEff30[k] );
1397 vtemp.push_back( sig[k] );
1398 vtemp.push_back(
sep[k] );
1399 vtemp.push_back( roc[k] );
1400 std::vector<TString> vtemps = mname[k];
1402 effArea[k] = vtemp[0];
1403 eff10[k] = vtemp[1];
1404 eff01[k] = vtemp[2];
1405 eff30[k] = vtemp[3];
1406 eff10err[k] = vtemp[4];
1407 eff01err[k] = vtemp[5];
1408 eff30err[k] = vtemp[6];
1409 trainEff10[k] = vtemp[7];
1410 trainEff01[k] = vtemp[8];
1411 trainEff30[k] = vtemp[9];
1426 const Int_t nmeth = methodsNoCuts.size();
1427 const Int_t nvar = DefaultDataSetInfo().GetNVariables();
1428 if (!doRegression && !doMulticlass ) {
1434 std::vector<Double_t> rvec;
1442 std::vector<TString>* theVars =
new std::vector<TString>;
1443 std::vector<ResultsClassification*> mvaRes;
1444 for (itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); itrMethod++, ivar++) {
1449 theVars->back().ReplaceAll(
"MVA_",
"" );
1462 DataSet* defDs = DefaultDataSetInfo().GetDataSet();
1469 for (
Int_t im=0; im<nmeth; im++) {
1473 Log() <<
kWARNING <<
"Found NaN return value in event: " << ievt
1474 <<
" for method \"" << methodsNoCuts[im]->GetName() <<
"\"" <<
Endl;
1477 else dvec[im] = retval;
1480 if (DefaultDataSetInfo().IsSignal(ev)) { tpSig->
AddRow( dvec ); theMat = overlapS; }
1481 else { tpBkg->
AddRow( dvec ); theMat = overlapB; }
1484 for (
Int_t im=0; im<nmeth; im++) {
1485 for (
Int_t jm=im; jm<nmeth; jm++) {
1486 if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1488 if (im != jm) (*theMat)(jm,im)++;
1508 if (corrMatS != 0 && corrMatB != 0) {
1513 for (
Int_t im=0; im<nmeth; im++) {
1514 for (
Int_t jm=0; jm<nmeth; jm++) {
1515 mvaMatS(im,jm) = (*corrMatS)(im,jm);
1516 mvaMatB(im,jm) = (*corrMatB)(im,jm);
1521 std::vector<TString> theInputVars;
1524 for (
Int_t iv=0; iv<nvar; iv++) {
1525 theInputVars.push_back( DefaultDataSetInfo().GetVariableInfo( iv ).GetLabel() );
1526 for (
Int_t jm=0; jm<nmeth; jm++) {
1527 varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1528 varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1534 Log() <<
kINFO <<
"Inter-MVA correlation matrix (signal):" <<
Endl;
1538 Log() <<
kINFO <<
"Inter-MVA correlation matrix (background):" <<
Endl;
1543 Log() <<
kINFO <<
"Correlations between input variables and MVA response (signal):" <<
Endl;
1547 Log() <<
kINFO <<
"Correlations between input variables and MVA response (background):" <<
Endl;
1551 else Log() <<
kWARNING <<
"<TestAllMethods> cannot compute correlation matrices" <<
Endl;
1554 Log() <<
kINFO <<
"The following \"overlap\" matrices contain the fraction of events for which " <<
Endl;
1555 Log() <<
kINFO <<
"the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" <<
Endl;
1556 Log() <<
kINFO <<
"An event is signal-like, if its MVA output exceeds the following value:" <<
Endl;
1558 Log() <<
kINFO <<
"which correspond to the working point: eff(signal) = 1 - eff(background)" <<
Endl;
1561 if (nmeth != (
Int_t)fMethods.size())
1562 Log() <<
kINFO <<
"Note: no correlations and overlap with cut method are provided at present" <<
Endl;
1566 Log() <<
kINFO <<
"Inter-MVA overlap matrix (signal):" <<
Endl;
1570 Log() <<
kINFO <<
"Inter-MVA overlap matrix (background):" <<
Endl;
1594 TString hLine =
"-------------------------------------------------------------------------";
1595 Log() <<
kINFO <<
"Evaluation results ranked by smallest RMS on test sample:" <<
Endl;
1596 Log() <<
kINFO <<
"(\"Bias\" quotes the mean deviation of the regression from true target." <<
Endl;
1597 Log() <<
kINFO <<
" \"MutInf\" is the \"Mutual Information\" between regression and target." <<
Endl;
1598 Log() <<
kINFO <<
" Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" <<
Endl;
1599 Log() <<
kINFO <<
" tained when removing events deviating more than 2sigma from average.)" <<
Endl;
1601 Log() <<
kINFO <<
"MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" <<
Endl;
1604 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1605 Log() <<
kINFO <<
Form(
"%-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1606 (
const char*)mname[0][i],
1607 biastest[0][i], biastestT[0][i],
1608 rmstest[0][i], rmstestT[0][i],
1609 minftest[0][i], minftestT[0][i] )
1614 Log() <<
kINFO <<
"Evaluation results ranked by smallest RMS on training sample:" <<
Endl;
1617 Log() <<
kINFO <<
"MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" <<
Endl;
1620 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1621 Log() <<
kINFO <<
Form(
"%-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1622 (
const char*)mname[0][i],
1623 biastrain[0][i], biastrainT[0][i],
1624 rmstrain[0][i], rmstrainT[0][i],
1625 minftrain[0][i], minftrainT[0][i] )
1631 else if( doMulticlass ){
1633 TString hLine =
"--------------------------------------------------------------------------------";
1634 Log() <<
kINFO <<
"Evaluation results ranked by best signal efficiency times signal purity " <<
Endl;
1636 TString header=
"MVA Method ";
1637 for(
UInt_t icls = 0; icls<DefaultDataSetInfo().GetNClasses(); ++icls){
1638 header +=
Form(
"%-12s ",DefaultDataSetInfo().GetClassInfo(icls)->GetName().
Data());
1642 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1643 TString res =
Form(
"%-15s",(
const char*)mname[0][i]);
1644 for(
UInt_t icls = 0; icls<DefaultDataSetInfo().GetNClasses(); ++icls){
1645 res +=
Form(
"%#1.3f ",(multiclass_testEff[i][icls])*(multiclass_testPur[i][icls]));
1655 TString hLine =
"--------------------------------------------------------------------------------";
1656 Log() <<
kINFO <<
"Evaluation results ranked by best signal efficiency and purity (area)" <<
Endl;
1658 Log() <<
kINFO <<
"MVA Signal efficiency at bkg eff.(error): | Sepa- Signifi- " <<
Endl;
1659 Log() <<
kINFO <<
"Method: @B=0.01 @B=0.10 @B=0.30 ROC-integ. | ration: cance: " <<
Endl;
1661 for (
Int_t k=0; k<2; k++) {
1662 if (k == 1 && nmeth_used[k] > 0) {
1664 Log() <<
kINFO <<
"Input Variables: " << Endl << hLine <<
Endl;
1666 for (
Int_t i=0; i<nmeth_used[k]; i++) {
1667 if (k == 1) mname[k][i].ReplaceAll(
"Variable_",
"" );
1668 if (
sep[k][i] < 0 || sig[k][i] < 0) {
1670 Log() <<
kINFO <<
Form(
"%-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f | -- --",
1671 (
const char*)mname[k][i],
1672 eff01[k][i],
Int_t(1000*eff01err[k][i]),
1673 eff10[k][i],
Int_t(1000*eff10err[k][i]),
1674 eff30[k][i],
Int_t(1000*eff30err[k][i]),
1675 effArea[k][i]) <<
Endl;
1678 Log() <<
kINFO <<
Form(
"%-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f | %#1.3f %#1.3f",
1679 (
const char*)mname[k][i],
1680 eff01[k][i],
Int_t(1000*eff01err[k][i]),
1681 eff10[k][i],
Int_t(1000*eff10err[k][i]),
1682 eff30[k][i],
Int_t(1000*eff30err[k][i]),
1684 sep[k][i], sig[k][i]) <<
Endl;
1690 Log() <<
kINFO <<
"Testing efficiency compared to training efficiency (overtraining check)" <<
Endl;
1692 Log() <<
kINFO <<
"MVA Signal efficiency: from test sample (from training sample) " <<
Endl;
1693 Log() <<
kINFO <<
"Method: @B=0.01 @B=0.10 @B=0.30 " <<
Endl;
1695 for (
Int_t k=0; k<2; k++) {
1696 if (k == 1 && nmeth_used[k] > 0) {
1698 Log() <<
kINFO <<
"Input Variables: " << Endl << hLine <<
Endl;
1700 for (
Int_t i=0; i<nmeth_used[k]; i++) {
1701 if (k == 1) mname[k][i].ReplaceAll(
"Variable_",
"" );
1702 Log() <<
kINFO <<
Form(
"%-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
1703 (
const char*)mname[k][i],
1704 eff01[k][i],trainEff01[k][i],
1705 eff10[k][i],trainEff10[k][i],
1706 eff30[k][i],trainEff30[k][i]) <<
Endl;
1714 RootBaseDir()->cd();
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stored for it
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Principal Components Analysis (PCA)
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
virtual void MakeClass(const TString &methodTitle="") const
Print predefined help message of classifier iterate over methods and test.
void OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
iterates through all booked methods and sees if they use parameter tuning and, if so, optimizes their tuning parameters.
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
MsgLogger & Endl(MsgLogger &ml)
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight should be ignored in the training or not
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
static void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
create variable transformations
TString & ReplaceAll(const TString &s1, const TString &s2)
const char * GetName() const
Returns name of object.
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
void SetSignalWeightExpression(const TString &variable)
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
virtual void MakeClass(const TString &classFileName=TString("")) const =0
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background training event
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts spectator in data set info
void ToLower()
Change string to lower-case.
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
void TrainAllMethods()
iterates through all booked methods and calls training
virtual void TestMulticlass()
test multiclass classification
DataSetInfo & DefaultDataSetInfo()
default creation
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add a training event for the given class
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
void SetSignalTree(TTree *signal, Double_t weight=1.0)
void WriteDataInformation()
put correlations of input data and a few (default + user selected) transformations into the root file...
const TString & GetMethodName() const
TString GetWeightFileName() const
retrieve weight file name
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add a test event for the given class
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add a background tree with the given event weight and tree type
const char * Data() const
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event vector event : the order of values is: variables + targets + spectators ...
static Types & Instance()
return the single instance of "Types" if existing already, or create it (Singleton)
Types::EAnalysisType GetAnalysisType() const
IMethod * GetMethod(const TString &title) const
returns pointer to MVA that corresponds to given method title
ClassImp(TMVA::Factory) TMVA
standard constructor. jobname : this name will appear in all weight file names produced by the MVAs
void ReadStateFromFile()
Function to read options and weights from file.
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
void EvaluateAllVariables(TString options="")
iterates over all MVA input variables and evaluates them
Bool_t DoMulticlass() const
std::vector< std::vector< double > > Data
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
void SetDrawProgressBar(Bool_t d)
Types::EMVA GetMethodType() const
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp. rejection) versus signal efficiency plots
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
TCppMethod_t GetMethod(TCppScope_t scope, TCppIndex_t imeth)
A specialized string object used for TTree selections.
const TMatrixD * GetCovarianceMatrix() const
TMatrixT< Double_t > TMatrixD
Bool_t UserAssignEvents(UInt_t clIndex)
const Int_t MinNoTrainingEvents
virtual ~Factory()
destructor delete fATreeEvent;
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background test event
void SetCut(const TString &cut, const TString &className="")
DataSetInfo & AddDataSet(DataSetInfo &)
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
std::string GetMethodName(TCppMethod_t)
Service class for 2-Dim histogram classes.
MethodBase * BookMethod(TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
TPaveLabel title(3, 27.1, 15, 28.7,"ROOT Environment and Tools")
void EvaluateAllMethods(void)
iterates over all MVAs that have been booked, and calls their evaluation methods
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
void AddCut(const TString &cut, const TString &className="")
virtual const char * GetName() const
Returns name of object.
void SetBackgroundWeightExpression(const TString &variable)
virtual Double_t GetSignificance() const
compute significance of mean difference: significance = |&lt;S&gt; - &lt;B&gt;|/Sqrt(RMS_S^2 + RMS_B^2) ...
void Greetings()
print welcome message options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg ...
virtual void MakePrincipals()
Perform the principal components analysis.
void SetBoostedMethodName(TString methodName)
void SetVerbose(Bool_t v=kTRUE)
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
const Event * GetEvent() const
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
void PrintHelpMessage(const TString &methodTitle="") const
Print predefined help message of classifier iterate over methods and test.
void SetCurrentType(Types::ETreeType type) const
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
virtual void PrintHelpMessage() const =0
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
static void DestroyInstance()
static function: destroy TMVA instance
DataSetInfo & DataInfo() const
RooCmdArg Verbose(Bool_t flag=kTRUE)
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add signal tree with event weight (used to compute significance)
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter= ' ')
Create or simply read branches from filename.
void SetUseColor(Bool_t uc)
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Bool_t DoRegression() const
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample ...
DataSetManager * fDataSetManager
void SetWeightExpression(const TString &variable, const TString &className="")
Log() << kWarning << DefaultDataSetInfo().GetNClasses() /*fClasses.size()*/ << Endl;.
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
void PrintHelpMessage() const
prints out method-specific help method
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
void DeleteAllMethods(void)
delete methods
virtual Double_t GetTrainingEfficiency(const TString &)
virtual Long64_t GetEntries() const
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
Long64_t GetNTrainingEvents() const
A TTree object has a header with a name and a title.
std::vector< IMethod * > MVector
Double_t GetSignalReferenceCut() const
virtual void Print() const
get maximum length of variable names
TString GetMethodTypeName() const
void SetTestvarName(const TString &v="")
void SetTree(TTree *tree, const TString &className, Double_t weight)
set background tree
virtual void TestClassification()
initialization
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
virtual void SetAnalysisType(Types::EAnalysisType type)
const TString & GetTestvarName() const
void SetBackgroundTree(TTree *background, Double_t weight=1.0)