Factory.cxx
1 // @(#)Root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 // Updated by: Omar Zapata, Kim Albertsson
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : Factory *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors : *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
16  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * Omar Zapata <Omar.Zapata@cern.ch> - UdeA/ITM Colombia *
22  * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
23  * Sergei Gleyzer <Sergei.Gleyzer@cern.ch> - U of Florida & CERN *
24  * Kim Albertsson <kim.albertsson@cern.ch> - LTU & CERN *
25  * *
26  * Copyright (c) 2005-2015: *
27  * CERN, Switzerland *
28  * U. of Victoria, Canada *
29  * MPI-K Heidelberg, Germany *
30  * U. of Bonn, Germany *
31  * UdeA/ITM, Colombia *
32  * U. of Florida, USA *
33  * *
34  * Redistribution and use in source and binary forms, with or without *
35  * modification, are permitted according to the terms listed in LICENSE *
36  * (http://tmva.sourceforge.net/LICENSE) *
37  **********************************************************************************/
38 
39 /*! \class TMVA::Factory
40 \ingroup TMVA
41 
42 This is the main MVA steering class.
43 It creates all MVA methods, and guides them through the training, testing and
44 evaluation phases.
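
A minimal usage sketch (names such as `outFile`, `sigTree`, `bkgTree`,
"var1" and "var2" are placeholders for the user's own file, trees and
branches, not taken from this file):

~~~ {.cpp}
TMVA::Factory factory("job", outFile, "!V:!Silent:AnalysisType=Classification");
TMVA::DataLoader loader("dataset");
loader.AddVariable("var1");
loader.AddVariable("var2");
loader.AddSignalTree(sigTree);
loader.AddBackgroundTree(bkgTree);
loader.PrepareTrainingAndTestTree("", "SplitMode=Random");
factory.BookMethod(&loader, TMVA::Types::kBDT, "BDT", "NTrees=100");
factory.TrainAllMethods();
factory.TestAllMethods();
factory.EvaluateAllMethods();
~~~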
45 */
46 
47 #include "TMVA/Factory.h"
48 
49 #include "TMVA/ClassifierFactory.h"
50 #include "TMVA/Config.h"
51 #include "TMVA/Configurable.h"
52 #include "TMVA/Tools.h"
53 #include "TMVA/Ranking.h"
54 #include "TMVA/DataSet.h"
55 #include "TMVA/IMethod.h"
56 #include "TMVA/MethodBase.h"
57 #include "TMVA/DataInputHandler.h"
58 #include "TMVA/DataSetManager.h"
59 #include "TMVA/DataSetInfo.h"
60 #include "TMVA/DataLoader.h"
61 #include "TMVA/MethodBoost.h"
62 #include "TMVA/MethodCategory.h"
63 #include "TMVA/ROCCalc.h"
64 #include "TMVA/ROCCurve.h"
65 #include "TMVA/MsgLogger.h"
66 
67 #include "TMVA/VariableInfo.h"
68 #include "TMVA/VariableTransform.h"
69 
70 #include "TMVA/Results.h"
71 #include "TMVA/ResultsClassification.h"
72 #include "TMVA/ResultsRegression.h"
73 #include "TMVA/ResultsMulticlass.h"
74 #include <list>
75 #include <bitset>
76 
77 #include "TMVA/Types.h"
78 
79 #include "TROOT.h"
80 #include "TFile.h"
81 #include "TTree.h"
82 #include "TLeaf.h"
83 #include "TEventList.h"
84 #include "TH2.h"
85 #include "TText.h"
86 #include "TLegend.h"
87 #include "TGraph.h"
88 #include "TStyle.h"
89 #include "TMatrixF.h"
90 #include "TMatrixDSym.h"
91 #include "TMultiGraph.h"
92 #include "TPaletteAxis.h"
93 #include "TPrincipal.h"
94 #include "TMath.h"
95 #include "TObjString.h"
96 #include "TSystem.h"
97 #include "TCanvas.h"
98 
99 const Int_t MinNoTrainingEvents = 10;
100 //const Int_t MinNoTestEvents = 1;
101 
102 TFile* TMVA::Factory::fgTargetFile = 0;
103 
104 #define READXML kTRUE
105 
106 //number of bits for bitset
107 #define VIBITS 32
108 
109 
110 
111 ////////////////////////////////////////////////////////////////////////////////
112 /// Standard constructor.
113 ///
114 /// - jobname : this name will appear in all weight file names produced by the MVAs
115 /// - theTargetFile : output ROOT file; the test tree and all evaluation plots
116 /// will be stored here
117 /// - theOption : option string; currently: "V" for verbose
118 
119 TMVA::Factory::Factory( TString jobName, TFile* theTargetFile, TString theOption )
120 : Configurable ( theOption ),
121  fTransformations ( "I" ),
122  fVerbose ( kFALSE ),
123  fCorrelations ( kFALSE ),
124  fROC ( kTRUE ),
125  fSilentFile ( kFALSE ),
126  fJobName ( jobName ),
127  fAnalysisType ( Types::kClassification ),
128  fModelPersistence (kTRUE)
129 {
130  fgTargetFile = theTargetFile;
131  fLogger->SetSource(GetName());
132 
133  // render silent
134  if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure TMVA is silent if requested
135 
136 
137  // init configurable
138  SetConfigDescription( "Configuration options for Factory running" );
139  SetConfigName( GetName() );
140 
141  // histograms are not automatically associated with the current
142  // directory and hence don't go out of scope when closing the file
143  // TH1::AddDirectory(kFALSE);
144  Bool_t silent = kFALSE;
145 #ifdef WIN32
146  // under Windows, switch progress bar and color off by default, as the typical Windows shell doesn't handle these (they would need different escape sequences)
147  Bool_t color = kFALSE;
148  Bool_t drawProgressBar = kFALSE;
149 #else
150  Bool_t color = !gROOT->IsBatch();
151  Bool_t drawProgressBar = kTRUE;
152 #endif
153  DeclareOptionRef( fVerbose, "V", "Verbose flag" );
154  DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
155  DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
156  DeclareOptionRef( fCorrelations, "Correlations", "boolean to show correlation in output" );
157  DeclareOptionRef( fROC, "ROC", "boolean to show ROC in output" );
158  DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
159  DeclareOptionRef( drawProgressBar,
160  "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
161  DeclareOptionRef( fModelPersistence,
162  "ModelPersistence",
163  "Option to save the trained model in xml file or using serialization");
164 
165  TString analysisType("Auto");
166  DeclareOptionRef( analysisType,
167  "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
168  AddPreDefVal(TString("Classification"));
169  AddPreDefVal(TString("Regression"));
170  AddPreDefVal(TString("Multiclass"));
171  AddPreDefVal(TString("Auto"));
172 
173  ParseOptions();
174  CheckForUnusedOptions();
175 
176  if (Verbose()) Log().SetMinType( kVERBOSE );
177 
178  // global settings
179  gConfig().SetUseColor( color );
180  gConfig().SetSilent( silent );
181  gConfig().SetDrawProgressBar( drawProgressBar );
182 
183  analysisType.ToLower();
184  if ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
185  else if( analysisType == "regression" ) fAnalysisType = Types::kRegression;
186  else if( analysisType == "multiclass" ) fAnalysisType = Types::kMulticlass;
187  else if( analysisType == "auto" ) fAnalysisType = Types::kNoAnalysisType;
188 
189 // Greetings();
190 }
191 
192 ////////////////////////////////////////////////////////////////////////////////
193 /// Constructor.
194 
195 TMVA::Factory::Factory( TString jobName, TString theOption )
196 : Configurable ( theOption ),
197  fTransformations ( "I" ),
198  fVerbose ( kFALSE ),
199  fCorrelations ( kFALSE ),
200  fROC ( kTRUE ),
201  fSilentFile ( kTRUE ),
202  fJobName ( jobName ),
203  fAnalysisType ( Types::kClassification ),
204  fModelPersistence (kTRUE)
205 {
206  fgTargetFile = 0;
207  fLogger->SetSource(GetName());
208 
209 
210  // render silent
211  if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure TMVA is silent if requested
212 
213 
214  // init configurable
215  SetConfigDescription( "Configuration options for Factory running" );
216  SetConfigName( GetName() );
217 
218  // histograms are not automatically associated with the current
219  // directory and hence don't go out of scope when closing the file
220  // TH1::AddDirectory(kFALSE);
221  Bool_t silent = kFALSE;
222 #ifdef WIN32
223  // under Windows, switch progress bar and color off by default, as the typical Windows shell doesn't handle these (they would need different escape sequences)
224  Bool_t color = kFALSE;
225  Bool_t drawProgressBar = kFALSE;
226 #else
227  Bool_t color = !gROOT->IsBatch();
228  Bool_t drawProgressBar = kTRUE;
229 #endif
230  DeclareOptionRef( fVerbose, "V", "Verbose flag" );
231  DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
232  DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
233  DeclareOptionRef( fCorrelations, "Correlations", "boolean to show correlation in output" );
234  DeclareOptionRef( fROC, "ROC", "boolean to show ROC in output" );
235  DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
236  DeclareOptionRef( drawProgressBar,
237  "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
238  DeclareOptionRef( fModelPersistence,
239  "ModelPersistence",
240  "Option to save the trained model in xml file or using serialization");
241 
242  TString analysisType("Auto");
243  DeclareOptionRef( analysisType,
244  "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
245  AddPreDefVal(TString("Classification"));
246  AddPreDefVal(TString("Regression"));
247  AddPreDefVal(TString("Multiclass"));
248  AddPreDefVal(TString("Auto"));
249 
250  ParseOptions();
251  CheckForUnusedOptions();
252 
253  if (Verbose()) Log().SetMinType( kVERBOSE );
254 
255  // global settings
256  gConfig().SetUseColor( color );
257  gConfig().SetSilent( silent );
258  gConfig().SetDrawProgressBar( drawProgressBar );
259 
260  analysisType.ToLower();
261  if ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
262  else if( analysisType == "regression" ) fAnalysisType = Types::kRegression;
263  else if( analysisType == "multiclass" ) fAnalysisType = Types::kMulticlass;
264  else if( analysisType == "auto" ) fAnalysisType = Types::kNoAnalysisType;
265 
266  Greetings();
267 }
268 
269 ////////////////////////////////////////////////////////////////////////////////
270 /// Print welcome message.
271 /// Options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg
272 
273 void TMVA::Factory::Greetings()
274 {
275  gTools().ROOTVersionMessage( Log() );
276  gTools().TMVAWelcomeMessage( Log(), gTools().kLogoWelcomeMsg );
277  gTools().TMVAVersionMessage( Log() ); Log() << Endl;
278 }
279 
280 ////////////////////////////////////////////////////////////////////////////////
281 
282 Bool_t TMVA::Factory::IsSilentFile()
283 {
284  return fSilentFile;
285 }
286 
287 ////////////////////////////////////////////////////////////////////////////////
288 
289 Bool_t TMVA::Factory::IsModelPersistence()
290 {
291  return fModelPersistence;
292 }
293 
294 ////////////////////////////////////////////////////////////////////////////////
295 /// Destructor.
296 
297 TMVA::Factory::~Factory( void )
298 {
299  std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
300  for (;trfIt != fDefaultTrfs.end(); trfIt++) delete (*trfIt);
301 
302  this->DeleteAllMethods();
303 
304 
305  // problem with call of REGISTER_METHOD macro ...
306  // ClassifierFactory::DestroyInstance();
307  // Types::DestroyInstance();
308  //Tools::DestroyInstance();
309  //Config::DestroyInstance();
310 }
311 
312 ////////////////////////////////////////////////////////////////////////////////
313 /// Delete methods.
314 
315 void TMVA::Factory::DeleteAllMethods( void )
316 {
317  std::map<TString,MVector*>::iterator itrMap;
318 
319  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
320  {
321  MVector *methods=itrMap->second;
322  // delete methods
323  MVector::iterator itrMethod = methods->begin();
324  for (; itrMethod != methods->end(); itrMethod++) {
325  Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
326  delete (*itrMethod);
327  }
328  methods->clear();
329  delete methods;
330  }
331 }
332 
333 ////////////////////////////////////////////////////////////////////////////////
334 
335 void TMVA::Factory::SetVerbose( Bool_t v )
336 {
337  fVerbose = v;
338 }
339 
340 ////////////////////////////////////////////////////////////////////////////////
341 /// Book a classifier or regression method.
342 
343 TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption )
344 {
345  if(fModelPersistence) gSystem->MakeDirectory(loader->GetName());//creating directory for DataLoader output
346 
347  TString datasetname=loader->GetName();
348 
349  if( fAnalysisType == Types::kNoAnalysisType ){
350  if( loader->DefaultDataSetInfo().GetNClasses()==2
351  && loader->DefaultDataSetInfo().GetClassInfo("Signal") != NULL
352  && loader->DefaultDataSetInfo().GetClassInfo("Background") != NULL
353  ){
354  fAnalysisType = Types::kClassification; // default is classification
355  } else if( loader->DefaultDataSetInfo().GetNClasses() >= 2 ){
356  fAnalysisType = Types::kMulticlass; // if two classes, but not named "Signal" and "Background"
357  } else
358  Log() << kFATAL << "No analysis type for " << loader->DefaultDataSetInfo().GetNClasses() << " classes and "
359  << loader->DefaultDataSetInfo().GetNTargets() << " regression targets." << Endl;
360  }
361 
362  // booking via name; the names are translated into enums and the
363  // corresponding overloaded BookMethod is called
364 
365  if(fMethodsMap.find(datasetname)!=fMethodsMap.end())
366  {
367  if (GetMethod( datasetname,methodTitle ) != 0) {
368  Log() << kFATAL << "Booking failed since method with title <"
369  << methodTitle << "> already exists in dataset <" << loader->GetName() << "> "
370  << Endl;
371  }
372  }
373 
374 
375  Log() << kHEADER << "Booking method: " << gTools().Color("bold") << methodTitle
376  // << gTools().Color("reset")<<" DataSet Name: "<<gTools().Color("bold")<<loader->GetName()
377  << gTools().Color("reset") << Endl << Endl;
378 
379  // interpret option string with respect to a request for boosting (i.e., Boost_num > 0)
380  Int_t boostNum = 0;
381  TMVA::Configurable* conf = new TMVA::Configurable( theOption );
382  conf->DeclareOptionRef( boostNum = 0, "Boost_num",
383  "Number of times the classifier will be boosted" );
384  conf->ParseOptions();
385  delete conf;
386  TString fFileDir;
387  if(fModelPersistence)
388  {
389  fFileDir=loader->GetName();
390  fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
391  }
392  // initialize methods
393  IMethod* im;
394  if (!boostNum) {
395  im = ClassifierFactory::Instance().Create(theMethodName.Data(), fJobName, methodTitle,
396  loader->DefaultDataSetInfo(), theOption);
397  }
398  else {
399  // boosted classifier, requires a specific definition, making it transparent for the user
400  Log() << kDEBUG <<"Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;
401  im = ClassifierFactory::Instance().Create("Boost", fJobName, methodTitle, loader->DefaultDataSetInfo(), theOption);
402  MethodBoost *methBoost = dynamic_cast<MethodBoost *>(im); // DSMTEST divided into two lines
403  if (!methBoost) // DSMTEST
404  Log() << kFATAL << "Method with type kBoost cannot be cast to MethodBoost. /Factory" << Endl; // DSMTEST
405 
406  if (fModelPersistence)
407  methBoost->SetWeightFileDir(fFileDir);
408  methBoost->SetModelPersistence(fModelPersistence);
409  methBoost->SetBoostedMethodName(theMethodName); // DSMTEST divided into two lines
410  methBoost->fDataSetManager = loader->fDataSetManager; // DSMTEST
411  methBoost->SetFile(fgTargetFile);
412  methBoost->SetSilentFile(IsSilentFile());
413  }
414 
415  MethodBase *method = dynamic_cast<MethodBase*>(im);
416  if (method==0) return 0; // could not create method
417 
418  // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
419  if (method->GetMethodType() == Types::kCategory) { // DSMTEST
420  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(im)); // DSMTEST
421  if (!methCat) // DSMTEST
422  Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl; // DSMTEST
423 
424  if(fModelPersistence) methCat->SetWeightFileDir(fFileDir);
425  methCat->SetModelPersistence(fModelPersistence);
426  methCat->fDataSetManager = loader->fDataSetManager; // DSMTEST
427  methCat->SetFile(fgTargetFile);
428  methCat->SetSilentFile(IsSilentFile());
429  } // DSMTEST
430 
431 
432  if (!method->HasAnalysisType( fAnalysisType,
433  loader->DefaultDataSetInfo().GetNClasses(),
434  loader->DefaultDataSetInfo().GetNTargets() )) {
435  Log() << kWARNING << "Method " << method->GetMethodTypeName() << " is not capable of handling " ;
436  if (fAnalysisType == Types::kRegression) {
437  Log() << "regression with " << loader->DefaultDataSetInfo().GetNTargets() << " targets." << Endl;
438  }
439  else if (fAnalysisType == Types::kMulticlass ) {
440  Log() << "multiclass classification with " << loader->DefaultDataSetInfo().GetNClasses() << " classes." << Endl;
441  }
442  else {
443  Log() << "classification with " << loader->DefaultDataSetInfo().GetNClasses() << " classes." << Endl;
444  }
445  return 0;
446  }
447 
448  if(fModelPersistence) method->SetWeightFileDir(fFileDir);
449  method->SetModelPersistence(fModelPersistence);
450  method->SetAnalysisType( fAnalysisType );
451  method->SetupMethod();
452  method->ParseOptions();
453  method->ProcessSetup();
454  method->SetFile(fgTargetFile);
455  method->SetSilentFile(IsSilentFile());
456 
457  // check-for-unused-options is performed; may be overridden by derived classes
458  method->CheckSetup();
459 
460  if(fMethodsMap.find(datasetname)==fMethodsMap.end())
461  {
462  MVector *mvector=new MVector;
463  fMethodsMap[datasetname]=mvector;
464  }
465  fMethodsMap[datasetname]->push_back( method );
466  return method;
467 }
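
// Usage sketch: a boosted classifier can be requested purely through the
// option string parsed above; the names and option values here are
// illustrative placeholders, not taken from this file.
//
//    factory.BookMethod(&loader, TMVA::Types::kFisher, "BoostedFisher",
//                       "Boost_num=10:Boost_Type=AdaBoost");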
468 
469 ////////////////////////////////////////////////////////////////////////////////
470 /// Books an MVA method. The option configuration string is custom for each MVA;
471 /// the TString field "methodTitle" serves to define (and distinguish)
472 /// several instances of a given MVA, e.g. when one wants to compare the
473 /// performance of various configurations.
474 
475 TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, Types::EMVA theMethod, TString methodTitle, TString theOption )
476 {
477  return BookMethod(loader, Types::Instance().GetMethodName( theMethod ), methodTitle, theOption );
478 }
479 
480 ////////////////////////////////////////////////////////////////////////////////
481 /// Returns pointer to MVA that corresponds to given method title.
482 
483 TMVA::IMethod* TMVA::Factory::GetMethod(const TString& datasetname, const TString &methodTitle ) const
484 {
485  if(fMethodsMap.find(datasetname)==fMethodsMap.end()) return 0;
486 
487  MVector *methods=fMethodsMap.find(datasetname)->second;
488 
489  MVector::const_iterator itrMethod;
490  //
491  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
492  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
493  if ( (mva->GetMethodName())==methodTitle ) return mva;
494  }
495  return 0;
496 }
497 
498 ////////////////////////////////////////////////////////////////////////////////
499 /// Checks whether a given method name is defined for a given dataset.
500 
501 Bool_t TMVA::Factory::HasMethod(const TString& datasetname, const TString &methodTitle ) const
502 {
503  if(fMethodsMap.find(datasetname)==fMethodsMap.end()) return 0;
504 
505  std::string methodName = methodTitle.Data();
506  auto isEqualToMethodName = [&methodName](TMVA::IMethod * m) {
507  return ( 0 == methodName.compare( m->GetName() ) );
508  };
509 
510  TMVA::Factory::MVector * methods = this->fMethodsMap.at(datasetname);
511  Bool_t isMethodNameExisting = std::any_of( methods->begin(), methods->end(), isEqualToMethodName);
512 
513  return isMethodNameExisting;
514 }
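
// Usage sketch (placeholder names): guard against booking a method title twice.
//
//    if (!factory.HasMethod("dataset", "BDT"))
//       factory.BookMethod(&loader, TMVA::Types::kBDT, "BDT", "NTrees=100");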
515 
516 ////////////////////////////////////////////////////////////////////////////////
517 
518 void TMVA::Factory::WriteDataInformation(DataSetInfo& fDataSetInfo)
519 {
520  RootBaseDir()->cd();
521 
522  if(!RootBaseDir()->GetDirectory(fDataSetInfo.GetName())) RootBaseDir()->mkdir(fDataSetInfo.GetName());
523  else return; // loader is already in the output file, we don't need to save it again
524 
525  RootBaseDir()->cd(fDataSetInfo.GetName());
526  fDataSetInfo.GetDataSet(); // builds dataset (including calculation of correlation matrix)
527 
528 
529  // correlation matrix of the default DS
530  const TMatrixD* m(0);
531  const TH2* h(0);
532 
533  if(fAnalysisType == Types::kMulticlass){
534  for (UInt_t cls = 0; cls < fDataSetInfo.GetNClasses() ; cls++) {
535  m = fDataSetInfo.CorrelationMatrix(fDataSetInfo.GetClassInfo(cls)->GetName());
536  h = fDataSetInfo.CreateCorrelationMatrixHist(m, TString("CorrelationMatrix")+fDataSetInfo.GetClassInfo(cls)->GetName(),
537  TString("Correlation Matrix (")+ fDataSetInfo.GetClassInfo(cls)->GetName() +TString(")"));
538  if (h!=0) {
539  h->Write();
540  delete h;
541  }
542  }
543  }
544  else{
545  m = fDataSetInfo.CorrelationMatrix( "Signal" );
546  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixS", "Correlation Matrix (signal)");
547  if (h!=0) {
548  h->Write();
549  delete h;
550  }
551 
552  m = fDataSetInfo.CorrelationMatrix( "Background" );
553  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixB", "Correlation Matrix (background)");
554  if (h!=0) {
555  h->Write();
556  delete h;
557  }
558 
559  m = fDataSetInfo.CorrelationMatrix( "Regression" );
560  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrix", "Correlation Matrix");
561  if (h!=0) {
562  h->Write();
563  delete h;
564  }
565  }
566 
567  // some default transformations to evaluate
568  // NOTE: all transformations are destroyed after this test
569  TString processTrfs = "I"; //"I;N;D;P;U;G,D;"
570 
571  // plus some user defined transformations
572  processTrfs = fTransformations;
573 
574  // remove any trace of identity transform - if given (avoid applying it twice)
575  std::vector<TMVA::TransformationHandler*> trfs;
576  TransformationHandler* identityTrHandler = 0;
577 
578  std::vector<TString> trfsDef = gTools().SplitString(processTrfs,';');
579  std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
580  for (; trfsDefIt!=trfsDef.end(); trfsDefIt++) {
581  trfs.push_back(new TMVA::TransformationHandler(fDataSetInfo, "Factory"));
582  TString trfS = (*trfsDefIt);
583 
584  //Log() << kINFO << Endl;
585  Log() << kDEBUG << "current transformation string: '" << trfS.Data() << "'" << Endl;
586  TMVA::CreateVariableTransforms( trfS,
587  fDataSetInfo,
588  *(trfs.back()),
589  Log() );
590 
591  if (trfS.BeginsWith('I')) identityTrHandler = trfs.back();
592  }
593 
594  const std::vector<Event*>& inputEvents = fDataSetInfo.GetDataSet()->GetEventCollection();
595 
596  // apply all transformations
597  std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
598 
599  for (;trfIt != trfs.end(); trfIt++) {
600  // setting a Root dir causes the variables distributions to be saved to the root file
601  (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.GetName())); // every dataloader has its own directory
602  (*trfIt)->CalcTransformations(inputEvents);
603  }
604  if(identityTrHandler) identityTrHandler->PrintVariableRanking();
605 
606  // clean up
607  for (trfIt = trfs.begin(); trfIt != trfs.end(); trfIt++) delete *trfIt;
608 }
609 
610 ////////////////////////////////////////////////////////////////////////////////
611 /// Iterates through all booked methods and checks whether they support
612 /// parameter tuning. If so, "Method::Train()" is called for different
613 /// parameter settings and the optimal one is kept; that setting is the one
614 /// used later in the main training loop.
615 
616 std::map<TString,Double_t> TMVA::Factory::OptimizeAllMethods(TString fomType, TString fitType)
617 {
618 
619  std::map<TString,MVector*>::iterator itrMap;
620  std::map<TString,Double_t> TunedParameters;
621  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
622  {
623  MVector *methods=itrMap->second;
624 
625  MVector::iterator itrMethod;
626 
627  // iterate over methods and optimize
628  for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
629  Event::SetIsTraining(kTRUE);
630  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
631  if (!mva) {
632  Log() << kFATAL << "Dynamic cast to MethodBase failed" <<Endl;
633  return TunedParameters;
634  }
635 
636  if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
637  Log() << kWARNING << "Method " << mva->GetMethodName()
638  << " not trained (training tree has fewer entries ["
639  << mva->Data()->GetNTrainingEvents()
640  << "] than required [" << MinNoTrainingEvents << "])" << Endl;
641  continue;
642  }
643 
644  Log() << kINFO << "Optimize method: " << mva->GetMethodName() << " for "
645  << (fAnalysisType == Types::kRegression ? "Regression" :
646  (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl;
647 
648  TunedParameters = mva->OptimizeTuningParameters(fomType,fitType);
649  Log() << kINFO << "Optimization of tuning parameters finished for Method:"<<mva->GetName() << Endl;
650  }
651  }
652 
653  return TunedParameters;
654 
655 }
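
// Usage sketch (placeholder names): tune parameters before the training
// loop; "ROCIntegral"/"FitGA" are typical figure-of-merit/fitter choices.
//
//    factory.OptimizeAllMethods("ROCIntegral", "FitGA");
//    factory.TrainAllMethods();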
656 
657 ////////////////////////////////////////////////////////////////////////////////
658 /// Private method to generate a ROCCurve instance for a given method.
659 /// Handles the conversion from TMVA ResultSet to a format the ROCCurve class
660 /// understands.
661 ///
662 /// \note You own the returned pointer.
663 ///
664 
666 TMVA::ROCCurve *TMVA::Factory::GetROC(TMVA::DataLoader *loader, TString theMethodName, UInt_t iClass, Types::ETreeType type)
667 {
668  return GetROC((TString)loader->GetName(), theMethodName, iClass, type);
669 }
670 
671 ////////////////////////////////////////////////////////////////////////////////
672 /// Private method to generate a ROCCurve instance for a given method.
673 /// Handles the conversion from TMVA ResultSet to a format the ROCCurve class
674 /// understands.
675 ///
676 /// \note You own the returned pointer.
677 ///
678 
679 TMVA::ROCCurve *TMVA::Factory::GetROC(TString datasetname, TString theMethodName, UInt_t iClass, Types::ETreeType type)
680 {
681  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
682  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
683  return nullptr;
684  }
685 
686  if (!this->HasMethod(datasetname, theMethodName)) {
687  Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data())
688  << Endl;
689  return nullptr;
690  }
691 
692  std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
693  if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
694  Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.")
695  << Endl;
696  return nullptr;
697  }
698 
699  TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(this->GetMethod(datasetname, theMethodName));
700  TMVA::DataSet *dataset = method->Data();
701  dataset->SetCurrentType(type);
702  TMVA::Results *results = dataset->GetResults(theMethodName, type, this->fAnalysisType);
703 
704  UInt_t nClasses = method->DataInfo().GetNClasses();
705  if (this->fAnalysisType == Types::kMulticlass && iClass >= nClasses) {
706  Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
707  iClass, nClasses)
708  << Endl;
709  return nullptr;
710  }
711 
712  TMVA::ROCCurve *rocCurve = nullptr;
713  if (this->fAnalysisType == Types::kClassification) {
714 
715  std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
716  std::vector<Bool_t> *mvaResTypes = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
717  std::vector<Float_t> mvaResWeights;
718 
719  auto eventCollection = dataset->GetEventCollection(type);
720  mvaResWeights.reserve(eventCollection.size());
721  for (auto ev : eventCollection) {
722  mvaResWeights.push_back(ev->GetWeight());
723  }
724 
725  rocCurve = new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);
726 
727  } else if (this->fAnalysisType == Types::kMulticlass) {
728  std::vector<Float_t> mvaRes;
729  std::vector<Bool_t> mvaResTypes;
730  std::vector<Float_t> mvaResWeights;
731 
732  std::vector<std::vector<Float_t>> *rawMvaRes = dynamic_cast<ResultsMulticlass *>(results)->GetValueVector();
733 
734  // Vector transpose due to values being stored as
735  // [ [0, 1, 2], [0, 1, 2], ... ]
736  // in ResultsMulticlass::GetValueVector.
737  mvaRes.reserve(rawMvaRes->size());
738  for (auto item : *rawMvaRes) {
739  mvaRes.push_back(item[iClass]);
740  }
741 
742  auto eventCollection = dataset->GetEventCollection(type);
743  mvaResTypes.reserve(eventCollection.size());
744  mvaResWeights.reserve(eventCollection.size());
745  for (auto ev : eventCollection) {
746  mvaResTypes.push_back(ev->GetClass() == iClass);
747  mvaResWeights.push_back(ev->GetWeight());
748  }
749 
750  rocCurve = new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
751  }
752 
753  return rocCurve;
754 }
755 
756 ////////////////////////////////////////////////////////////////////////////////
757 /// Calculate the integral of the ROC curve, also known as the area under curve
758 /// (AUC), for a given method.
759 ///
760 /// Argument iClass specifies the class to generate the ROC curve in a
761 /// multiclass setting. It is ignored for binary classification.
762 ///
763 
764 Double_t TMVA::Factory::GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass)
765 {
766  return GetROCIntegral((TString)loader->GetName(), theMethodName, iClass);
767 }
768 
769 ////////////////////////////////////////////////////////////////////////////////
770 /// Calculate the integral of the ROC curve, also known as the area under curve
771 /// (AUC), for a given method.
772 ///
773 /// Argument iClass specifies the class to generate the ROC curve in a
774 /// multiclass setting. It is ignored for binary classification.
775 ///
776 
777 Double_t TMVA::Factory::GetROCIntegral(TString datasetname, TString theMethodName, UInt_t iClass)
778 {
779  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
780  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
781  return 0;
782  }
783 
784  if ( ! this->HasMethod(datasetname, theMethodName) ) {
785  Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
786  return 0;
787  }
788 
789  std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
790  if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
791  Log() << kERROR << Form("Can only generate ROC integral for analysis type kClassification and kMulticlass.")
792  << Endl;
793  return 0;
794  }
795 
796  TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
797  if (!rocCurve) {
798  Log() << kFATAL << Form("ROCCurve object was not created for Method = %s with Dataset = %s ",
799  theMethodName.Data(), datasetname.Data())
800  << Endl;
801  return 0;
802  }
803 
804  Int_t npoints = TMVA::gConfig().fVariablePlotting.fNbinsXOfROCCurve;
805  Double_t rocIntegral = rocCurve->GetROCIntegral(npoints);
806  delete rocCurve;
807 
808  return rocIntegral;
809 }
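
// Usage sketch (placeholder names): query the AUC after EvaluateAllMethods().
//
//    Double_t auc = factory.GetROCIntegral(&loader, "BDT", 0);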
810 
811 ////////////////////////////////////////////////////////////////////////////////
812 /// Argument iClass specifies the class to generate the ROC curve in a
813 /// multiclass setting. It is ignored for binary classification.
814 ///
815 /// Returns a ROC graph for a given method, or nullptr on error.
816 ///
817 /// Note: Evaluation of the given method must have been run prior to ROC
818 /// generation through Factory::EvaluateAllMethods.
819 ///
820 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
821 /// and the others considered background. This is fine in binary classification,
822 /// but in multiclass classification the ROC surface is an N-dimensional
823 /// shape, where N is the number of classes - 1.
824 
825 TGraph* TMVA::Factory::GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles, UInt_t iClass)
826 {
827  return GetROCCurve( (TString)loader->GetName(), theMethodName, setTitles, iClass );
828 }
829 
830 ////////////////////////////////////////////////////////////////////////////////
831 /// Argument iClass specifies the class to generate the ROC curve in a
832 /// multiclass setting. It is ignored for binary classification.
833 ///
834 /// Returns a ROC graph for a given method, or nullptr on error.
835 ///
836 /// Note: Evaluation of the given method must have been run prior to ROC
837 /// generation through Factory::EvaluateAllMethods.
838 ///
839 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
840 /// and the others considered background. This is fine in binary classification,
841 /// but in multiclass classification the ROC surface is an N-dimensional
842 /// shape, where N is the number of classes - 1.
843 
844 TGraph* TMVA::Factory::GetROCCurve(TString datasetname, TString theMethodName, Bool_t setTitles, UInt_t iClass)
845 {
846  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
847  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
848  return nullptr;
849  }
850 
851  if ( ! this->HasMethod(datasetname, theMethodName) ) {
852  Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
853  return nullptr;
854  }
855 
856  std::set<Types::EAnalysisType> allowedAnalysisTypes = {Types::kClassification, Types::kMulticlass};
857  if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
858  Log() << kERROR << Form("Can only generate ROC curves for analysis type kClassification and kMulticlass.") << Endl;
859  return nullptr;
860  }
861 
862  TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
863  TGraph *graph = nullptr;
864 
865  if ( ! rocCurve ) {
866  Log() << kFATAL << Form("ROCCurve object was not created for Method = %s with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
867  return nullptr;
868  }
869 
870  graph = (TGraph *)rocCurve->GetROCCurve()->Clone();
871  delete rocCurve;
872 
873  if(setTitles) {
874  graph->GetYaxis()->SetTitle("Background rejection (Specificity)");
875  graph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");
876  graph->SetTitle(Form("Signal efficiency vs. Background rejection (%s)", theMethodName.Data()));
877  }
878 
879  return graph;
880 }
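
// Usage sketch (placeholder names): retrieve and draw one method's ROC graph.
//
//    TGraph *g = factory.GetROCCurve(&loader, "BDT", kTRUE, 0);
//    if (g) g->Draw("AL");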
881 
882 ////////////////////////////////////////////////////////////////////////////////
883 /// Generate a collection of graphs, for all methods for a given class. Suitable
884 /// for comparing method performance.
885 ///
886 /// Argument iClass specifies the class to generate the ROC curve in a
887 /// multiclass setting. It is ignored for binary classification.
888 ///
889 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
890 /// and the others considered background. This is fine in binary classification,
891 /// but in multiclass classification the ROC surface is an N-dimensional
892 /// shape, where N is the number of classes - 1.
893 
894 TMultiGraph* TMVA::Factory::GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
895 {
896  return GetROCCurveAsMultiGraph((TString)loader->GetName(), iClass);
897 }
898 
899 ////////////////////////////////////////////////////////////////////////////////
900 /// Generate a collection of graphs, for all methods for a given class. Suitable
901 /// for comparing method performance.
902 ///
903 /// Argument iClass specifies the class to generate the ROC curve in a
904 /// multiclass setting. It is ignored for binary classification.
905 ///
906 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
907 /// and the others considered background. This is fine in binary classification,
908 /// but in multiclass classification the ROC surface is an N-dimensional
909 /// shape, where N is the number of classes - 1.
910 
911 TMultiGraph* TMVA::Factory::GetROCCurveAsMultiGraph(TString datasetname, UInt_t iClass)
912 {
913  UInt_t line_color = 1;
914 
915  TMultiGraph *multigraph = new TMultiGraph();
916 
917  MVector *methods = fMethodsMap[datasetname.Data()];
918  for (auto * method_raw : *methods) {
919  TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(method_raw);
920  if (method == nullptr) { continue; }
921 
922  TString methodName = method->GetMethodName();
923  UInt_t nClasses = method->DataInfo().GetNClasses();
924 
925  if ( this->fAnalysisType == Types::kMulticlass && iClass >= nClasses ) {
926  Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass, nClasses) << Endl;
927  continue;
928  }
929 
930  TString className = method->DataInfo().GetClassInfo(iClass)->GetName();
931 
932  TGraph *graph = this->GetROCCurve(datasetname, methodName, false, iClass);
933  graph->SetTitle(methodName);
934 
935  graph->SetLineWidth(2);
936  graph->SetLineColor(line_color++);
937  graph->SetFillColor(10);
938 
939  multigraph->Add(graph);
940  }
941 
942  if ( multigraph->GetListOfGraphs() == nullptr ) {
943  Log() << kERROR << Form("No methods have class %i defined.", iClass) << Endl;
944  return nullptr;
945  }
946 
947  return multigraph;
948 }
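
// Usage sketch (placeholder names): overlay the ROC curves of all booked
// methods for class 0.
//
//    TMultiGraph *mg = factory.GetROCCurveAsMultiGraph(&loader, 0);
//    if (mg) mg->Draw("AL");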
949 
950 ////////////////////////////////////////////////////////////////////////////////
951 /// Draws ROC curves for all methods booked with the factory for a given class
952 /// onto a canvas.
953 ///
954 /// Argument iClass specifies the class to generate the ROC curve in a
955 /// multiclass setting. It is ignored for binary classification.
956 ///
957 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
958 /// and the others considered background. This is fine in binary classification,
959 /// but in multiclass classification the ROC surface is an N-dimensional
960 /// shape, where N is the number of classes - 1.
961 
962 TCanvas * TMVA::Factory::GetROCCurve(TMVA::DataLoader *loader, UInt_t iClass)
963 {
964  return GetROCCurve((TString)loader->GetName(), iClass);
965 }
966 
967 ////////////////////////////////////////////////////////////////////////////////
968 /// Draws ROC curves for all methods booked with the factory for a given class.
969 ///
970 /// Argument iClass specifies the class to generate the ROC curve in a
971 /// multiclass setting. It is ignored for binary classification.
972 ///
973 /// NOTE: The ROC curve is 1 vs. all, where the given class is considered signal
974 /// and the others considered background. This is fine in binary classification,
975 /// but in multiclass classification the ROC surface is an N-dimensional
976 /// shape, where N is the number of classes - 1.
977 
978 TCanvas * TMVA::Factory::GetROCCurve(TString datasetname, UInt_t iClass)
979 {
980  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
981  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
982  return 0;
983  }
984 
985  TString name = Form("ROCCurve %s class %i", datasetname.Data(), iClass);
986  TCanvas *canvas = new TCanvas(name, "ROC Curve", 200, 10, 700, 500);
987  canvas->SetGrid();
988 
989  TMultiGraph *multigraph = this->GetROCCurveAsMultiGraph(datasetname, iClass);
990 
991  if ( multigraph ) {
992  multigraph->Draw("AL");
993 
994  multigraph->GetYaxis()->SetTitle("Background rejection (Specificity)");
995  multigraph->GetXaxis()->SetTitle("Signal efficiency (Sensitivity)");
996 
997  TString titleString = Form("Signal efficiency vs. Background rejection");
998  if (this->fAnalysisType == Types::kMulticlass) {
999  titleString = Form("%s (Class=%i)", titleString.Data(), iClass);
1000  }
1001 
1002  // Workaround for TMultigraph not drawing title correctly.
1003  multigraph->GetHistogram()->SetTitle( titleString );
1004  multigraph->SetTitle( titleString );
1005 
1006  canvas->BuildLegend(0.15, 0.15, 0.35, 0.3, "MVA Method");
1007  }
1008 
1009  return canvas;
1010 }
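
// Usage sketch (placeholder names): draw and save the combined ROC canvas.
//
//    TCanvas *c = factory.GetROCCurve(&loader, 0);
//    if (c) c->SaveAs("ROC.png");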
1011 
1012 ////////////////////////////////////////////////////////////////////////////////
1013 /// Iterates through all booked methods and calls training
1014 
1015 void TMVA::Factory::TrainAllMethods()
1016 {
1017  Log() << kHEADER << gTools().Color("bold") << "Train all methods" << gTools().Color("reset") << Endl;
1018  // iterates over all MVAs that have been booked, and calls their training methods
1019 
1020 
1021  // don't do anything if no method booked
1022  if (fMethodsMap.empty()) {
1023  Log() << kINFO << "...nothing found to train" << Endl;
1024  return;
1025  }
1026 
1027  // here the training starts
1028  //Log() << kINFO << " " << Endl;
1029  Log() << kDEBUG << "Train all methods for "
1030  << (fAnalysisType == Types::kRegression ? "Regression" :
1031  (fAnalysisType == Types::kMulticlass ? "Multiclass" : "Classification") ) << " ..." << Endl;
1032 
1033  std::map<TString,MVector*>::iterator itrMap;
1034 
1035  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
1036  {
1037  MVector *methods=itrMap->second;
1038  MVector::iterator itrMethod;
1039 
1040  // iterate over methods and train
1041  for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
1042  Event::SetIsTraining(kTRUE);
1043  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
1044 
1045  if(mva==0) continue;
1046 
1047  if(mva->DataInfo().GetDataSetManager()->DataInput().GetEntries() <=1) { // 0 entries --> 0 events, 1 entry --> dynamical dataset (or one entry)
1048  Log() << kFATAL << "No input data for the training provided!" << Endl;
1049  }
1050 
1051  if(fAnalysisType == Types::kRegression && mva->DataInfo().GetNTargets() < 1 )
1052  Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;
1053  else if( (fAnalysisType == Types::kMulticlass || fAnalysisType == Types::kClassification)
1054  && mva->DataInfo().GetNClasses() < 2 )
1055  Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;
1056 
1057  // first print some information about the default dataset
1058  if(!IsSilentFile()) WriteDataInformation(mva->fDataSetInfo);
1059 
1060 
1061  if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
1062  Log() << kWARNING << "Method " << mva->GetMethodName()
1063  << " not trained (training tree has fewer entries ["
1064  << mva->Data()->GetNTrainingEvents()
1065  << "] than required [" << MinNoTrainingEvents << "])" << Endl;
1066  continue;
1067  }
1068 
1069  Log() << kHEADER << "Train method: " << mva->GetMethodName() << " for "
1070  << (fAnalysisType == Types::kRegression ? "Regression" :
1071  (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl << Endl;
1072  mva->TrainMethod();
1073  Log() << kHEADER << "Training finished" << Endl << Endl;
1074  }
1075 
1076  if (fAnalysisType != Types::kRegression) {
1077 
1078  // variable ranking
1079  //Log() << Endl;
1080  Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
1081  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1082  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
1083  if (mva && mva->Data()->GetNTrainingEvents() >= MinNoTrainingEvents) {
1084 
1085  // create and print ranking
1086  const Ranking* ranking = (*itrMethod)->CreateRanking();
1087  if (ranking != 0) ranking->Print();
1088  else Log() << kINFO << "No variable ranking supplied by classifier: "
1089  << dynamic_cast<MethodBase*>(*itrMethod)->GetMethodName() << Endl;
1090  }
1091  }
1092  }
1093 
1094  // delete all methods and recreate them from weight file - this ensures that the application
1095  // of the methods (in TMVAClassificationApplication) is consistent with the results obtained
1096  // in the testing
1097  //Log() << Endl;
1098  if (fModelPersistence) {
1099 
1100  Log() << kHEADER << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;
1101 
1102  if(!IsSilentFile())RootBaseDir()->cd();
1103 
1104  // iterate through all booked methods
1105  for (UInt_t i=0; i<methods->size(); i++) {
1106 
1107  MethodBase* m = dynamic_cast<MethodBase*>((*methods)[i]);
1108  if(m==0) continue;
1109 
1110  TMVA::Types::EMVA methodType = m->GetMethodType();
1111  TString weightfile = m->GetWeightFileName();
1112 
1113  // decide if .txt or .xml file should be read:
1114  if (READXML) weightfile.ReplaceAll(".txt",".xml");
1115 
1116  DataSetInfo& dataSetInfo = m->DataInfo();
1117  TString testvarName = m->GetTestvarName();
1118  delete m; //itrMethod[i];
1119 
1120  // recreate
1121  m = dynamic_cast<MethodBase *>(ClassifierFactory::Instance().Create(
1122  Types::Instance().GetMethodName(methodType).Data(), dataSetInfo, weightfile));
1123  if( m->GetMethodType() == Types::kCategory ){
1124  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(m));
1125  if( !methCat ) Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /Factory" << Endl;
1126  else methCat->fDataSetManager = m->DataInfo().GetDataSetManager();
1127  }
1128  //ToDo, Do we need to fill the DataSetManager of MethodBoost here too?
1129 
1130 
1131  TString fFileDir= m->DataInfo().GetName();
1132  fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
1133  m->SetWeightFileDir(fFileDir);
1134  m->SetModelPersistence(fModelPersistence);
1135  m->SetSilentFile(IsSilentFile());
1136  m->SetAnalysisType(fAnalysisType);
1137  m->SetupMethod();
1138  m->ReadStateFromFile();
1139  m->SetTestvarName(testvarName);
1140 
1141  // replace trained method by newly created one (from weight file) in methods vector
1142  (*methods)[i] = m;
1143  }
1144  }
1145  }
1146 }
1147 
1148 ////////////////////////////////////////////////////////////////////////////////
1149 
1149 
1150 void TMVA::Factory::TestAllMethods()
1151 {
1152  Log() << kHEADER << gTools().Color("bold") << "Test all methods" << gTools().Color("reset") << Endl;
1153 
1154  // don't do anything if no method booked
1155  if (fMethodsMap.empty()) {
1156  Log() << kINFO << "...nothing found to test" << Endl;
1157  return;
1158  }
1159  std::map<TString,MVector*>::iterator itrMap;
1160 
1161  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
1162  {
1163  MVector *methods=itrMap->second;
1164  MVector::iterator itrMethod;
1165 
1166  // iterate over methods and test
1167  for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
1168  Event::SetIsTraining(kFALSE);
1169  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
1170  if(mva==0) continue;
1171  Types::EAnalysisType analysisType = mva->GetAnalysisType();
1172  Log() << kHEADER << "Test method: " << mva->GetMethodName() << " for "
1173  << (analysisType == Types::kRegression ? "Regression" :
1174  (analysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << " performance" << Endl << Endl;
1175  mva->AddOutput( Types::kTesting, analysisType );
1176  }
1177  }
1178 }
1179 
1180 ////////////////////////////////////////////////////////////////////////////////
1181 
1182 void TMVA::Factory::MakeClass(const TString& datasetname , const TString& methodTitle ) const
1183 {
1184  if (methodTitle != "") {
1185  IMethod* method = GetMethod(datasetname, methodTitle);
1186  if (method) method->MakeClass();
1187  else {
1188  Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle
1189  << "\" in list" << Endl;
1190  }
1191  }
1192  else {
1193 
1194  // no classifier specified, make the response class for all booked methods
1195  MVector *methods=fMethodsMap.find(datasetname)->second;
1196  MVector::const_iterator itrMethod;
1197  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1198  MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
1199  if(method==0) continue;
1200  Log() << kINFO << "Make response class for classifier: " << method->GetMethodName() << Endl;
1201  method->MakeClass();
1202  }
1203  }
1204 }
1205 
1206 ////////////////////////////////////////////////////////////////////////////////
1207 /// Print the predefined help message of the given classifier, or of all
1208 /// booked classifiers if no method title is specified.
1209 
1210 void TMVA::Factory::PrintHelpMessage(const TString& datasetname , const TString& methodTitle ) const
1211 {
1212  if (methodTitle != "") {
1213  IMethod* method = GetMethod(datasetname , methodTitle );
1214  if (method) method->PrintHelpMessage();
1215  else {
1216  Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle
1217  << "\" in list" << Endl;
1218  }
1219  }
1220  else {
1221 
1222  // no classifier specified, print all help messages
1223  MVector *methods=fMethodsMap.find(datasetname)->second;
1224  MVector::const_iterator itrMethod ;
1225  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1226  MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
1227  if(method==0) continue;
1228  Log() << kINFO << "Print help message for classifier: " << method->GetMethodName() << Endl;
1229  method->PrintHelpMessage();
1230  }
1231  }
1232 }
1233 
1234 ////////////////////////////////////////////////////////////////////////////////
1235 /// Iterates over all MVA input variables and evaluates them.
1236 
1237 void TMVA::Factory::EvaluateAllVariables(DataLoader *loader, TString options )
1238 {
1239  Log() << kINFO << "Evaluating all variables..." << Endl;
1240  Event::SetIsTraining(kFALSE);
1241 
1242  for (UInt_t i=0; i<loader->DefaultDataSetInfo().GetNVariables(); i++) {
1243  TString s = loader->DefaultDataSetInfo().GetVariableInfo(i).GetLabel();
1244  if (options.Contains("V")) s += ":V";
1245  this->BookMethod(loader, "Variable", s );
1246  }
1247 }
1248 
1249 ////////////////////////////////////////////////////////////////////////////////
1250 /// Iterates over all MVAs that have been booked, and calls their evaluation methods.
1251 
1252 void TMVA::Factory::EvaluateAllMethods( void )
1253 {
1254  Log() << kHEADER << gTools().Color("bold") << "Evaluate all methods" << gTools().Color("reset") << Endl;
1255 
1256  // don't do anything if no method booked
1257  if (fMethodsMap.empty()) {
1258  Log() << kINFO << "...nothing found to evaluate" << Endl;
1259  return;
1260  }
1261  std::map<TString,MVector*>::iterator itrMap;
1262 
1263  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
1264  {
1265  MVector *methods=itrMap->second;
1266 
1267  // -----------------------------------------------------------------------
1268  // First part of evaluation process
1269  // --> compute efficiencies, and other separation estimators
1270  // -----------------------------------------------------------------------
1271 
1272  // although equal, we now want to separate the output for the variables
1273  // and the real methods
1274  Int_t isel; // will be 0 for a Method; 1 for a Variable
1275  Int_t nmeth_used[2] = {0,0}; // 0 Method; 1 Variable
1276 
1277  std::vector<std::vector<TString> > mname(2);
1278  std::vector<std::vector<Double_t> > sig(2), sep(2), roc(2);
1279  std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
1280  std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
1281  std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
1282 
1283  std::vector<std::vector<Float_t> > multiclass_testEff;
1284  std::vector<std::vector<Float_t> > multiclass_trainEff;
1285  std::vector<std::vector<Float_t> > multiclass_testPur;
1286  std::vector<std::vector<Float_t> > multiclass_trainPur;
1287 
1288  // Multiclass confusion matrices.
1289  std::vector<TMatrixD> multiclass_trainConfusionEffB01;
1290  std::vector<TMatrixD> multiclass_trainConfusionEffB10;
1291  std::vector<TMatrixD> multiclass_trainConfusionEffB30;
1292  std::vector<TMatrixD> multiclass_testConfusionEffB01;
1293  std::vector<TMatrixD> multiclass_testConfusionEffB10;
1294  std::vector<TMatrixD> multiclass_testConfusionEffB30;
1295 
1296  std::vector<std::vector<Double_t> > biastrain(1); // "bias" of the regression on the training data
1297  std::vector<std::vector<Double_t> > biastest(1); // "bias" of the regression on test data
1298  std::vector<std::vector<Double_t> > devtrain(1); // "dev" of the regression on the training data
1299  std::vector<std::vector<Double_t> > devtest(1); // "dev" of the regression on test data
1300  std::vector<std::vector<Double_t> > rmstrain(1); // "rms" of the regression on the training data
1301  std::vector<std::vector<Double_t> > rmstest(1); // "rms" of the regression on test data
1302  std::vector<std::vector<Double_t> > minftrain(1); // "minf" of the regression on the training data
1303  std::vector<std::vector<Double_t> > minftest(1); // "minf" of the regression on test data
1304  std::vector<std::vector<Double_t> > rhotrain(1); // correlation of the regression on the training data
1305  std::vector<std::vector<Double_t> > rhotest(1); // correlation of the regression on test data
1306 
1307  // same as above but for 'truncated' quantities (computed for events within 2sigma of RMS)
1308  std::vector<std::vector<Double_t> > biastrainT(1);
1309  std::vector<std::vector<Double_t> > biastestT(1);
1310  std::vector<std::vector<Double_t> > devtrainT(1);
1311  std::vector<std::vector<Double_t> > devtestT(1);
1312  std::vector<std::vector<Double_t> > rmstrainT(1);
1313  std::vector<std::vector<Double_t> > rmstestT(1);
1314  std::vector<std::vector<Double_t> > minftrainT(1);
1315  std::vector<std::vector<Double_t> > minftestT(1);
1316 
1317  // following vector contains all methods - with the exception of Cuts, which are special
1318  MVector methodsNoCuts;
1319 
1320  Bool_t doRegression = kFALSE;
1321  Bool_t doMulticlass = kFALSE;
1322 
1323  // iterate over methods and evaluate
1324  for (MVector::iterator itrMethod =methods->begin(); itrMethod != methods->end(); itrMethod++) {
1325  Event::SetIsTraining(kFALSE);
1326  MethodBase* theMethod = dynamic_cast<MethodBase*>(*itrMethod);
1327  if(theMethod==0) continue;
1328  theMethod->SetFile(fgTargetFile);
1329  theMethod->SetSilentFile(IsSilentFile());
1330  if (theMethod->GetMethodType() != Types::kCuts) methodsNoCuts.push_back( *itrMethod );
1331 
1332  if (theMethod->DoRegression()) {
1333  doRegression = kTRUE;
1334 
1335  Log() << kINFO << "Evaluate regression method: " << theMethod->GetMethodName() << Endl;
1336  Double_t bias, dev, rms, mInf;
1337  Double_t biasT, devT, rmsT, mInfT;
1338  Double_t rho;
1339 
1340  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTesting );
1341  biastest[0] .push_back( bias );
1342  devtest[0] .push_back( dev );
1343  rmstest[0] .push_back( rms );
1344  minftest[0] .push_back( mInf );
1345  rhotest[0] .push_back( rho );
1346  biastestT[0] .push_back( biasT );
1347  devtestT[0] .push_back( devT );
1348  rmstestT[0] .push_back( rmsT );
1349  minftestT[0] .push_back( mInfT );
1350 
1351  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTraining );
1352  biastrain[0] .push_back( bias );
1353  devtrain[0] .push_back( dev );
1354  rmstrain[0] .push_back( rms );
1355  minftrain[0] .push_back( mInf );
1356  rhotrain[0] .push_back( rho );
1357  biastrainT[0].push_back( biasT );
1358  devtrainT[0] .push_back( devT );
1359  rmstrainT[0] .push_back( rmsT );
1360  minftrainT[0].push_back( mInfT );
1361 
1362  mname[0].push_back( theMethod->GetMethodName() );
1363  nmeth_used[0]++;
1364  if(!IsSilentFile())
1365  {
1366  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1367  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1368  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1369  }
1370  } else if (theMethod->DoMulticlass()) {
1371  // ====================================================================
1372  // === Multiclass evaluation
1373  // ====================================================================
1374  doMulticlass = kTRUE;
1375  Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl;
1376 
1377  // This part uses a genetic alg. to evaluate the optimal sig eff * sig pur.
1378  // This is why it is disabled for now.
1379  // Find approximate optimal working point w.r.t. signalEfficiency * signalPurity.
1380  // theMethod->TestMulticlass(); // This is where the actual GA calc is done
1381  // multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur));
1382 
1383  // Confusion matrix at three background efficiency levels
1384  multiclass_trainConfusionEffB01.push_back(theMethod->GetMulticlassConfusionMatrix(0.01, Types::kTraining));
1385  multiclass_trainConfusionEffB10.push_back(theMethod->GetMulticlassConfusionMatrix(0.10, Types::kTraining));
1386  multiclass_trainConfusionEffB30.push_back(theMethod->GetMulticlassConfusionMatrix(0.30, Types::kTraining));
1387 
1388  multiclass_testConfusionEffB01.push_back(theMethod->GetMulticlassConfusionMatrix(0.01, Types::kTesting));
1389  multiclass_testConfusionEffB10.push_back(theMethod->GetMulticlassConfusionMatrix(0.10, Types::kTesting));
1390  multiclass_testConfusionEffB30.push_back(theMethod->GetMulticlassConfusionMatrix(0.30, Types::kTesting));
1391 
1392  if (not IsSilentFile()) {
1393  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1394  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1395  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1396  }
1397 
1398  nmeth_used[0]++;
1399  mname[0].push_back(theMethod->GetMethodName());
1400  } else {
1401 
1402  Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl;
1403  isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 1 : 0;
1404 
1405  // perform the evaluation
1406  theMethod->TestClassification();
1407 
1408  // evaluate the classifier
1409  mname[isel].push_back(theMethod->GetMethodName());
1410  sig[isel].push_back(theMethod->GetSignificance());
1411  sep[isel].push_back(theMethod->GetSeparation());
1412  roc[isel].push_back(theMethod->GetROCIntegral());
1413 
1414  Double_t err;
1415  eff01[isel].push_back(theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err));
1416  eff01err[isel].push_back(err);
1417  eff10[isel].push_back(theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err));
1418  eff10err[isel].push_back(err);
1419  eff30[isel].push_back(theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err));
1420  eff30err[isel].push_back(err);
1421  effArea[isel].push_back(theMethod->GetEfficiency("", Types::kTesting, err)); // computes the area (average)
1422 
1423  trainEff01[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.01")); // the first pass takes longer
1424  trainEff10[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.10"));
1425  trainEff30[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.30"));
1426 
1427  nmeth_used[isel]++;
1428 
1429  if (!IsSilentFile()) {
1430  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
1431  theMethod->WriteEvaluationHistosToFile(Types::kTesting);
1432  theMethod->WriteEvaluationHistosToFile(Types::kTraining);
1433  }
1434  }
1435  }
1436  if (doRegression) {
1437 
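 // Rank the regression methods: UsefulSortAscending sorts by the FIRST vector
 // pushed into vtmp (devtest, the test-sample deviation) and applies the same
 // permutation to every other metric vector and to the method-name vector, so
 // all columns stay aligned with their method after sorting.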
1438  std::vector<TString> vtemps = mname[0];
1439  std::vector< std::vector<Double_t> > vtmp;
1440  vtmp.push_back( devtest[0] ); // this is the vector that is ranked
1441  vtmp.push_back( devtrain[0] );
1442  vtmp.push_back( biastest[0] );
1443  vtmp.push_back( biastrain[0] );
1444  vtmp.push_back( rmstest[0] );
1445  vtmp.push_back( rmstrain[0] );
1446  vtmp.push_back( minftest[0] );
1447  vtmp.push_back( minftrain[0] );
1448  vtmp.push_back( rhotest[0] );
1449  vtmp.push_back( rhotrain[0] );
1450  vtmp.push_back( devtestT[0] );
1451  vtmp.push_back( devtrainT[0] );
1452  vtmp.push_back( biastestT[0] );
1453  vtmp.push_back( biastrainT[0]);
1454  vtmp.push_back( rmstestT[0] );
1455  vtmp.push_back( rmstrainT[0] );
1456  vtmp.push_back( minftestT[0] );
1457  vtmp.push_back( minftrainT[0]);
1458  gTools().UsefulSortAscending( vtmp, &vtemps );
1459  mname[0] = vtemps;
1460  devtest[0] = vtmp[0];
1461  devtrain[0] = vtmp[1];
1462  biastest[0] = vtmp[2];
1463  biastrain[0] = vtmp[3];
1464  rmstest[0] = vtmp[4];
1465  rmstrain[0] = vtmp[5];
1466  minftest[0] = vtmp[6];
1467  minftrain[0] = vtmp[7];
1468  rhotest[0] = vtmp[8];
1469  rhotrain[0] = vtmp[9];
1470  devtestT[0] = vtmp[10];
1471  devtrainT[0] = vtmp[11];
1472  biastestT[0] = vtmp[12];
1473  biastrainT[0] = vtmp[13];
1474  rmstestT[0] = vtmp[14];
1475  rmstrainT[0] = vtmp[15];
1476  minftestT[0] = vtmp[16];
1477  minftrainT[0] = vtmp[17];
1478  } else if (doMulticlass) {
1479  // TODO: fill in something meaningful
1480  // If there is some ranking of methods to be done it should be done here.
1481  // However, this is not so easy to define for multiclass so it is left out for now.
1482 
1483  }
1484  else {
1485  // now sort the methods according to the best 'effArea' (the first, ranked vector below)
1486  for (Int_t k=0; k<2; k++) {
1487  std::vector< std::vector<Double_t> > vtemp;
1488  vtemp.push_back( effArea[k] ); // this is the vector that is ranked
1489  vtemp.push_back( eff10[k] );
1490  vtemp.push_back( eff01[k] );
1491  vtemp.push_back( eff30[k] );
1492  vtemp.push_back( eff10err[k] );
1493  vtemp.push_back( eff01err[k] );
1494  vtemp.push_back( eff30err[k] );
1495  vtemp.push_back( trainEff10[k] );
1496  vtemp.push_back( trainEff01[k] );
1497  vtemp.push_back( trainEff30[k] );
1498  vtemp.push_back( sig[k] );
1499  vtemp.push_back( sep[k] );
1500  vtemp.push_back( roc[k] );
1501  std::vector<TString> vtemps = mname[k];
1502  gTools().UsefulSortDescending( vtemp, &vtemps );
1503  effArea[k] = vtemp[0];
1504  eff10[k] = vtemp[1];
1505  eff01[k] = vtemp[2];
1506  eff30[k] = vtemp[3];
1507  eff10err[k] = vtemp[4];
1508  eff01err[k] = vtemp[5];
1509  eff30err[k] = vtemp[6];
1510  trainEff10[k] = vtemp[7];
1511  trainEff01[k] = vtemp[8];
1512  trainEff30[k] = vtemp[9];
1513  sig[k] = vtemp[10];
1514  sep[k] = vtemp[11];
1515  roc[k] = vtemp[12];
1516  mname[k] = vtemps;
1517  }
1518  }
1519 
1520  // -----------------------------------------------------------------------
1521  // Second part of evaluation process
1522  // --> compute correlations among MVAs
1523  // --> compute correlations between input variables and MVA (determines importance)
1524  // --> count overlaps
1525  // -----------------------------------------------------------------------
1526  if(fCorrelations)
1527  {
1528  const Int_t nmeth = methodsNoCuts.size();
1529  MethodBase* method = dynamic_cast<MethodBase*>(methods[0][0]);
1530  const Int_t nvar = method->fDataSetInfo.GetNVariables();
1531  if (!doRegression && !doMulticlass ) {
1532 
1533  if (nmeth > 0) {
1534 
1535  // needed for correlations
1536  Double_t *dvec = new Double_t[nmeth+nvar];
1537  std::vector<Double_t> rvec;
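 // Per event, dvec holds the nmeth MVA responses followed by the nvar input
 // variable values; rvec stores each method's signal reference cut, used below
 // to decide on which side of its cut each MVA response falls.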
1538 
1539  // for correlations
1540  TPrincipal* tpSig = new TPrincipal( nmeth+nvar, "" );
1541  TPrincipal* tpBkg = new TPrincipal( nmeth+nvar, "" );
1542 
1543  // set required tree branch references
1544  Int_t ivar = 0;
1545  std::vector<TString>* theVars = new std::vector<TString>;
1546  std::vector<ResultsClassification*> mvaRes;
1547  for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); itrMethod++, ivar++) {
1548  MethodBase* m = dynamic_cast<MethodBase*>(*itrMethod);
1549  if(m==0) continue;
1550  theVars->push_back( m->GetTestvarName() );
1551  rvec.push_back( m->GetSignalReferenceCut() );
1552  theVars->back().ReplaceAll( "MVA_", "" );
1553  mvaRes.push_back( dynamic_cast<ResultsClassification*>( m->Data()->GetResults( m->GetMethodName(),
1554  Types::kTesting,
1555  Types::kMaxAnalysisType) ) );
1556  }
1557 
1558  // for overlap study
1559  TMatrixD* overlapS = new TMatrixD( nmeth, nmeth );
1560  TMatrixD* overlapB = new TMatrixD( nmeth, nmeth );
1561  (*overlapS) *= 0; // init...
1562  (*overlapB) *= 0; // init...
1563 
1564  // loop over test tree
1565  DataSet* defDs = method->fDataSetInfo.GetDataSet();
1566  defDs->SetCurrentType(Types::kTesting);
1567  for (Int_t ievt=0; ievt<defDs->GetNEvents(); ievt++) {
1568  const Event* ev = defDs->GetEvent(ievt);
1569 
1570  // for correlations
1571  TMatrixD* theMat = 0;
1572  for (Int_t im=0; im<nmeth; im++) {
1573  // check for NaN value
1574  Double_t retval = (Double_t)(*mvaRes[im])[ievt][0];
1575  if (TMath::IsNaN(retval)) {
1576  Log() << kWARNING << "Found NaN return value in event: " << ievt
1577  << " for method \"" << methodsNoCuts[im]->GetName() << "\"" << Endl;
1578  dvec[im] = 0;
1579  }
1580  else dvec[im] = retval;
1581  }
1582  for (Int_t iv=0; iv<nvar; iv++) dvec[iv+nmeth] = (Double_t)ev->GetValue(iv);
1583  if (method->fDataSetInfo.IsSignal(ev)) { tpSig->AddRow( dvec ); theMat = overlapS; }
1584  else { tpBkg->AddRow( dvec ); theMat = overlapB; }
1585 
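 // Two MVAs give a consistent answer for an event when their responses lie on
 // the same side of their respective signal reference cuts, i.e. when
 // (dvec[im]-rvec[im]) and (dvec[jm]-rvec[jm]) have the same sign.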
1586  // count overlaps
1587  for (Int_t im=0; im<nmeth; im++) {
1588  for (Int_t jm=im; jm<nmeth; jm++) {
1589  if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1590  (*theMat)(im,jm)++;
1591  if (im != jm) (*theMat)(jm,im)++;
1592  }
1593  }
1594  }
1595  }
1596 
1597  // renormalise overlap matrix
1598  (*overlapS) *= (1.0/defDs->GetNEvtSigTest()); // normalise to the number of signal test events
1599  (*overlapB) *= (1.0/defDs->GetNEvtBkgdTest()); // normalise to the number of background test events
1600 
1601  tpSig->MakePrincipals();
1602  tpBkg->MakePrincipals();
1603 
1604  const TMatrixD* covMatS = tpSig->GetCovarianceMatrix();
1605  const TMatrixD* covMatB = tpBkg->GetCovarianceMatrix();
1606 
1607  const TMatrixD* corrMatS = gTools().GetCorrelationMatrix( covMatS );
1608  const TMatrixD* corrMatB = gTools().GetCorrelationMatrix( covMatB );
1609 
1610  // print correlation matrices
1611  if (corrMatS != 0 && corrMatB != 0) {
1612 
1613  // extract MVA matrix
1614  TMatrixD mvaMatS(nmeth,nmeth);
1615  TMatrixD mvaMatB(nmeth,nmeth);
1616  for (Int_t im=0; im<nmeth; im++) {
1617  for (Int_t jm=0; jm<nmeth; jm++) {
1618  mvaMatS(im,jm) = (*corrMatS)(im,jm);
1619  mvaMatB(im,jm) = (*corrMatB)(im,jm);
1620  }
1621  }
1622 
1623  // extract variables - to MVA matrix
1624  std::vector<TString> theInputVars;
1625  TMatrixD varmvaMatS(nvar,nmeth);
1626  TMatrixD varmvaMatB(nvar,nmeth);
1627  for (Int_t iv=0; iv<nvar; iv++) {
1628  theInputVars.push_back( method->fDataSetInfo.GetVariableInfo( iv ).GetLabel() );
1629  for (Int_t jm=0; jm<nmeth; jm++) {
1630  varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1631  varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1632  }
1633  }
1634 
1635  if (nmeth > 1) {
1636  Log() << kINFO << Endl;
1637  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (signal):" << Endl;
1638  gTools().FormattedOutput( mvaMatS, *theVars, Log() );
1639  Log() << kINFO << Endl;
1640 
1641  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (background):" << Endl;
1642  gTools().FormattedOutput( mvaMatB, *theVars, Log() );
1643  Log() << kINFO << Endl;
1644  }
1645 
1646  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (signal):" << Endl;
1647  gTools().FormattedOutput( varmvaMatS, theInputVars, *theVars, Log() );
1648  Log() << kINFO << Endl;
1649 
1650  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (background):" << Endl;
1651  gTools().FormattedOutput( varmvaMatB, theInputVars, *theVars, Log() );
1652  Log() << kINFO << Endl;
1653  }
1654  else Log() << kWARNING <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "<TestAllMethods> cannot compute correlation matrices" << Endl;
1655 
1656  // print overlap matrices
1657  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
1658  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "the MVAs 'i' and 'j' have returned consistent answers about \"signal-likeness\"" << Endl;
1659  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "An event is signal-like if its MVA output exceeds the following value:" << Endl;
1660  gTools().FormattedOutput( rvec, *theVars, "Method" , "Cut value", Log() );
1661  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;
1662 
1663  // give notice that cut method has been excluded from this test
1664  if (nmeth != (Int_t)methods->size())
1665  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Note: no correlations and overlap with cut method are provided at present" << Endl;
1666 
1667  if (nmeth > 1) {
1668  Log() << kINFO << Endl;
1669  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (signal):" << Endl;
1670  gTools().FormattedOutput( *overlapS, *theVars, Log() );
1671  Log() << kINFO << Endl;
1672 
1673  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (background):" << Endl;
1674  gTools().FormattedOutput( *overlapB, *theVars, Log() );
1675  }
1676 
1677  // cleanup
1678  delete tpSig;
1679  delete tpBkg;
1680  delete corrMatS;
1681  delete corrMatB;
1682  delete theVars;
1683  delete overlapS;
1684  delete overlapB;
1685  delete [] dvec;
1686  }
1687  }
1688  }
1689  // -----------------------------------------------------------------------
1690  // Third part of evaluation process
1691  // --> output
1692  // -----------------------------------------------------------------------
1693 
1694  if (doRegression) {
1695 
1696  Log() << kINFO << Endl;
1697  TString hLine = "--------------------------------------------------------------------------------------------------";
1698  Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
1699  Log() << kINFO << "(\"Bias\" quotes the mean deviation of the regression from true target." << Endl;
1700  Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
1701  Log() << kINFO << " Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" << Endl;
1702  Log() << kINFO << " tained when removing events deviating more than 2sigma from average.)" << Endl;
1703  Log() << kINFO << hLine << Endl;
1704  Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1705  Log() << kINFO << hLine << Endl;
1706 
1707  for (Int_t i=0; i<nmeth_used[0]; i++) {
1708  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1709  if(theMethod==0) continue;
1710 
1711  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1712  theMethod->fDataSetInfo.GetName(),
1713  (const char*)mname[0][i],
1714  biastest[0][i], biastestT[0][i],
1715  rmstest[0][i], rmstestT[0][i],
1716  minftest[0][i], minftestT[0][i] )
1717  << Endl;
1718  }
1719  Log() << kINFO << hLine << Endl;
1720  Log() << kINFO << Endl;
1721  Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;
1722  Log() << kINFO << "(overtraining check)" << Endl;
1723  Log() << kINFO << hLine << Endl;
1724  Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1725  Log() << kINFO << hLine << Endl;
1726 
1727  for (Int_t i=0; i<nmeth_used[0]; i++) {
1728  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1729  if(theMethod==0) continue;
1730  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1731  theMethod->fDataSetInfo.GetName(),
1732  (const char*)mname[0][i],
1733  biastrain[0][i], biastrainT[0][i],
1734  rmstrain[0][i], rmstrainT[0][i],
1735  minftrain[0][i], minftrainT[0][i] )
1736  << Endl;
1737  }
1738  Log() << kINFO << hLine << Endl;
1739  Log() << kINFO << Endl;
1740  } else if (doMulticlass) {
1741  // ====================================================================
1742  // === Multiclass Output
1743  // ====================================================================
1744 
1745  TString hLine =
1746  "-------------------------------------------------------------------------------------------------------";
1747 
1748  // This part uses a genetic algorithm to evaluate the optimal sig eff * sig pur
1749  // working point; the search is expensive, which is why it is disabled for now.
1750  //
1751  // // --- Achievable signal efficiency * signal purity
1752  // // --------------------------------------------------------------------
1753  // Log() << kINFO << Endl;
1754  // Log() << kINFO << "Evaluation results ranked by best signal efficiency times signal purity " << Endl;
1755  // Log() << kINFO << hLine << Endl;
1756 
1757  // // iterate over methods and evaluate
1758  // for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1759  // MethodBase *theMethod = dynamic_cast<MethodBase *>(*itrMethod);
1760  // if (theMethod == 0) {
1761  // continue;
1762  // }
1763 
1764  // TString header = "DataSet Name MVA Method ";
1765  // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1766  // header += Form("%-12s ", theMethod->fDataSetInfo.GetClassInfo(icls)->GetName());
1767  // }
1768 
1769  // Log() << kINFO << header << Endl;
1770  // Log() << kINFO << hLine << Endl;
1771  // for (Int_t i = 0; i < nmeth_used[0]; i++) {
1772  // TString res = Form("[%-14s] %-15s", theMethod->fDataSetInfo.GetName(), (const char *)mname[0][i]);
1773  // for (UInt_t icls = 0; icls < theMethod->fDataSetInfo.GetNClasses(); ++icls) {
1774  // res += Form("%#1.3f ", (multiclass_testEff[i][icls]) * (multiclass_testPur[i][icls]));
1775  // }
1776  // Log() << kINFO << res << Endl;
1777  // }
1778 
1779  // Log() << kINFO << hLine << Endl;
1780  // Log() << kINFO << Endl;
1781  // }
1782 
1783  // --- 1 vs Rest ROC AUC, signal efficiency @ given background efficiency
1784  // --------------------------------------------------------------------
1785  TString header1 = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "Dataset", "MVA Method", "ROC AUC", "Sig eff@B=0.01",
1786  "Sig eff@B=0.10", "Sig eff@B=0.30");
1787  TString header2 = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "Name:", "/ Class:", "test (train)", "test (train)",
1788  "test (train)", "test (train)");
1789  Log() << kINFO << Endl;
1790  Log() << kINFO << "1-vs-rest performance metrics per class" << Endl;
1791  Log() << kINFO << hLine << Endl;
1792  Log() << kINFO << Endl;
1793  Log() << kINFO << "Considers the listed class as signal and the other classes" << Endl;
1794  Log() << kINFO << "as background, reporting the resulting binary performance." << Endl;
1795  Log() << kINFO << "A score of 0.820 (0.850) means 0.820 was achieved on the" << Endl;
1796  Log() << kINFO << "test set and 0.850 on the training set." << Endl;
1797 
1798  Log() << kINFO << Endl;
1799  Log() << kINFO << header1 << Endl;
1800  Log() << kINFO << header2 << Endl;
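 // k == 0 iterates the regular MVA methods; k == 1 the input variables booked as
 // "Variable_" pseudo-methods, whose prefix is stripped below for display.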
1801  for (Int_t k = 0; k < 2; k++) {
1802  for (Int_t i = 0; i < nmeth_used[k]; i++) {
1803  if (k == 1) {
1804  mname[k][i].ReplaceAll("Variable_", "");
1805  }
1806 
1807  const TString datasetName = itrMap->first;
1808  const TString mvaName = mname[k][i];
1809 
1810  MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, mvaName));
1811  if (theMethod == 0) {
1812  continue;
1813  }
1814 
1815  Log() << kINFO << Endl;
1816  TString row = Form("%-15s%-15s", datasetName.Data(), mvaName.Data());
1817  Log() << kINFO << row << Endl;
1818  Log() << kINFO << "------------------------------" << Endl;
1819 
1820  UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
1821  for (UInt_t iClass = 0; iClass < numClasses; ++iClass) {
1822 
1823  ROCCurve *rocCurveTrain = GetROC(datasetName, mvaName, iClass, Types::kTraining);
1824  ROCCurve *rocCurveTest = GetROC(datasetName, mvaName, iClass, Types::kTesting);
1825 
1826  const TString className = theMethod->DataInfo().GetClassInfo(iClass)->GetName();
1827  const Double_t rocaucTrain = rocCurveTrain->GetROCIntegral();
1828  const Double_t effB01Train = rocCurveTrain->GetEffSForEffB(0.01);
1829  const Double_t effB10Train = rocCurveTrain->GetEffSForEffB(0.10);
1830  const Double_t effB30Train = rocCurveTrain->GetEffSForEffB(0.30);
1831  const Double_t rocaucTest = rocCurveTest->GetROCIntegral();
1832  const Double_t effB01Test = rocCurveTest->GetEffSForEffB(0.01);
1833  const Double_t effB10Test = rocCurveTest->GetEffSForEffB(0.10);
1834  const Double_t effB30Test = rocCurveTest->GetEffSForEffB(0.30);
1835  const TString rocaucCmp = Form("%5.3f (%5.3f)", rocaucTest, rocaucTrain);
1836  const TString effB01Cmp = Form("%5.3f (%5.3f)", effB01Test, effB01Train);
1837  const TString effB10Cmp = Form("%5.3f (%5.3f)", effB10Test, effB10Train);
1838  const TString effB30Cmp = Form("%5.3f (%5.3f)", effB30Test, effB30Train);
1839  row = Form("%-15s%-15s%-15s%-15s%-15s%-15s", "", className.Data(), rocaucCmp.Data(), effB01Cmp.Data(),
1840  effB10Cmp.Data(), effB30Cmp.Data());
1841  Log() << kINFO << row << Endl;
1842 
1843  delete rocCurveTrain;
1844  delete rocCurveTest;
1845  }
1846  }
1847  }
1848  Log() << kINFO << Endl;
1849  Log() << kINFO << hLine << Endl;
1850  Log() << kINFO << Endl;
1851 
1852  // --- Confusion matrices
1853  // --------------------------------------------------------------------
1854  auto printMatrix = [](TMatrixD const &matTesting, TMatrixD const &matTraining, std::vector<TString> classnames,
1855  UInt_t numClasses, MsgLogger &stream) { // callers pass the test matrix first
1856  // assert (classLabelWidth >= valueLabelWidth + 2)
1857  // if (...) {Log() << kWARN << "..." << Endl; }
1858 
1859  // TODO: Ensure matrices are same size.
1860 
1861  TString header = Form(" %-14s", " ");
1862  TString headerInfo = Form(" %-14s", " ");
1864  for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1865  header += Form(" %-14s", classnames[iCol].Data());
1866  headerInfo += Form(" %-14s", " test (train)");
1867  }
1868  stream << kINFO << header << Endl;
1869  stream << kINFO << headerInfo << Endl;
1870 
1871  for (UInt_t iRow = 0; iRow < numClasses; ++iRow) {
1872  stream << kINFO << Form(" %-14s", classnames[iRow].Data());
1873 
1874  for (UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1875  if (iCol == iRow) {
1876  stream << kINFO << Form(" %-14s", "-");
1877  } else {
1878  Double_t trainValue = matTraining[iRow][iCol];
1879  Double_t testValue = matTesting[iRow][iCol];
1880  TString entry = Form("%-5.3f (%-5.3f)", testValue, trainValue);
1881  stream << kINFO << Form(" %-14s", entry.Data());
1882  }
1883  }
1884  stream << kINFO << Endl;
1885  }
1886  };
1887 
1888  Log() << kINFO << Endl;
1889  Log() << kINFO << "Confusion matrices for all methods" << Endl;
1890  Log() << kINFO << hLine << Endl;
1891  Log() << kINFO << Endl;
1892  Log() << kINFO << "Does a binary comparison between the two classes given by a " << Endl;
1893  Log() << kINFO << "particular row-column combination. In each case, the class " << Endl;
1894  Log() << kINFO << "given by the row is considered signal while the class given " << Endl;
1895  Log() << kINFO << "by the column index is considered background." << Endl;
1896  Log() << kINFO << Endl;
1897  for (UInt_t iMethod = 0; iMethod < methods->size(); ++iMethod) {
1898  MethodBase *theMethod = dynamic_cast<MethodBase *>(methods->at(iMethod));
1899  if (theMethod == nullptr) {
1900  continue;
1901  }
1902  UInt_t numClasses = theMethod->fDataSetInfo.GetNClasses();
1903 
1904  std::vector<TString> classnames;
1905  for (UInt_t iCls = 0; iCls < numClasses; ++iCls) {
1906  classnames.push_back(theMethod->fDataSetInfo.GetClassInfo(iCls)->GetName());
1907  }
1908  Log() << kINFO
1909  << "=== Showing confusion matrix for method: " << Form("%-15s", (const char *)mname[0][iMethod])
1910  << Endl;
1911  Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.01)" << Endl;
1912  Log() << kINFO << "---------------------------------------------------" << Endl;
1913  printMatrix(multiclass_testConfusionEffB01[iMethod], multiclass_trainConfusionEffB01[iMethod], classnames,
1914  numClasses, Log());
1915  Log() << kINFO << Endl;
1916 
1917  Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.10)" << Endl;
1918  Log() << kINFO << "---------------------------------------------------" << Endl;
1919  printMatrix(multiclass_testConfusionEffB10[iMethod], multiclass_trainConfusionEffB10[iMethod], classnames,
1920  numClasses, Log());
1921  Log() << kINFO << Endl;
1922 
1923  Log() << kINFO << "(Signal Efficiency for Background Efficiency 0.30)" << Endl;
1924  Log() << kINFO << "---------------------------------------------------" << Endl;
1925  printMatrix(multiclass_testConfusionEffB30[iMethod], multiclass_trainConfusionEffB30[iMethod], classnames,
1926  numClasses, Log());
1927  Log() << kINFO << Endl;
1928  }
1929  Log() << kINFO << hLine << Endl;
1930  Log() << kINFO << Endl;
1931 
1932  } else {
1933  // Binary classification
1934  if (fROC) {
1935  Log().EnableOutput();
1936  gConfig().SetSilent(kFALSE);
1937  Log() << Endl;
1938  TString hLine = "------------------------------------------------------------------------------------------"
1939  "-------------------------";
1940  Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
1941  Log() << kINFO << hLine << Endl;
1942  Log() << kINFO << "DataSet MVA " << Endl;
1943  Log() << kINFO << "Name: Method: ROC-integ" << Endl;
1944 
1945  // Log() << kDEBUG << "DataSet MVA Signal efficiency at bkg eff.(error):
1946  // | Sepa- Signifi- " << Endl; Log() << kDEBUG << "Name: Method: @B=0.01
1947  // @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl;
1948  Log() << kDEBUG << hLine << Endl;
1949  for (Int_t k = 0; k < 2; k++) {
1950  if (k == 1 && nmeth_used[k] > 0) {
1951  Log() << kINFO << hLine << Endl;
1952  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
1953  }
1954  for (Int_t i = 0; i < nmeth_used[k]; i++) {
1955  TString datasetName = itrMap->first;
1956  TString methodName = mname[k][i];
1957 
1958  if (k == 1) {
1959  methodName.ReplaceAll("Variable_", "");
1960  }
1961 
1962  MethodBase *theMethod = dynamic_cast<MethodBase *>(GetMethod(datasetName, methodName));
1963  if (theMethod == 0) {
1964  continue;
1965  }
1966 
1967  TMVA::DataSet *dataset = theMethod->Data();
1968  TMVA::Results *results = dataset->GetResults(methodName, Types::kTesting, this->fAnalysisType);
1969  std::vector<Bool_t> *mvaResType =
1970  dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
1971 
1972  Double_t rocIntegral = 0.0;
1973  if (mvaResType->size() != 0) {
1974  rocIntegral = GetROCIntegral(datasetName, methodName);
1975  }
1976 
1977  if (sep[k][i] < 0 || sig[k][i] < 0) {
1978  // cannot compute separation/significance -> no MVA (usually for Cuts)
1979  Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), effArea[k][i])
1980  << Endl;
1981 
1982  // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
1983  // %#1.3f %#1.3f | -- --",
1984  // datasetName.Data(),
1985  // methodName.Data(),
1986  // eff01[k][i], Int_t(1000*eff01err[k][i]),
1987  // eff10[k][i], Int_t(1000*eff10err[k][i]),
1988  // eff30[k][i], Int_t(1000*eff30err[k][i]),
1989  // effArea[k][i],rocIntegral) << Endl;
1990  } else {
1991  Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), rocIntegral)
1992  << Endl;
1993  // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i)
1994  // %#1.3f %#1.3f | %#1.3f %#1.3f",
1995  // datasetName.Data(),
1996  // methodName.Data(),
1997  // eff01[k][i], Int_t(1000*eff01err[k][i]),
1998  // eff10[k][i], Int_t(1000*eff10err[k][i]),
1999  // eff30[k][i], Int_t(1000*eff30err[k][i]),
2000  // effArea[k][i],rocIntegral,
2001  // sep[k][i], sig[k][i]) << Endl;
2002  }
2003  }
2004  }
2005  Log() << kINFO << hLine << Endl;
2006  Log() << kINFO << Endl;
2007  Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
2008  Log() << kINFO << hLine << Endl;
2009  Log() << kINFO
2010  << "DataSet MVA Signal efficiency: from test sample (from training sample) "
2011  << Endl;
2012  Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 "
2013  << Endl;
2014  Log() << kINFO << hLine << Endl;
2015  for (Int_t k = 0; k < 2; k++) {
2016  if (k == 1 && nmeth_used[k] > 0) {
2017  Log() << kINFO << hLine << Endl;
2018  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
2019  }
2020  for (Int_t i = 0; i < nmeth_used[k]; i++) {
2021  if (k == 1) mname[k][i].ReplaceAll("Variable_", "");
2022  MethodBase *theMethod = dynamic_cast<MethodBase *>((*methods)[i]);
2023  if (theMethod == 0) continue;
2024 
2025  Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
2026  theMethod->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i],
2027  trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
2028  << Endl;
2029  }
2030  }
2031  Log() << kINFO << hLine << Endl;
2032  Log() << kINFO << Endl;
2033 
2034  if (gTools().CheckForSilentOption(GetOptions())) Log().InhibitOutput();
2035  } // end fROC
2036  }
2037  if(!IsSilentFile())
2038  {
2039  std::list<TString> datasets;
2040  for (Int_t k=0; k<2; k++) {
2041  for (Int_t i=0; i<nmeth_used[k]; i++) {
2042  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
2043  if(theMethod==0) continue;
2044  // write test/training trees
2045  RootBaseDir()->cd(theMethod->fDataSetInfo.GetName());
2046  if(std::find(datasets.begin(), datasets.end(), theMethod->fDataSetInfo.GetName()) == datasets.end())
2047  {
2048  theMethod->Data()->GetTree(Types::kTesting)->Write( "", TObject::kOverwrite );
2049  theMethod->Data()->GetTree(Types::kTraining)->Write( "", TObject::kOverwrite );
2050  datasets.push_back(theMethod->fDataSetInfo.GetName());
2051  }
2052  }
2053  }
2054  }
2055  }//end for MethodsMap
2056  // references for citation
2057  gTools().TMVACitation(Log(), Tools::kHtmlLink);
2058 }
2059 
2060 ////////////////////////////////////////////////////////////////////////////////
2061 /// Evaluate Variable Importance
2062 
2063 TH1F* TMVA::Factory::EvaluateImportance(DataLoader *loader,VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2064 {
2065  fModelPersistence=kFALSE;
2066  fSilentFile=kTRUE;//we need silent file here because we need fast classification results
2067 
2068  //getting number of variables and variable names from loader
2069  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
2070  if(vitype==VIType::kShort)
2071  return EvaluateImportanceShort(loader,theMethod,methodTitle,theOption);
2072  else if(vitype==VIType::kAll)
2073  return EvaluateImportanceAll(loader,theMethod,methodTitle,theOption);
2074  else if(vitype==VIType::kRandom&&nbits>10)
2075  {
2076  return EvaluateImportanceRandom(loader,pow(2,nbits),theMethod,methodTitle,theOption);
2077  } else
2078  {
2079  std::cerr<<"Error in Variable Importance: Random mode requires more than 10 variables in the dataset."<<std::endl;
2080  return nullptr;
2081  }
2082 }
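// Usage sketch (hypothetical names; assumes a DataLoader already filled with
// variables and signal/background trees as in the standard TMVA classification
// examples; "output", "factory" and "loader" are illustrative, not from this file):
//
//   TFile *output = TFile::Open("TMVA_VI.root", "RECREATE");
//   TMVA::Factory factory("VI", output, "!V:Silent:AnalysisType=Classification");
//   TMVA::DataLoader *loader = new TMVA::DataLoader("dataset");
//   // ... loader->AddVariable(...), loader->AddSignalTree(...), etc. ...
//   TH1F *vi = factory.EvaluateImportance(loader, TMVA::VIType::kShort,
//                                         TMVA::Types::kBDT, "BDT", "!V:NTrees=50");
//   if (vi) vi->Draw("B");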
2083 
2084 ////////////////////////////////////////////////////////////////////////////////
2085 
2086 TH1F* TMVA::Factory::EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2087 {
2088 
2089  uint64_t x = 0;
2090  uint64_t y = 0;
2091 
2092  //getting number of variables and variable names from loader
2093  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
2094  std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();
2095 
2096  uint64_t range = pow(2, nbits);
2097 
2098  //vector to save importances
2099  std::vector<Double_t> importances(nbits);
2100  //vector to save ROC
2101  std::vector<Double_t> ROC(range);
2102  ROC[0]=0.5;
2103  for (int i = 0; i < nbits; i++)importances[i] = 0;
2104 
2105  Double_t SROC, SSROC; //computed ROC value
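 // Exhaustive scan: every non-empty subset x of the nbits variables is encoded
 // as a bitmask, and a classifier is trained and tested for each subset
 // (2^nbits - 1 trainings in total); ROC[x] caches the resulting ROC integral.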
2106  for ( x = 1; x <range ; x++) {
2107 
2108  std::bitset<VIBITS> xbitset(x);
2109  if (x == 0) continue; //data loader needs at least one variable
2110 
2111  //creating loader for seed
2112  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2113 
2114  //adding variables from seed
2115  for (int index = 0; index < nbits; index++) {
2116  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2117  }
2118 
2119  DataLoaderCopy(seedloader,loader);
2120  seedloader->PrepareTrainingAndTestTree(loader->DefaultDataSetInfo().GetCut("Signal"), loader->DefaultDataSetInfo().GetCut("Background"), loader->DefaultDataSetInfo().GetSplitOptions());
2121 
2122  //Booking Seed
2123  BookMethod(seedloader, theMethod, methodTitle, theOption);
2124 
2125  //Train/Test/Evaluation
2126  TrainAllMethods();
2127  TestAllMethods();
2128  EvaluateAllMethods();
2129 
2130  //getting ROC
2131  ROC[x] = GetROCIntegral(xbitset.to_string(), methodTitle);
2132 
2133  //cleaning information to process sub-seeds
2134  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2135  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2136  delete sresults;
2137  delete seedloader;
2138  this->DeleteAllMethods();
2139 
2140  fMethodsMap.clear();
2141  //remove the global result, since keeping it for every seed would require a lot of RAM
2142  }
2143 
2144 
2145  for ( x = 0; x <range ; x++)
2146  {
2147  SROC=ROC[x];
2148  for (uint32_t i = 0; i < VIBITS; ++i) {
2149  if (x & (1 << i)) {
2150  y = x & ~(1 << i);
2151  std::bitset<VIBITS> ybitset(y);
2152  //need at least one variable
2153  //NOTE: if sub-seed is zero then is the special case
2154  //that count in xbitset is 1
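 // x - y == (1 << i), so ny = log2(x - y) recovers i, the index of the removed variable.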
2155  Double_t ny = log(x - y) / 0.693147;
2156  if (y == 0) {
2157  importances[ny] = SROC - 0.5;
2158  continue;
2159  }
2160 
2161  //getting ROC
2162  SSROC = ROC[y];
2163  importances[ny] += SROC - SSROC;
2164  //cleaning information
2165  }
2166 
2167  }
2168  }
2169  std::cout<<"--- Variable Importance Results (All)"<<std::endl;
2170  return GetImportance(nbits,importances,varNames);
2171 }
2172 
2173 static long int sum(long int i)
2174 {
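 // returns 2^i - 1, i.e. the bitmask with the lowest i bits set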
2175  long int _sum=0;
2176  for(long int n=0;n<i;n++) _sum+=pow(2,n);
2177  return _sum;
2178 }
2179 
2180 ////////////////////////////////////////////////////////////////////////////////
2181 
2182 TH1F* TMVA::Factory::EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2183 {
2184  uint64_t x = 0;
2185  uint64_t y = 0;
2186 
2187  //getting number of variables and variable names from loader
2188  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
2189  std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();
2190 
2191  long int range = sum(nbits);
2192 // std::cout<<range<<std::endl;
2193  //vector to save importances
2194  std::vector<Double_t> importances(nbits);
2195  for (int i = 0; i < nbits; i++)importances[i] = 0;
2196 
2197  Double_t SROC, SSROC; //computed ROC value
2198 
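 // "Short" mode trains a single seed with all variables set (x = 2^nbits - 1) and
 // then one sub-seed per variable with exactly that variable removed; the
 // importance of a variable is the resulting drop in ROC integral.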
2199  x = range;
2200 
2201  std::bitset<VIBITS> xbitset(x);
2202  if (x == 0) Log()<<kFATAL<<"Error: need at least one variable."; //data loader needs at least one variable
2203 
2204 
2205  //creating loader for seed
2206  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2207 
2208  //adding variables from seed
2209  for (int index = 0; index < nbits; index++) {
2210  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2211  }
2212 
2213  //Loading Dataset
2214  DataLoaderCopy(seedloader,loader);
2215 
2216  //Booking Seed
2217  BookMethod(seedloader, theMethod, methodTitle, theOption);
2218 
2219  //Train/Test/Evaluation
2220  TrainAllMethods();
2221  TestAllMethods();
2222  EvaluateAllMethods();
2223 
2224  //getting ROC
2225  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2226 
2227  //cleaning information to process sub-seeds
2228  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2229  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2230  delete sresults;
2231  delete seedloader;
2232  this->DeleteAllMethods();
2233  fMethodsMap.clear();
2234 
2235  //remove the global result, since keeping it for every seed would require a lot of RAM
2236 
2237  for (uint32_t i = 0; i < VIBITS; ++i) {
2238  if (x & (1 << i)) {
2239  y = x & ~(1 << i);
2240  std::bitset<VIBITS> ybitset(y);
2241  //need at least one variable
2242  //NOTE: if sub-seed is zero then is the special case
2243  //that count in xbitset is 1
2244  Double_t ny = log(x - y) / 0.693147;
2245  if (y == 0) {
2246  importances[ny] = SROC - 0.5;
2247  continue;
2248  }
2249 
2250  //creating loader for sub-seed
2251  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
2252  //adding variables from sub-seed
2253  for (int index = 0; index < nbits; index++) {
2254  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
2255  }
2256 
2257  //Loading Dataset
2258  DataLoaderCopy(subseedloader,loader);
2259 
2260  //Booking SubSeed
2261  BookMethod(subseedloader, theMethod, methodTitle, theOption);
2262 
2263  //Train/Test/Evaluation
2264  TrainAllMethods();
2265  TestAllMethods();
2266  EvaluateAllMethods();
2267 
2268  //getting ROC
2269  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2270  importances[ny] += SROC - SSROC;
2271 
2272  //cleaning information
2273  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
2274  TMVA::ResultsClassification *ssresults = (TMVA::ResultsClassification*)ssmethod->Data()->GetResults(ssmethod->GetMethodName(), Types::kTesting, Types::kClassification);
2275  delete ssresults;
2276  delete subseedloader;
2277  this->DeleteAllMethods();
2278  fMethodsMap.clear();
2279  }
2280  }
2281  std::cout<<"--- Variable Importance Results (Short)"<<std::endl;
2282  return GetImportance(nbits,importances,varNames);
2283 }
2284 
2285 ////////////////////////////////////////////////////////////////////////////////
2286 
2287 TH1F* TMVA::Factory::EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption)
2288 {
2289  TRandom3 *rangen = new TRandom3(0); //Random Gen.
2290 
2291  uint64_t x = 0;
2292  uint64_t y = 0;
2293 
2294  //getting number of variables and variable names from loader
2295  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
2296  std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();
2297 
2298  long int range = pow(2, nbits);
2299 
2300  //vector to save importances
2301  std::vector<Double_t> importances(nbits);
2302  Double_t importances_norm = 0;
2303  for (int i = 0; i < nbits; i++)importances[i] = 0;
2304 
2305  Double_t SROC, SSROC; //computed ROC value
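 // "Random" mode draws nseeds random variable subsets; for each seed it trains
 // the full subset and every one-variable-removed sub-seed, accumulating the ROC
 // differences into the importance of the removed variable.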
2306  for (UInt_t n = 0; n < nseeds; n++) {
2307  x = rangen -> Integer(range);
2308 
2309  std::bitset<32> xbitset(x);
2310  if (x == 0) continue; //data loader needs at least one variable
2311 
2312 
2313  //creating loader for seed
2314  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
2315 
2316  //adding variables from seed
2317  for (int index = 0; index < nbits; index++) {
2318  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
2319  }
2320 
2321  //Loading Dataset
2322  DataLoaderCopy(seedloader,loader);
2323 
2324  //Booking Seed
2325  BookMethod(seedloader, theMethod, methodTitle, theOption);
2326 
2327  //Train/Test/Evaluation
2328  TrainAllMethods();
2329  TestAllMethods();
2330  EvaluateAllMethods();
2331 
2332  //getting ROC
2333  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2334 // std::cout << "Seed: n " << n << " x " << x << " xbitset:" << xbitset << " ROC " << SROC << std::endl;
2335 
2336  //cleaning information to process sub-seeds
2337  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
2338  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
2339  delete sresults;
2340  delete seedloader;
2341  this->DeleteAllMethods();
2342  fMethodsMap.clear();
2343 
2344  //remove the global result, since keeping it for every seed would require a lot of RAM
2345 
2346  for (uint32_t i = 0; i < 32; ++i) {
2347  if (x & (1 << i)) {
2348  y = x & ~(1 << i);
2349  std::bitset<32> ybitset(y);
2350  //need at least one variable
2351  //NOTE: if sub-seed is zero then is the special case
2352  //that count in xbitset is 1
2353  Double_t ny = log(x - y) / 0.693147;
2354  if (y == 0) {
2355  importances[ny] = SROC - 0.5;
2356  importances_norm += importances[ny];
2357  // std::cout << "SubSeed: " << y << " y:" << ybitset << "ROC " << 0.5 << std::endl;
2358  continue;
2359  }
2360 
2361  //creating loader for sub-seed
2362  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
2363  //adding variables from sub-seed
2364  for (int index = 0; index < nbits; index++) {
2365  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
2366  }
2367 
2368  //Loading Dataset
2369  DataLoaderCopy(subseedloader,loader);
2370 
2371  //Booking SubSeed
2372  BookMethod(subseedloader, theMethod, methodTitle, theOption);
2373 
2374  //Train/Test/Evaluation
2375  TrainAllMethods();
2376  TestAllMethods();
2377  EvaluateAllMethods();
2378 
2379  //getting ROC
2380  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2381  importances[ny] += SROC - SSROC;
2382  //std::cout << "SubSeed: " << y << " y:" << ybitset << " x-y " << x - y << " " << std::bitset<32>(x - y) << " ny " << ny << " SROC " << SROC << " SSROC " << SSROC << " Importance = " << importances[ny] << std::endl;
2383  //cleaning information
2384  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
2385  TMVA::ResultsClassification *ssresults = (TMVA::ResultsClassification*)ssmethod->Data()->GetResults(ssmethod->GetMethodName(), Types::kTesting, Types::kClassification);
2386  delete ssresults;
2387  delete subseedloader;
2388  this->DeleteAllMethods();
2389  fMethodsMap.clear();
2390  }
2391  }
2392  }
2393  std::cout<<"--- Variable Importance Results (Random)"<<std::endl;
2394  return GetImportance(nbits,importances,varNames);
2395 }
2396 
2397 ////////////////////////////////////////////////////////////////////////////////
2398 
2399 TH1F* TMVA::Factory::GetImportance(const int nbits,std::vector<Double_t> importances,std::vector<TString> varNames)
2400 {
2401  TH1F *vih1 = new TH1F("vih1", "", nbits, 0, nbits);
2402 
2403  gStyle->SetOptStat(000000);
2404 
2405  Float_t normalization = 0.0;
2406  for (int i = 0; i < nbits; i++) {
2407  normalization = normalization + importances[i];
2408  }
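 // Express each variable's accumulated importance as a percentage of the total,
 // so the bin contents sum to 100%.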
2409 
2410  Float_t roc = 0.0;
2411 
2413  gStyle->SetTitleXOffset(1.2);
2414 
2415 
2416  Double_t x_ie[nbits], y_ie[nbits];
2417  for (Int_t i = 1; i < nbits + 1; i++) {
2418  x_ie[i - 1] = (i - 1) * 1.;
2419  roc = 100.0 * importances[i - 1] / normalization;
2420  y_ie[i - 1] = roc;
2421  std::cout<<"--- "<<varNames[i-1]<<" = "<<roc<<" %"<<std::endl;
2422  vih1->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
2423  vih1->SetBinContent(i, roc);
2424  }
2425  TGraph *g_ie = new TGraph(nbits, x_ie, y_ie);
2426  g_ie->SetTitle("");
2427 
2428  vih1->LabelsOption("v >", "X");
2429  vih1->SetBarWidth(0.97);
2430  Int_t ca = TColor::GetColor("#006600");
2431  vih1->SetFillColor(ca);
2432  //Int_t ci = TColor::GetColor("#990000");
2433 
2434  vih1->GetYaxis()->SetTitle("Importance (%)");
2435  vih1->GetYaxis()->SetTitleSize(0.045);
2436  vih1->GetYaxis()->CenterTitle();
2437  vih1->GetYaxis()->SetTitleOffset(1.24);
2438 
2439  vih1->GetYaxis()->SetRangeUser(-7, 50);
2440  vih1->SetDirectory(0);
2441 
2442 // vih1->Draw("B");
2443  return vih1;
2444 }
2445 
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title Offset is a correction factor with respect to the "s...
Definition: TAttAxis.cxx:294
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:371
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:785
virtual void SetLineWidth(Width_t lwidth)
Set the line width.
Definition: TAttLine.h:43
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Definition: Factory.cxx:2399
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
UInt_t GetNVariables() const
Definition: DataSetInfo.h:110
DataSetManager * fDataSetManager
Definition: DataLoader.h:190
static long int sum(long int i)
Definition: Factory.cxx:2173
Principal Components Analysis (PCA)
Definition: TPrincipal.h:20
void UsefulSortDescending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:575
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Definition: Factory.cxx:343
Double_t GetEffSForEffB(Double_t effB, const UInt_t num_points=41)
Calculate the signal efficiency (sensitivity) for a given background efficiency (sensitivity).
Definition: ROCCurve.cxx:220
Random number generator class based on M.
Definition: TRandom3.h:27
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Singleton class for Global types used by TMVA.
Definition: Types.h:73
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
auto * m
Definition: textangle.C:8
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
ROOT output file.
Definition: Factory.h:201
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
Definition: TH1.cxx:5059
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
Definition: Factory.cxx:1237
Bool_t fROC
enable to calculate corelations
Definition: Factory.h:208
Double_t GetROCIntegral(const UInt_t points=41)
Calculates the ROC integral (AUC)
Definition: ROCCurve.cxx:251
float Float_t
Definition: RtypesCore.h:53
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8194
void ROOTVersionMessage(MsgLogger &logger)
prints the ROOT release number and date
Definition: Tools.cxx:1336
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:638
R__EXTERN TStyle * gStyle
Definition: TStyle.h:402
static Types & Instance()
the the single instance of "Types" if existing already, or create it (Singleton)
Definition: Types.cxx:70
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2182
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Definition: MethodBase.cxx:628
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:285
Config & gConfig()
TH1 * h
Definition: legend2.C:5
MsgLogger & Log() const
Definition: Configurable.h:122
std::vector< TString > GetListOfVariables() const
returns list of variables
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
Bool_t Verbose(void) const
Definition: Factory.h:133
EAnalysisType
Definition: Types.h:125
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:35
virtual int MakeDirectory(const char *name)
Make a directory.
Definition: TSystem.cxx:825
DataSetInfo & DefaultDataSetInfo()
default creation
Definition: DataLoader.cxx:530
virtual void MakeClass(const TString &classFileName=TString("")) const =0
TAxis * GetYaxis() const
Get y axis of the graph.
Definition: TGraph.cxx:1602
Virtual base Class for all MVA method.
Definition: MethodBase.h:109
overwrite existing object with same name
Definition: TObject.h:88
#define gROOT
Definition: TROOT.h:402
TString fTransformations
option string given by construction (presently only "V")
Definition: Factory.h:205
Basic string class.
Definition: TString.h:125
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:567
Ranking for variables in method (implementation)
Definition: Ranking.h:48
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1099
int Int_t
Definition: RtypesCore.h:41
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
bool Bool_t
Definition: RtypesCore.h:59
void TrainAllMethods()
Iterates through all booked methods and calls training.
Definition: Factory.cxx:1015
virtual void SetTitle(const char *title="")
Set graph title.
Definition: TGraph.cxx:2208
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
Bool_t IsNaN(Double_t x)
Definition: TMath.h:777
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
Generate a collection of graphs, for all methods for a given class.
Definition: Factory.cxx:894
void WriteDataInformation(DataSetInfo &fDataSetInfo)
Definition: Factory.cxx:518
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:225
const TString & GetLabel() const
Definition: VariableInfo.h:59
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:367
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
Definition: TAxis.h:184
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
MsgLogger * fLogger
Definition: Configurable.h:128
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:491
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2287
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1225
virtual void SetBarWidth(Float_t width=0.5)
Definition: TH1.h:353
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
Definition: Factory.cxx:825
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
Definition: TAxis.cxx:928
static void InhibitOutput()
Definition: MsgLogger.cxx:74
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:392
#define READXML
Definition: Factory.cxx:104
Double_t x[n]
Definition: legend1.C:17
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:594
TAxis * GetXaxis()
Get x axis of the graph.
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
Definition: Factory.cxx:2063
Bool_t fModelPersistence
the training type
Definition: Factory.h:214
DataSet * Data() const
Definition: MethodBase.h:398
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:580
TString fWeightFileDir
Definition: Config.h:107
void ReadStateFromFile()
Function to write options and weights to file.
double pow(double, double)
void PrintHelpMessage() const
prints out method-specific help method
IONames & GetIONames()
Definition: Config.h:85
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:411
DataSetInfo & DataInfo() const
Definition: MethodBase.h:399
Bool_t DoRegression() const
Definition: MethodBase.h:427
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:72
void SetDrawProgressBar(Bool_t d)
Definition: Config.h:74
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:2086
Bool_t IsModelPersistence()
Definition: Factory.cxx:289
Class that contains all the data information.
Definition: DataSetInfo.h:60
std::map< TString, MVector * > fMethodsMap
Definition: Factory.h:85
TH1F * GetHistogram()
Returns a pointer to the histogram used to draw the axis.
virtual void Draw(Option_t *chopt="")
Draw this multigraph with its current attributes.
Bool_t fSilentFile
enable to calculate ROC values
Definition: Factory.h:209
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:22
Bool_t DoMulticlass() const
Definition: MethodBase.h:428
const Int_t MinNoTrainingEvents
Definition: Factory.cxx:99
Class that contains all the data information.
Definition: DataSet.h:69
virtual ~Factory()
Destructor.
Definition: Factory.cxx:297
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition: TAttLine.h:40
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods and sees if they use parameter tuning and if so.
Definition: Factory.cxx:616
TDirectory * RootBaseDir()
Definition: Factory.h:148
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9212
UInt_t GetNTargets() const
Definition: DataSetInfo.h:111
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
std::string GetMethodName(TCppMethod_t)
Definition: Cppyy.cxx:733
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Calculate the integral of the ROC curve, also known as the area under curve (AUC), for a given method.
Definition: Factory.cxx:764
DataSetManager * GetDataSetManager()
Definition: DataSetInfo.h:175
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
R__EXTERN TSystem * gSystem
Definition: TSystem.h:540
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
Definition: TPrincipal.cxx:410
SVector< double, 2 > v
Definition: Dict.h:5
void TMVAWelcomeMessage()
direct output, eg, when starting ROOT session -> no use of Logger here
Definition: Tools.cxx:1313
const char * GetName() const
Definition: MethodBase.h:323
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:398
ClassInfo * GetClassInfo(Int_t clNum) const
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
Definition: Factory.cxx:501
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition: TAttFill.h:37
const TMatrixD * CorrelationMatrix(const TString &className) const
Bool_t fCorrelations
verbose mode
Definition: Factory.h:207
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods. ...
Definition: Factory.cxx:1252
class TMVA::Config::VariablePlotting fVariablePlotting
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:561
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8477
void TestAllMethods()
Definition: Factory.cxx:1150
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
const TMatrixD * GetCovarianceMatrix() const
Definition: TPrincipal.h:58
TAxis * GetYaxis()
Get y axis of the graph.
const TString & GetMethodName() const
Definition: MethodBase.h:320
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb"...
Definition: TColor.cxx:1751
TAxis * GetYaxis()
Definition: TH1.h:316
Class that contains all the data information.
void Greetings()
Print welcome message.
Definition: Factory.cxx:273
This is the main MVA steering class.
Definition: Factory.h:81
Tools & gTools()
virtual void MakePrincipals()
Perform the principal components analysis.
Definition: TPrincipal.cxx:869
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:86
void SetVerbose(Bool_t v=kTRUE)
Definition: Factory.cxx:335
TFile * fgTargetFile
Definition: Factory.h:198
virtual Double_t GetSignificance() const
compute significance of mean difference
Definition: graph.py:1
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:406
TString GetWeightFileName() const
retrieve weight file name
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title The size is expressed in per cent of the pad width.
Definition: TAttAxis.cxx:304
TAxis * GetXaxis() const
Get x axis of the graph.
Definition: TGraph.cxx:1592
const Bool_t kFALSE
Definition: RtypesCore.h:88
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:237
Class for categorizing the phase space.
virtual void Print() const
get maximum length of variable names
Definition: Ranking.cxx:111
The Canvas class.
Definition: TCanvas.h:31
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:629
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:438
#define ClassImp(name)
Definition: Rtypes.h:359
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0, Types::ETreeType type=Types::kTesting)
Private method to generate a ROCCurve instance for a given method.
Definition: Factory.cxx:665
double Double_t
Definition: RtypesCore.h:55
virtual void PrintHelpMessage() const =0
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
const TMatrixD * GetCorrelationMatrix(const TMatrixD *covMat)
turns covariance into correlation matrix
Definition: Tools.cxx:336
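Assuming the usual statistical definition, the conversion normalises each covariance element by the product of the corresponding standard deviations:
rho_ij = C_ij / sqrt(C_ii * C_jj)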
int type
Definition: TGX11.cxx:120
Class which takes the results of a multiclass classification.
void SetFile(TFile *file)
Definition: MethodBase.h:364
Double_t y[n]
Definition: legend1.C:17
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:570
UInt_t GetEntries(const TString &name) const
static constexpr double s
void UsefulSortAscending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:549
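A sketch of what the description suggests, with made-up values:
std::vector<std::vector<Double_t>> v = { {3., 1., 2.}, {30., 10., 20.} };
std::vector<TString> names = { "c", "a", "b" };
TMVA::gTools().UsefulSortAscending(v, &names);
// v[0] is now {1., 2., 3.}; v[1] and names follow the same permutation,
// giving {10., 20., 30.} and {"a", "b", "c"}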
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:96
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
Definition: TAxis.cxx:809
void AddPreDefVal(const T &)
Definition: Configurable.h:168
void TMVAVersionMessage(MsgLogger &logger)
prints the TMVA release number and date
Definition: Tools.cxx:1327
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:67
void ProcessSetup()
process all options; the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:428
void PrintVariableRanking() const
prints ranking of input variables
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
const TString & GetOptions() const
Definition: Configurable.h:84
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TNamed.cxx:74
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:898
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
void SetUseColor(Bool_t uc)
Definition: Config.h:65
void SetConfigName(const char *n)
Definition: Configurable.h:63
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
#define VIBITS
Definition: Factory.cxx:107
void SetTitleXOffset(Float_t offset=1)
Definition: TStyle.h:382
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
Definition: Factory.cxx:1210
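For example (dataset and method title are placeholder names):
factory->PrintHelpMessage("dataset", "BDT"); // help text of the booked BDT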
TGraph * GetROCCurve(const UInt_t points=100)
Returns a new TGraph containing the ROC curve.
Definition: ROCCurve.cxx:277
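A drawing sketch, assuming "roc" points to a valid TMVA::ROCCurve (e.g. obtained through Factory::GetROC); the axis labels assume TMVA's usual efficiency-versus-rejection convention:
TGraph *g = roc->GetROCCurve(100);  // sample the curve at 100 points
g->SetTitle("ROC;signal efficiency;background rejection");
g->Draw("AL");                      // axes + connecting line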
TList * GetListOfGraphs() const
Definition: TMultiGraph.h:69
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample ...
Definition: MethodBase.cxx:969
DataSetManager * fDataSetManager
Definition: MethodBoost.h:193
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:497
const TString & GetSplitOptions() const
Definition: DataSetInfo.h:167
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
Definition: Factory.cxx:119
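The canonical construction from the TMVA tutorials:
TFile *outputFile = TFile::Open("TMVA.root", "RECREATE");
TMVA::Factory *factory = new TMVA::Factory("TMVAClassification", outputFile,
   "!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification");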
DataInputHandler & DataInput()
TString GetMethodTypeName() const
Definition: MethodBase.h:321
Class that is the base-class for a vector of result.
Definition: Results.h:57
void SetSilent(Bool_t s)
Definition: Config.h:68
const TCut & GetCut(Int_t i) const
Definition: DataSetInfo.h:149
void SetWeightFileDir(TString fileDir)
set directory of weight file
TString fJobName
used in constructor without file
Definition: Factory.h:211
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:349
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
void DeleteAllMethods(void)
Delete methods.
Definition: Factory.cxx:315
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1266
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings ...
Definition: Tools.cxx:1210
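For example:
// split a colon-separated option string into its tokens
std::vector<TString> tokens = TMVA::gTools().SplitString("Var1:Var2:Var3", ':');
// tokens now holds "Var1", "Var2", "Var3"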
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:215
virtual Double_t GetTrainingEfficiency(const TString &)
Bool_t IsSignal(const Event *ev) const
Bool_t IsSilentFile()
Definition: Factory.cxx:282
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:426
Types::EAnalysisType fAnalysisType
jobname, used as extension in weight file names
Definition: Factory.h:213
virtual void SetTitle(const char *title)
See GetStatOverflows for more information.
Definition: TH1.cxx:6154
void TMVACitation(MsgLogger &logger, ECitation citType=kPlainText)
kinds of TMVA citation
Definition: Tools.cxx:1452
std::vector< IMethod * > MVector
Definition: Factory.h:84
virtual const char * GetName() const
Returns name of object.
Definition: Factory.h:96
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
static void EnableOutput()
Definition: MsgLogger.cxx:75
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
Definition: Factory.cxx:1182
const TString & GetTestvarName() const
Definition: MethodBase.h:324
virtual const char * GetName() const
Returns name of object.
Definition: TObject.cxx:357
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
Definition: Factory.cxx:483
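A retrieval sketch; the downcast assumes the booked method derives from TMVA::MethodBase (all standard TMVA methods do), and the names are placeholders:
TMVA::IMethod *im = factory->GetMethod("dataset", "BDT");
if (auto *mb = dynamic_cast<TMVA::MethodBase*>(im))
   std::cout << mb->GetMethodName() << std::endl;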
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
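A hedged sketch, assuming "method" points to a trained multiclass TMVA::MethodBase and that effB is the working-point background efficiency:
// confusion matrix at 10% background efficiency, on the test tree
TMatrixD conf = method->GetMulticlassConfusionMatrix(0.10, TMVA::Types::kTesting);
conf.Print();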
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
void SetTestvarName(const TString &v="")
Definition: MethodBase.h:330
const Bool_t kTRUE
Definition: RtypesCore.h:87
DataSet * GetDataSet() const
returns data set
Types::EMVA GetMethodType() const
Definition: MethodBase.h:322
void CheckForUnusedOptions() const
checks for unused options in option string
const Int_t n
Definition: legend1.C:16
virtual void TestClassification()
initialization
const Event * GetEvent() const
Definition: DataSet.cxx:202
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:425
char name[80]
Definition: TGX11.cxx:109
double log(double)
Class that is the base-class for a vector of result.
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:315
void SetConfigDescription(const char *d)
Definition: Configurable.h:64
Bool_t fVerbose
List of transformations to test.
Definition: Factory.h:206
const char * Data() const
Definition: TString.h:345