ROOT 6.07/09 Reference Guide — source listing of TMVA Factory.cxx
(scraped Doxygen page; the "Go to the documentation of this file" link and
several original source lines were lost in extraction).
1 // @(#)Root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 // Updated by: Omar Zapata
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : Factory *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors : *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
16  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * Omar Zapata <Omar.Zapata@cern.ch> - UdeA/ITM Colombia *
22  * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
23  * Sergei Gleyzer <Sergei.Gleyzer@cern.ch> - U of Florida & CERN *
24  * *
25  * Copyright (c) 2005-2015: *
26  * CERN, Switzerland *
27  * U. of Victoria, Canada *
28  * MPI-K Heidelberg, Germany *
29  * U. of Bonn, Germany *
30  * UdeA/ITM, Colombia *
31  * U. of Florida, USA *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 //_______________________________________________________________________
39 //
40 // This is the main MVA steering class: it creates all MVA methods,
41 // and guides them through the training, testing and evaluation
42 // phases
43 //_______________________________________________________________________
44 
45 
46 #include "TMVA/Factory.h"
47 
48 #include "TMVA/ClassifierFactory.h"
49 #include "TMVA/Config.h"
50 #include "TMVA/Configurable.h"
51 #include "TMVA/Tools.h"
52 #include "TMVA/Ranking.h"
53 #include "TMVA/DataSet.h"
54 #include "TMVA/IMethod.h"
55 #include "TMVA/MethodBase.h"
56 #include "TMVA/DataInputHandler.h"
57 #include "TMVA/DataSetManager.h"
58 #include "TMVA/DataSetInfo.h"
59 #include "TMVA/DataLoader.h"
60 #include "TMVA/MethodBoost.h"
61 #include "TMVA/MethodCategory.h"
62 #include "TMVA/ROCCalc.h"
63 #include "TMVA/ROCCurve.h"
64 #include "TMVA/MsgLogger.h"
65 
66 #include "TMVA/VariableInfo.h"
67 #include "TMVA/VariableTransform.h"
68 
69 #include "TMVA/Results.h"
71 #include "TMVA/ResultsRegression.h"
72 #include "TMVA/ResultsMulticlass.h"
73 #include <list>
74 #include <bitset>
75 
76 #include "TMVA/Types.h"
77 
78 #include "TROOT.h"
79 #include "TFile.h"
80 #include "TTree.h"
81 #include "TLeaf.h"
82 #include "TEventList.h"
83 #include "TH2.h"
84 #include "TText.h"
85 #include "TLegend.h"
86 #include "TGraph.h"
87 #include "TStyle.h"
88 #include "TMatrixF.h"
89 #include "TMatrixDSym.h"
90 #include "TPaletteAxis.h"
91 #include "TPrincipal.h"
92 #include "TMath.h"
93 #include "TObjString.h"
94 #include "TSystem.h"
95 #include "TCanvas.h"
96 
98 //const Int_t MinNoTestEvents = 1;
99 
101 
102 #define READXML kTRUE
103 
104 //number of bits for bitset
105 #define VIBITS 32
106 
107 
108 
109 ////////////////////////////////////////////////////////////////////////////////
110 /// standard constructor
111 /// jobname : this name will appear in all weight file names produced by the MVAs
112 /// theTargetFile : output ROOT file; the test tree and all evaluation plots
113 /// will be stored here
114 /// theOption : option string; currently: "V" for verbose
115 
116 TMVA::Factory::Factory( TString jobName, TFile* theTargetFile, TString theOption )
117 : Configurable ( theOption ),
118  fTransformations ( "I" ),
119  fVerbose ( kFALSE ),
120  fCorrelations ( kFALSE ),
121  fROC ( kTRUE ),
122  fSilentFile ( kFALSE ),
123  fJobName ( jobName ),
124  fAnalysisType ( Types::kClassification ),
125  fModelPersistence (kTRUE)
126 {
 // Store the output file; test trees and evaluation plots are written here.
127  fgTargetFile = theTargetFile;
 // NOTE(review): original line 128 is missing from this listing — presumably
 // further member setup (e.g. fDataSetManager init); confirm against upstream.
129 
130  // render silent
131  if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure is silent if wanted to
132 
133 
134  // init configurable
135  SetConfigDescription( "Configuration options for Factory running" );
136  SetConfigName( GetName() );
137 
138  // histograms are not automatically associated with the current
139  // directory and hence don't go out of scope when closing the file
140  // TH1::AddDirectory(kFALSE);
141  Bool_t silent = kFALSE;
142 #ifdef WIN32
143  // under Windows, switch progress bar and color off by default, as the typical windows shell doesn't handle these (would need different sequences..)
144  Bool_t color = kFALSE;
145  Bool_t drawProgressBar = kFALSE;
146 #else
147  Bool_t color = !gROOT->IsBatch();
148  Bool_t drawProgressBar = kTRUE;
149 #endif
 // Declare the user-facing Factory options parsed from theOption below.
150  DeclareOptionRef( fVerbose, "V", "Verbose flag" );
151  DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
152  DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
153  DeclareOptionRef( fCorrelations, "Correlations", "boolean to show correlation in output" );
154  DeclareOptionRef( fROC, "ROC", "boolean to show ROC in output" );
155  DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
156  DeclareOptionRef( drawProgressBar,
157  "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
 // NOTE(review): original line 158 is missing — it should open the
 // `DeclareOptionRef( fModelPersistence,` call whose name/description
 // continue on the two lines below; confirm against upstream.
159  "ModelPersistence",
160  "Option to save the trained model in xml file or using serialization");
161 
162  TString analysisType("Auto");
163  DeclareOptionRef( analysisType,
164  "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
165  AddPreDefVal(TString("Classification"));
166  AddPreDefVal(TString("Regression"));
167  AddPreDefVal(TString("Multiclass"));
168  AddPreDefVal(TString("Auto"));
169 
 // Parse the option string declared above.
170  ParseOptions();
 // NOTE(review): original line 171 is missing from this listing.
172 
173  if (Verbose()) Log().SetMinType( kVERBOSE );
174 
175  // global settings
176  gConfig().SetUseColor( color );
177  gConfig().SetSilent( silent );
178  gConfig().SetDrawProgressBar( drawProgressBar );
179 
 // Map the (case-insensitive) AnalysisType option onto the Types enum;
 // "auto" defers the decision until BookMethod inspects the dataset.
180  analysisType.ToLower();
181  if ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
182  else if( analysisType == "regression" ) fAnalysisType = Types::kRegression;
183  else if( analysisType == "multiclass" ) fAnalysisType = Types::kMulticlass;
184  else if( analysisType == "auto" ) fAnalysisType = Types::kNoAnalysisType;
185 
186 // Greetings();
187 }
188 
189 
190 
 // NOTE(review): the constructor signature (orig. line 191, presumably
 // `TMVA::Factory::Factory( TString jobName, TString theOption )` — the
 // file-less overload, note fSilentFile(kTRUE) below) is missing from this
 // listing, as is the initializer on orig. line 200 (likely fModelPersistence).
192 : Configurable ( theOption ),
193  fTransformations ( "I" ),
194  fVerbose ( kFALSE ),
195  fCorrelations ( kFALSE ),
196  fROC ( kTRUE ),
197  fSilentFile ( kTRUE ),
198  fJobName ( jobName ),
199  fAnalysisType ( Types::kClassification ),
201 {
 // No target file in this overload: results stay in memory ("silent file" mode).
202  fgTargetFile = 0;
204 
205 
206  // render silent
207  if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure is silent if wanted to
208 
209 
210  // init configurable
211  SetConfigDescription( "Configuration options for Factory running" );
212  SetConfigName( GetName() );
213 
214  // histograms are not automatically associated with the current
215  // directory and hence don't go out of scope when closing the file
217  Bool_t silent = kFALSE;
218 #ifdef WIN32
219  // under Windows, switch progress bar and color off by default, as the typical windows shell doesn't handle these (would need different sequences..)
220  Bool_t color = kFALSE;
221  Bool_t drawProgressBar = kFALSE;
222 #else
223  Bool_t color = !gROOT->IsBatch();
224  Bool_t drawProgressBar = kTRUE;
225 #endif
 // Same option declarations as the file-based constructor above.
226  DeclareOptionRef( fVerbose, "V", "Verbose flag" );
227  DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
228  DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
229  DeclareOptionRef( fCorrelations, "Correlations", "boolean to show correlation in output" );
230  DeclareOptionRef( fROC, "ROC", "boolean to show ROC in output" );
231  DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
232  DeclareOptionRef( drawProgressBar,
233  "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
 // NOTE(review): orig. line 234 (the `DeclareOptionRef( fModelPersistence,`
 // opener for the two continuation lines below) is missing from this listing.
235  "ModelPersistence",
236  "Option to save the trained model in xml file or using serialization");
237 
238  TString analysisType("Auto");
239  DeclareOptionRef( analysisType,
240  "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
241  AddPreDefVal(TString("Classification"));
242  AddPreDefVal(TString("Regression"));
243  AddPreDefVal(TString("Multiclass"));
244  AddPreDefVal(TString("Auto"));
245 
246  ParseOptions();
248 
249  if (Verbose()) Log().SetMinType( kVERBOSE );
250 
251  // global settings
252  gConfig().SetUseColor( color );
253  gConfig().SetSilent( silent );
254  gConfig().SetDrawProgressBar( drawProgressBar );
255 
 // Translate the AnalysisType option to the Types enum ("auto" = decide later).
256  analysisType.ToLower();
257  if ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
258  else if( analysisType == "regression" ) fAnalysisType = Types::kRegression;
259  else if( analysisType == "multiclass" ) fAnalysisType = Types::kMulticlass;
260  else if( analysisType == "auto" ) fAnalysisType = Types::kNoAnalysisType;
261 
262  Greetings();
263 }
264 
265 
266 ////////////////////////////////////////////////////////////////////////////////
267 /// print welcome message
268 /// options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg
269 
 // NOTE(review): the declaration (orig. line 270, presumably
 // `void TMVA::Factory::Greetings()`) is missing from this listing.
 // Prints the TMVA welcome banner (logo variant) and version string.
271 {
273  gTools().TMVAWelcomeMessage( Log(), gTools().kLogoWelcomeMsg );
274  gTools().TMVAVersionMessage( Log() ); Log() << Endl;
275 }
276 
277 //_______________________________________________________________________
 // Accessor: kTRUE when the factory runs without an output file (file-less
 // constructor sets fSilentFile(kTRUE)).
 // NOTE(review): the declaration line (orig. 278) is missing from this listing.
279 {
280  return fSilentFile;
281 }
282 
283 //_______________________________________________________________________
 // Accessor: kTRUE when trained models are persisted to weight files
 // (the "ModelPersistence" option).
 // NOTE(review): the declaration line (orig. 284) is missing from this listing.
285 {
286  return fModelPersistence;
287 }
288 
289 ////////////////////////////////////////////////////////////////////////////////
290 /// destructor
291 /// delete fATreeEvent;
292 
 // NOTE(review): the destructor signature (orig. line 293,
 // `TMVA::Factory::~Factory()`) is missing from this listing.
294 {
 // Delete the default variable transformations owned by the factory.
295  std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
296  for (;trfIt != fDefaultTrfs.end(); trfIt++) delete (*trfIt);
297 
 // Delete every booked method in every dataset (see DeleteAllMethods below).
298  this->DeleteAllMethods();
299 
300 
301  // problem with call of REGISTER_METHOD macro ...
302  // ClassifierFactory::DestroyInstance();
303  // Types::DestroyInstance();
306 }
307 
308 ////////////////////////////////////////////////////////////////////////////////
309 /// delete methods
310 
 // NOTE(review): the declaration (orig. line 311, presumably
 // `void TMVA::Factory::DeleteAllMethods( void )`) is missing from this listing.
 // Deletes every booked method of every dataset, then the per-dataset vectors
 // themselves; the map entries are left dangling-free but the map keys remain.
312 {
313  std::map<TString,MVector*>::iterator itrMap;
314 
315  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
316  {
317  MVector *methods=itrMap->second;
318  // delete methods
319  MVector::iterator itrMethod = methods->begin();
320  for (; itrMethod != methods->end(); itrMethod++) {
321  Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
322  delete (*itrMethod);
323  }
324  methods->clear();
325  delete methods;
326  }
327 }
328 
329 ////////////////////////////////////////////////////////////////////////////////
330 
 // Setter for the verbosity flag (mirrors the "V" option).
 // NOTE(review): the declaration (orig. line 331, presumably
 // `void TMVA::Factory::SetVerbose( Bool_t v )`) is missing from this listing.
332 {
333  fVerbose = v;
334 }
335 
336 //_______________________________________________________________________
 // Books one classifier/regressor for the given DataLoader: resolves the
 // analysis type if still "auto", checks for duplicate titles, instantiates
 // the method (optionally wrapped in a boost), configures it, and registers
 // it in fMethodsMap keyed by dataset name.
 // NOTE(review): several original lines (344, 382, 409, 426, 437, 450) are
 // missing from this listing; the braces below are therefore unbalanced as
 // shown — see the inline notes. Confirm the gaps against upstream TMVA.
337 TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption )
338 {
339  // Book a classifier or regression method
340  if(fModelPersistence) gSystem->MakeDirectory(loader->GetName());//creating directory for DataLoader output
341 
342  TString datasetname=loader->GetName();
343 
 // NOTE(review): orig. line 344 is missing — presumably
 // `if( fAnalysisType == Types::kNoAnalysisType ){`, which the stray `}`
 // at orig. line 355 closes.
345  if( loader->DefaultDataSetInfo().GetNClasses()==2
346  && loader->DefaultDataSetInfo().GetClassInfo("Signal") != NULL
347  && loader->DefaultDataSetInfo().GetClassInfo("Background") != NULL
348  ){
349  fAnalysisType = Types::kClassification; // default is classification
350  } else if( loader->DefaultDataSetInfo().GetNClasses() >= 2 ){
351  fAnalysisType = Types::kMulticlass; // if two classes, but not named "Signal" and "Background"
352  } else
353  Log() << kFATAL << "No analysis type for " << loader->DefaultDataSetInfo().GetNClasses() << " classes and "
354  << loader->DefaultDataSetInfo().GetNTargets() << " regression targets." << Endl;
355  }
356 
357  // booking via name; the names are translated into enums and the
358  // corresponding overloaded BookMethod is called
359 
 // Refuse to book two methods with the same title on the same dataset.
360  if(fMethodsMap.find(datasetname)!=fMethodsMap.end())
361  {
362  if (GetMethod( datasetname,methodTitle ) != 0) {
363  Log() << kFATAL << "Booking failed since method with title <"
364  << methodTitle <<"> already exists "<< "in with DataSet Name <"<< loader->GetName()<<"> "
365  << Endl;
366  }
367  }
368 
369 
370  Log() << kHEADER << "Booking method: " << gTools().Color("bold") << methodTitle
371  // << gTools().Color("reset")<<" DataSet Name: "<<gTools().Color("bold")<<loader->GetName()
372  << gTools().Color("reset") << Endl << Endl;
373 
374  // interpret option string with respect to a request for boosting (i.e., BostNum > 0)
375  Int_t boostNum = 0;
376  TMVA::Configurable* conf = new TMVA::Configurable( theOption );
377  conf->DeclareOptionRef( boostNum = 0, "Boost_num",
378  "Number of times the classifier will be boosted" );
379  conf->ParseOptions();
380  delete conf;
 // Weight-file directory is <dataset>/<weight dir> when persistence is on.
381  TString fFileDir;
 // NOTE(review): orig. line 382 is missing — presumably
 // `if(fModelPersistence)` guarding the block below.
383  {
384  fFileDir=loader->GetName();
385  fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
386  }
387  // initialize methods
388  IMethod* im;
389  if (!boostNum) {
390  im = ClassifierFactory::Instance().Create( std::string(theMethodName),
391  fJobName,
392  methodTitle,
393  loader->DefaultDataSetInfo(),
394  theOption );
395  }
396  else {
397  // boosted classifier, requires a specific definition, making it transparent for the user
398  Log() << kDEBUG <<"Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;
399  im = ClassifierFactory::Instance().Create( std::string("Boost"),
400  fJobName,
401  methodTitle,
402  loader->DefaultDataSetInfo(),
403  theOption );
404  MethodBoost* methBoost = dynamic_cast<MethodBoost*>(im); // DSMTEST divided into two lines
405  if (!methBoost) // DSMTEST
406  Log() << kFATAL << "Method with type kBoost cannot be casted to MethodCategory. /Factory" << Endl; // DSMTEST
407 
408  if(fModelPersistence) methBoost->SetWeightFileDir(fFileDir);
 // NOTE(review): orig. line 409 is missing — presumably
 // `methBoost->SetModelPersistence(fModelPersistence);`.
410  methBoost->SetBoostedMethodName( theMethodName ); // DSMTEST divided into two lines
411  methBoost->fDataSetManager = loader->fDataSetManager; // DSMTEST
412  methBoost->SetFile(fgTargetFile);
413  methBoost->SetSilentFile(IsSilentFile());
414  }
415 
416  MethodBase *method = dynamic_cast<MethodBase*>(im);
417  if (method==0) return 0; // could not create method
418 
419  // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
420  if (method->GetMethodType() == Types::kCategory) { // DSMTEST
421  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(im)); // DSMTEST
422  if (!methCat) // DSMTEST
423  Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /Factory" << Endl; // DSMTEST
424 
425  if(fModelPersistence) methCat->SetWeightFileDir(fFileDir);
 // NOTE(review): orig. line 426 is missing — presumably
 // `methCat->SetModelPersistence(fModelPersistence);`.
427  methCat->fDataSetManager = loader->fDataSetManager; // DSMTEST
428  methCat->SetFile(fgTargetFile);
429  methCat->SetSilentFile(IsSilentFile());
430  } // DSMTEST
431 
432 
 // Reject methods that cannot handle the requested analysis type; the
 // warning spells out which mode (regression/multiclass/classification)
 // was requested, then the function bails out with `return 0` below.
433  if (!method->HasAnalysisType( fAnalysisType,
434  loader->DefaultDataSetInfo().GetNClasses(),
435  loader->DefaultDataSetInfo().GetNTargets() )) {
436  Log() << kWARNING << "Method " << method->GetMethodTypeName() << " is not capable of handling " ;
 // NOTE(review): orig. line 437 is missing — presumably
 // `if (fAnalysisType == Types::kRegression) {` opening this branch chain.
438  Log() << "regression with " << loader->DefaultDataSetInfo().GetNTargets() << " targets." << Endl;
439  }
440  else if (fAnalysisType == Types::kMulticlass ) {
441  Log() << "multiclass classification with " << loader->DefaultDataSetInfo().GetNClasses() << " classes." << Endl;
442  }
443  else {
444  Log() << "classification with " << loader->DefaultDataSetInfo().GetNClasses() << " classes." << Endl;
445  }
446  return 0;
447  }
448 
449  if(fModelPersistence) method->SetWeightFileDir(fFileDir);
 // NOTE(review): orig. line 450 is missing — presumably
 // `method->SetModelPersistence(fModelPersistence);`.
451  method->SetAnalysisType( fAnalysisType );
452  method->SetupMethod();
453  method->ParseOptions();
454  method->ProcessSetup();
455  method->SetFile(fgTargetFile);
456  method->SetSilentFile(IsSilentFile());
457 
458  // check-for-unused-options is performed; may be overridden by derived classes
459  method->CheckSetup();
460 
 // Register the method under its dataset, creating the vector on first use.
461  if(fMethodsMap.find(datasetname)==fMethodsMap.end())
462  {
463  MVector *mvector=new MVector;
464  fMethodsMap[datasetname]=mvector;
465  }
466  fMethodsMap[datasetname]->push_back( method );
467  return method;
468 }
469 
470 //_______________________________________________________________________
 // NOTE(review): the signature (orig. line 471, presumably
 // `TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader,
 //  Types::EMVA theMethod, TString methodTitle, TString theOption )`) is
 // missing from this listing. Convenience overload: translates the method
 // enum to its string name and delegates to the string-based BookMethod.
472 {
473  // books MVA method; the option configuration string is custom for each MVA
474  // the TString field "theNameAppendix" serves to define (and distinguish)
475  // several instances of a given MVA, eg, when one wants to compare the
476  // performance of various configurations
477  return BookMethod(loader, Types::Instance().GetMethodName( theMethod ), methodTitle, theOption );
478 }
479 
480 //_______________________________________________________________________
481 TMVA::IMethod* TMVA::Factory::GetMethod(const TString& datasetname, const TString &methodTitle ) const
482 {
483  // returns pointer to MVA that corresponds to given method title
484 
485  if(fMethodsMap.find(datasetname)==fMethodsMap.end()) return 0;
486 
487  MVector *methods=fMethodsMap.find(datasetname)->second;
488 
489  MVector::const_iterator itrMethod;
490  //
491  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
492  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
493  if ( (mva->GetMethodName())==methodTitle ) return mva;
494  }
495  return 0;
496 }
497 
498 //_______________________________________________________________________
 // NOTE(review): the signature (orig. line 499, presumably
 // `void TMVA::Factory::WriteDataInformation(DataSetInfo& fDataSetInfo)`) is
 // missing from this listing. Writes per-dataset information (correlation
 // matrices and variable distributions under each configured transformation)
 // into a sub-directory of the output file named after the dataset.
500 {
501  RootBaseDir()->cd();
502 
 // Create the per-dataset directory once; if it already exists the data
 // information was saved before and nothing more is done.
503  if(!RootBaseDir()->GetDirectory(fDataSetInfo.GetName())) RootBaseDir()->mkdir(fDataSetInfo.GetName());
504  else return; //loader is now in the output file, we dont need to save again
505 
506  RootBaseDir()->cd(fDataSetInfo.GetName());
507  fDataSetInfo.GetDataSet(); // builds dataset (including calculation of correlation matrix)
508 
509 
510  // correlation matrix of the default DS
511  const TMatrixD* m(0);
512  const TH2* h(0);
513 
 // NOTE(review): orig. line 514 is missing — presumably
 // `if(fAnalysisType == Types::kMulticlass){`, which the `}` at orig. 524
 // closes; the `else` branch at 525 handles the binary/regression case.
515  for (UInt_t cls = 0; cls < fDataSetInfo.GetNClasses() ; cls++) {
516  m = fDataSetInfo.CorrelationMatrix(fDataSetInfo.GetClassInfo(cls)->GetName());
517  h = fDataSetInfo.CreateCorrelationMatrixHist(m, TString("CorrelationMatrix")+fDataSetInfo.GetClassInfo(cls)->GetName(),
518  TString("Correlation Matrix (")+ fDataSetInfo.GetClassInfo(cls)->GetName() +TString(")"));
519  if (h!=0) {
520  h->Write();
521  delete h;
522  }
523  }
524  }
525  else{
526  m = fDataSetInfo.CorrelationMatrix( "Signal" );
527  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixS", "Correlation Matrix (signal)");
528  if (h!=0) {
529  h->Write();
530  delete h;
531  }
532 
533  m = fDataSetInfo.CorrelationMatrix( "Background" );
534  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixB", "Correlation Matrix (background)");
535  if (h!=0) {
536  h->Write();
537  delete h;
538  }
539 
540  m = fDataSetInfo.CorrelationMatrix( "Regression" );
541  h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrix", "Correlation Matrix");
542  if (h!=0) {
543  h->Write();
544  delete h;
545  }
546  }
547 
548  // some default transformations to evaluate
549  // NOTE: all transformations are destroyed after this test
550  TString processTrfs = "I"; //"I;N;D;P;U;G,D;"
551 
552  // plus some user defined transformations
553  processTrfs = fTransformations;
554 
555  // remove any trace of identity transform - if given (avoid to apply it twice)
556  std::vector<TMVA::TransformationHandler*> trfs;
557  TransformationHandler* identityTrHandler = 0;
558 
 // Build one TransformationHandler per ';'-separated transformation spec.
559  std::vector<TString> trfsDef = gTools().SplitString(processTrfs,';');
560  std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
561  for (; trfsDefIt!=trfsDef.end(); trfsDefIt++) {
562  trfs.push_back(new TMVA::TransformationHandler(fDataSetInfo, "Factory"));
563  TString trfS = (*trfsDefIt);
564 
565  //Log() << kINFO << Endl;
566  Log() << kDEBUG << "current transformation string: '" << trfS.Data() << "'" << Endl;
 // NOTE(review): orig. line 567 is missing — presumably the opening of a
 // `TMVA::CreateVariableTransforms( trfS,` call whose remaining arguments
 // follow on the three lines below.
568  fDataSetInfo,
569  *(trfs.back()),
570  Log() );
571 
572  if (trfS.BeginsWith('I')) identityTrHandler = trfs.back();
573  }
574 
575  const std::vector<Event*>& inputEvents = fDataSetInfo.GetDataSet()->GetEventCollection();
576 
577  // apply all transformations
578  std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
579 
580  for (;trfIt != trfs.end(); trfIt++) {
581  // setting a Root dir causes the variables distributions to be saved to the root file
582  (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.GetName()));// every dataloader have its own dir
583  (*trfIt)->CalcTransformations(inputEvents);
584  }
585  if(identityTrHandler) identityTrHandler->PrintVariableRanking();
586 
587  // clean up
588  for (trfIt = trfs.begin(); trfIt != trfs.end(); trfIt++) delete *trfIt;
589 }
590 
591 ////////////////////////////////////////////////////////////////////////////////
592 /// iterates through all booked methods and sees if they use parameter tuning and if so..
593 /// does just that i.e. calls "Method::Train()" for different parameter setttings and
594 /// keeps in mind the "optimal one"... and that's the one that will later on be used
595 /// in the main training loop.
596 
 // Iterates over every booked method of every dataset and runs its parameter
 // tuning (OptimizeTuningParameters), skipping methods whose training set is
 // too small. Returns the tuned-parameter map of the LAST method optimized
 // (earlier results are overwritten each iteration).
 // NOTE(review): MinNoTrainingEvents is defined earlier in the file (its
 // definition line was dropped from this listing); orig. line 610 is also
 // missing from the inner loop below — confirm against upstream.
597 std::map<TString,Double_t> TMVA::Factory::OptimizeAllMethods(TString fomType, TString fitType)
598 {
599 
600  std::map<TString,MVector*>::iterator itrMap;
601  std::map<TString,Double_t> TunedParameters;
602  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
603  {
604  MVector *methods=itrMap->second;
605 
606  MVector::iterator itrMethod;
607 
608  // iterate over methods and optimize
609  for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
611  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
612  if (!mva) {
613  Log() << kFATAL << "Dynamic cast to MethodBase failed" <<Endl;
614  return TunedParameters;
615  }
616 
 // Skip methods with too few training events (same guard as TrainAllMethods).
617  if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
618  Log() << kWARNING << "Method " << mva->GetMethodName()
619  << " not trained (training tree has less entries ["
620  << mva->Data()->GetNTrainingEvents()
621  << "] than required [" << MinNoTrainingEvents << "]" << Endl;
622  continue;
623  }
624 
625  Log() << kINFO << "Optimize method: " << mva->GetMethodName() << " for "
626  << (fAnalysisType == Types::kRegression ? "Regression" :
627  (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl;
628 
629  TunedParameters = mva->OptimizeTuningParameters(fomType,fitType);
630  Log() << kINFO << "Optimization of tuning paremters finished for Method:"<<mva->GetName() << Endl;
631  }
632  }
633 
634  return TunedParameters;
635 
636 }
637 
638 //_______________________________________________________________________
 // Convenience overload: forwards to GetROCIntegral(TString,TString) using
 // the DataLoader's name as the dataset key.
 // NOTE(review): the signature (orig. line 639, presumably
 // `Double_t TMVA::Factory::GetROCIntegral(TMVA::DataLoader *loader,
 //  TString theMethodName)`) is missing from this listing.
640 {
641  return GetROCIntegral((TString)loader->GetName(),theMethodName);
642 }
643 
644 //_______________________________________________________________________
646 {
647  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
648  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
649  return 0;
650  }
651  MVector *methods = fMethodsMap[datasetname.Data()];
652  MVector::iterator itrMethod = methods->begin();
653  TMVA::MethodBase *method = 0;
654  while (itrMethod != methods->end()) {
655  TMVA::MethodBase *cmethod = dynamic_cast<TMVA::MethodBase *>(*itrMethod);
656  if (!cmethod) {
657  //msg of error here
658  itrMethod++;
659  continue;
660  }
661  if (cmethod->GetMethodName() == theMethodName) {
662  method = cmethod;
663  break;
664  }
665  itrMethod++;
666  }
667 
668  if (!method) {
669  Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
670  return 0;
671  }
672 
674 
675  std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
676  std::vector<Bool_t> *mvaResType = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
677 
678  TMVA::ROCCurve *fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType);
679  if (!fROCCurve) Log() << kFATAL << Form("ROCCurve object was not created in Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
680 
681  Double_t fROCalcValue = fROCCurve->GetROCIntegral();
682 
683  return fROCalcValue;
684 }
685 
686 //_______________________________________________________________________
688 {
689  return GetROCCurve((TString)loader->GetName(),theMethodName,fLegend);
690 }
691 
692 //_______________________________________________________________________
693 TGraph* TMVA::Factory::GetROCCurve(TString datasetname,TString theMethodName,Bool_t fLegend)
694 {
695  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
696  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
697  return 0;
698  }
699  MVector *methods = fMethodsMap[datasetname.Data()];
700  MVector::iterator itrMethod = methods->begin();
701  TMVA::MethodBase *method = 0;
702  while (itrMethod != methods->end()) {
703  TMVA::MethodBase *cmethod = dynamic_cast<TMVA::MethodBase *>(*itrMethod);
704  if (!cmethod) {
705  //msg of error here
706  itrMethod++;
707  continue;
708  }
709  if (cmethod->GetMethodName() == theMethodName) {
710  method = cmethod;
711  break;
712  }
713  itrMethod++;
714  }
715 
716  if (!method) {
717  Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
718  return 0;
719  }
720 
722 
723  std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
724  std::vector<Bool_t> *mvaResType = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
725 
726  TMVA::ROCCurve *fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType);
727  if (!fROCCurve) Log() << kFATAL << Form("ROCCurve object was not created in Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
728 
729  TGraph *fGraph = (TGraph *)fROCCurve->GetROCCurve()->Clone();
730  if(fLegend)
731  {
732  fGraph->GetYaxis()->SetTitle("Background Rejection");
733  fGraph->GetXaxis()->SetTitle("Signal Efficiency");
734  fGraph->SetTitle(Form("Background Rejection vs. Signal Efficiency (%s)",method->GetMethodName().Data()));
735  }
736  delete fROCCurve;
737  return fGraph;
738 }
739 
740 
741 //_______________________________________________________________________
743 {
744  return GetROCCurve((TString)loader->GetName());
745 }
746 
747 //_______________________________________________________________________
 // Draws the ROC curves of ALL methods booked for the named dataset onto a
 // single new TCanvas with a shared legend, and returns the canvas (caller
 // owns it; each drawn TGraph is also heap-allocated and owned by the canvas
 // scope). NOTE(review): the signature (orig. line 748, presumably
 // `TCanvas* TMVA::Factory::GetROCCurve(TString datasetname)`) is missing
 // from this listing.
749 {
750  // Lookup dataset.
751  if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
752  Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
753  return 0;
754  }
755 
756  // Create canvas.
757  TString name("ROCCurve ");
758  name += datasetname;
759  TCanvas *fCanvas = new TCanvas(name,"ROC Curve",200,10,700,500);
760  fCanvas->SetGrid();
761  UInt_t line_color = 0; //Count line colors in canvas.
762 
763  TLegend *fLegend = new TLegend(0.15, 0.15, 0.35, 0.3, "MVA Method");
764  TGraph *fGraph = nullptr;
765 
766  // Loop over dataset.
767  MVector *methods = fMethodsMap[datasetname.Data()];
768  MVector::iterator itr = methods->begin();
769 
770  while (itr != methods->end()) {
771 
772  TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(*itr);
773  itr++;
774  if (!method) {
775  continue;
776  }
777  // Get results.
 // NOTE(review): orig. lines 779-780 are missing — the remaining arguments
 // of this GetResults call (presumably `Types::kTesting,
 // Types::kClassification);`). Confirm against upstream.
778  TMVA::Results *results = method->Data()->GetResults(method->GetMethodName(),
781 
782  std::vector<Float_t> *mvaRes =
783  dynamic_cast<ResultsClassification *>(results)->GetValueVector();
784  std::vector<Bool_t> *mvaResType =
785  dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
786 
787  // Generate ROCCurve.
788  TMVA::ROCCurve *fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType);
 // NOTE(review): `new` never returns null — this check is dead code.
789  if (!fROCCurve)
790  Log() << kFATAL << Form("ROCCurve object was not created in Method = %s not found with Dataset = %s ", method->GetMethodName().Data(), datasetname.Data()) << Endl;
791  fGraph=(TGraph*)fROCCurve->GetROCCurve()->Clone();
792  delete fROCCurve;
793  // Draw axes.
 // The first curve draws axes ("AC"); subsequent curves are overlaid ("C").
794  if (line_color == 0)
795  {
796  fGraph->GetYaxis()->SetTitle("Background Rejection");
797  fGraph->GetXaxis()->SetTitle("Signal Efficiency");
798  fGraph->SetTitle("Background Rejection vs. Signal Efficiency");
799  fGraph->Draw("AC");
800  }
801  else
802  fGraph->Draw("C");
803 
804  fGraph->SetLineWidth(2);
805  fGraph->SetLineColor(++line_color);
806 
807  fLegend->AddEntry(fGraph, method->GetMethodName(), "l");
808  }
809 
810  // Draw legend.
811  fLegend->Draw();
812 
813  return fCanvas;
814 }
815 
816 
817 
818 
819 ////////////////////////////////////////////////////////////////////////////////
820 /// iterates through all booked methods and calls training
/// NOTE(review): this scraped listing dropped several original source lines here
/// (822 -- presumably the signature `void TMVA::Factory::TrainAllMethods()` --
/// plus 848, 864 and 941-942 inside the body); confirm against upstream Factory.cxx.
/// For every dataset/method pair this routine: validates the input data and the
/// target/class configuration, trains the method, prints method-specific variable
/// rankings (non-regression only), and -- when fModelPersistence is enabled --
/// deletes each trained method and re-creates it from its weight file so that
/// later testing is consistent with standalone application.
821 
823 {
824  // iterates over all MVAs that have been booked, and calls their training methods
825 
826 
827  // don't do anything if no method booked
828  if (fMethodsMap.empty()) {
829  Log() << kINFO << "...nothing found to train" << Endl;
830  return;
831  }
832 
833  // here the training starts
834  //Log() << kINFO << " " << Endl;
835  Log() << kDEBUG << "Train all methods for "
836  << (fAnalysisType == Types::kRegression ? "Regression" :
837  (fAnalysisType == Types::kMulticlass ? "Multiclass" : "Classification") ) << " ..." << Endl;
838 
839  std::map<TString,MVector*>::iterator itrMap;
840 
841  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
842  {
843  MVector *methods=itrMap->second;
844  MVector::iterator itrMethod;
845 
846  // iterate over methods and train
847  for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
849  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
850 
851  if(mva==0) continue;
852 
853  if(mva->DataInfo().GetDataSetManager()->DataInput().GetEntries() <=1) { // 0 entries --> 0 events, 1 entry --> dynamical dataset (or one entry)
854  Log() << kFATAL << "No input data for the training provided!" << Endl;
855  }
856 
857  if(fAnalysisType == Types::kRegression && mva->DataInfo().GetNTargets() < 1 )
858  Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;
859  else if( (fAnalysisType == Types::kMulticlass || fAnalysisType == Types::kClassification)
860  && mva->DataInfo().GetNClasses() < 2 )
861  Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;
862 
863  // first print some information about the default dataset
865 
866 
// skip (do not train) methods whose training sample is smaller than MinNoTrainingEvents
867  if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
868  Log() << kWARNING << "Method " << mva->GetMethodName()
869  << " not trained (training tree has less entries ["
870  << mva->Data()->GetNTrainingEvents()
871  << "] than required [" << MinNoTrainingEvents << "]" << Endl;
872  continue;
873  }
874 
875  Log() << kHEADER << "Train method: " << mva->GetMethodName() << " for "
876  << (fAnalysisType == Types::kRegression ? "Regression" :
877  (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl << Endl;
878  mva->TrainMethod();
879  Log() << kHEADER << "Training finished" << Endl << Endl;
880  }
881 
882  if (fAnalysisType != Types::kRegression) {
883 
884  // variable ranking
885  //Log() << Endl;
886  Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
887  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
888  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
889  if (mva && mva->Data()->GetNTrainingEvents() >= MinNoTrainingEvents) {
890 
891  // create and print ranking
892  const Ranking* ranking = (*itrMethod)->CreateRanking();
893  if (ranking != 0) ranking->Print();
894  else Log() << kINFO << "No variable ranking supplied by classifier: "
895  << dynamic_cast<MethodBase*>(*itrMethod)->GetMethodName() << Endl;
896  }
897  }
898  }
899 
900  // delete all methods and recreate them from weight file - this ensures that the application
901  // of the methods (in TMVAClassificationApplication) is consistent with the results obtained
902  // in the testing
903  //Log() << Endl;
904  if (fModelPersistence) {
905 
906  Log() << kHEADER << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;
907 
908  if(!IsSilentFile())RootBaseDir()->cd();
909 
910  // iterate through all booked methods
911  for (UInt_t i=0; i<methods->size(); i++) {
912 
913  MethodBase* m = dynamic_cast<MethodBase*>((*methods)[i]);
914  if(m==0) continue;
915 
916  TMVA::Types::EMVA methodType = m->GetMethodType();
917  TString weightfile = m->GetWeightFileName();
918 
919  // decide if .txt or .xml file should be read:
920  if (READXML) weightfile.ReplaceAll(".txt",".xml");
921 
922  DataSetInfo& dataSetInfo = m->DataInfo();
923  TString testvarName = m->GetTestvarName();
// drop the in-memory trained instance; it is re-created from the weight file just below
924  delete m; //itrMethod[i];
925 
926  // recreate
927  m = dynamic_cast<MethodBase*>( ClassifierFactory::Instance()
928  .Create( std::string(Types::Instance().GetMethodName(methodType)),
929  dataSetInfo, weightfile ) );
930  if( m->GetMethodType() == Types::kCategory ){
931  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(m));
932  if( !methCat ) Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /Factory" << Endl;
933  else methCat->fDataSetManager = m->DataInfo().GetDataSetManager();
934  }
935  //ToDo, Do we need to fill the DataSetManager of MethodBoost here too?
936 
937 
938  TString fFileDir= m->DataInfo().GetName();
939  fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
940  m->SetWeightFileDir(fFileDir);
// NOTE(review): original lines 941-942 are missing from this scraped listing
943  m->SetAnalysisType(fAnalysisType);
944  m->SetupMethod();
945  m->ReadStateFromFile();
946  m->SetTestvarName(testvarName);
947 
948  // replace trained method by newly created one (from weight file) in methods vector
949  (*methods)[i] = m;
950  }
951  }
952  }
953 }
954 
955 ////////////////////////////////////////////////////////////////////////////////
/// Evaluate every booked method on its test sample.
/// NOTE(review): original line 957 -- presumably the signature
/// `void TMVA::Factory::TestAllMethods()` -- and line 975 inside the loop are
/// missing from this scraped listing; confirm against upstream Factory.cxx.
/// For each dataset, each method's AddOutput(Types::kTesting, analysisType) is
/// invoked, filling the test results that EvaluateAllMethods() consumes later.
956 
958 {
959  Log() << kDEBUG << "Test all methods..." << Endl;
960 
961  // don't do anything if no method booked
962  if (fMethodsMap.empty()) {
963  Log() << kINFO << "...nothing found to test" << Endl;
964  return;
965  }
966  std::map<TString,MVector*>::iterator itrMap;
967 
968  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
969  {
970  MVector *methods=itrMap->second;
971  MVector::iterator itrMethod;
972 
973  // iterate over methods and test
974  for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
976  MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
977  if(mva==0) continue;
978  Types::EAnalysisType analysisType = mva->GetAnalysisType();
979  Log() << kHEADER << "Test method: " << mva->GetMethodName() << " for "
980  << (analysisType == Types::kRegression ? "Regression" :
981  (analysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << " performance" << Endl << Endl;
982  mva->AddOutput( Types::kTesting, analysisType );
983  }
984  }
985 }
986 
987 //_______________________________________________________________________
988 void TMVA::Factory::MakeClass(const TString& datasetname , const TString& methodTitle ) const
989 {
990  if (methodTitle != "") {
991  IMethod* method = GetMethod(datasetname, methodTitle);
992  if (method) method->MakeClass();
993  else {
994  Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle
995  << "\" in list" << Endl;
996  }
997  }
998  else {
999 
1000  // no classifier specified, print all hepl messages
1001  MVector *methods=fMethodsMap.find(datasetname)->second;
1002  MVector::const_iterator itrMethod;
1003  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1004  MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
1005  if(method==0) continue;
1006  Log() << kINFO << "Make response class for classifier: " << method->GetMethodName() << Endl;
1007  method->MakeClass();
1008  }
1009  }
1010 }
1011 
1012 ////////////////////////////////////////////////////////////////////////////////
1013 /// Print predefined help message of classifier
1014 /// iterate over methods and test
1015 
1016 void TMVA::Factory::PrintHelpMessage(const TString& datasetname , const TString& methodTitle ) const
1017 {
1018  if (methodTitle != "") {
1019  IMethod* method = GetMethod(datasetname , methodTitle );
1020  if (method) method->PrintHelpMessage();
1021  else {
1022  Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle
1023  << "\" in list" << Endl;
1024  }
1025  }
1026  else {
1027 
1028  // no classifier specified, print all hepl messages
1029  MVector *methods=fMethodsMap.find(datasetname)->second;
1030  MVector::const_iterator itrMethod ;
1031  for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1032  MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
1033  if(method==0) continue;
1034  Log() << kINFO << "Print help message for classifier: " << method->GetMethodName() << Endl;
1035  method->PrintHelpMessage();
1036  }
1037  }
1038 }
1039 
1040 ////////////////////////////////////////////////////////////////////////////////
1041 /// iterates over all MVA input varables and evaluates them
/// NOTE(review): this scraped listing is missing original lines 1043 (the
/// signature -- presumably
/// `void TMVA::Factory::EvaluateAllVariables(DataLoader* loader, TString options)`),
/// 1046, and 1049 (the declaration of the TString `s` used below); confirm
/// against upstream Factory.cxx.
/// Books a "Variable" pseudo-method for every input variable of the loader, so
/// each raw input variable is evaluated like an MVA; option "V" adds verbosity.
1042 
1044 {
1045  Log() << kINFO << "Evaluating all variables..." << Endl;
1047 
1048  for (UInt_t i=0; i<loader->DefaultDataSetInfo().GetNVariables(); i++) {
1050  if (options.Contains("V")) s += ":V";
1051  this->BookMethod(loader, "Variable", s );
1052  }
1053 }
1054 
1055 ////////////////////////////////////////////////////////////////////////////////
1056 /// iterates over all MVAs that have been booked, and calls their evaluation methods
/// NOTE(review): the scraped listing is missing several original lines here,
/// including 1058 (the signature -- presumably
/// `void TMVA::Factory::EvaluateAllMethods()`) and scattered body lines
/// (1123, 1165-1166, 1175-1176, 1217-1218, 1340, 1351, 1557); confirm against
/// upstream Factory.cxx before relying on this listing verbatim.
/// Per dataset the routine proceeds in three stages:
///   1. per-method evaluation -- regression deviations, multiclass
///      efficiencies, or classification efficiencies/separation/ROC --
///      followed by sorting of the collected results;
///   2. optionally (fCorrelations) inter-MVA correlation and overlap studies;
///   3. formatted summary tables (ROC integrals and overtraining check).
1057 
1059 {
1060  Log() << kDEBUG << "Evaluate all methods..." << Endl;
1061 
1062  // don't do anything if no method booked
1063  if (fMethodsMap.empty()) {
1064  Log() << kINFO << "...nothing found to evaluate" << Endl;
1065  return;
1066  }
1067  std::map<TString,MVector*>::iterator itrMap;
1068 
1069  for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
1070  {
1071  MVector *methods=itrMap->second;
1072 
1073  // -----------------------------------------------------------------------
1074  // First part of evaluation process
1075  // --> compute efficiencies, and other separation estimators
1076  // -----------------------------------------------------------------------
1077 
1078  // although equal, we now want to seperate the outpuf for the variables
1079  // and the real methods
1080  Int_t isel; // will be 0 for a Method; 1 for a Variable
1081  Int_t nmeth_used[2] = {0,0}; // 0 Method; 1 Variable
1082 
1083  std::vector<std::vector<TString> > mname(2);
1084  std::vector<std::vector<Double_t> > sig(2), sep(2), roc(2);
1085  std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
1086  std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
1087  std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
1088 
1089  std::vector<std::vector<Float_t> > multiclass_testEff;
1090  std::vector<std::vector<Float_t> > multiclass_trainEff;
1091  std::vector<std::vector<Float_t> > multiclass_testPur;
1092  std::vector<std::vector<Float_t> > multiclass_trainPur;
1093 
1094  std::vector<std::vector<Double_t> > biastrain(1); // "bias" of the regression on the training data
1095  std::vector<std::vector<Double_t> > biastest(1); // "bias" of the regression on test data
1096  std::vector<std::vector<Double_t> > devtrain(1); // "dev" of the regression on the training data
1097  std::vector<std::vector<Double_t> > devtest(1); // "dev" of the regression on test data
1098  std::vector<std::vector<Double_t> > rmstrain(1); // "rms" of the regression on the training data
1099  std::vector<std::vector<Double_t> > rmstest(1); // "rms" of the regression on test data
1100  std::vector<std::vector<Double_t> > minftrain(1); // "minf" of the regression on the training data
1101  std::vector<std::vector<Double_t> > minftest(1); // "minf" of the regression on test data
1102  std::vector<std::vector<Double_t> > rhotrain(1); // correlation of the regression on the training data
1103  std::vector<std::vector<Double_t> > rhotest(1); // correlation of the regression on test data
1104 
1105  // same as above but for 'truncated' quantities (computed for events within 2sigma of RMS)
1106  std::vector<std::vector<Double_t> > biastrainT(1);
1107  std::vector<std::vector<Double_t> > biastestT(1);
1108  std::vector<std::vector<Double_t> > devtrainT(1);
1109  std::vector<std::vector<Double_t> > devtestT(1);
1110  std::vector<std::vector<Double_t> > rmstrainT(1);
1111  std::vector<std::vector<Double_t> > rmstestT(1);
1112  std::vector<std::vector<Double_t> > minftrainT(1);
1113  std::vector<std::vector<Double_t> > minftestT(1);
1114 
1115  // following vector contains all methods - with the exception of Cuts, which are special
1116  MVector methodsNoCuts;
1117 
1118  Bool_t doRegression = kFALSE;
1119  Bool_t doMulticlass = kFALSE;
1120 
1121  // iterate over methods and evaluate
1122  for (MVector::iterator itrMethod =methods->begin(); itrMethod != methods->end(); itrMethod++) {
1124  MethodBase* theMethod = dynamic_cast<MethodBase*>(*itrMethod);
1125  if(theMethod==0) continue;
1126  theMethod->SetFile(fgTargetFile);
1127  theMethod->SetSilentFile(IsSilentFile());
1128  if (theMethod->GetMethodType() != Types::kCuts) methodsNoCuts.push_back( *itrMethod );
1129 
1130  if (theMethod->DoRegression()) {
1131  doRegression = kTRUE;
1132 
1133  Log() << kINFO << "Evaluate regression method: " << theMethod->GetMethodName() << Endl;
1134  Double_t bias, dev, rms, mInf;
1135  Double_t biasT, devT, rmsT, mInfT;
1136  Double_t rho;
1137 
1138  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTesting );
1139  biastest[0] .push_back( bias );
1140  devtest[0] .push_back( dev );
1141  rmstest[0] .push_back( rms );
1142  minftest[0] .push_back( mInf );
1143  rhotest[0] .push_back( rho );
1144  biastestT[0] .push_back( biasT );
1145  devtestT[0] .push_back( devT );
1146  rmstestT[0] .push_back( rmsT );
1147  minftestT[0] .push_back( mInfT );
1148 
1149  theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTraining );
1150  biastrain[0] .push_back( bias );
1151  devtrain[0] .push_back( dev );
1152  rmstrain[0] .push_back( rms );
1153  minftrain[0] .push_back( mInf );
1154  rhotrain[0] .push_back( rho );
1155  biastrainT[0].push_back( biasT );
1156  devtrainT[0] .push_back( devT );
1157  rmstrainT[0] .push_back( rmsT );
1158  minftrainT[0].push_back( mInfT );
1159 
1160  mname[0].push_back( theMethod->GetMethodName() );
1161  nmeth_used[0]++;
1162  if(!IsSilentFile())
1163  {
1164  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
// NOTE(review): original lines 1165-1166 (presumably the histogram-writing calls) are missing here
1167  }
1168  }
1169  else if (theMethod->DoMulticlass()) {
1170  doMulticlass = kTRUE;
1171  Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl;
1172  if(!IsSilentFile())
1173  {
1174  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
// NOTE(review): original lines 1175-1176 (presumably the histogram-writing calls) are missing here
1177  }
1178  theMethod->TestMulticlass();
1179  multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur));
1180 
1181  nmeth_used[0]++;
1182  mname[0].push_back( theMethod->GetMethodName() );
1183  }
1184  else {
1185 
1186  Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl;
1187  isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 1 : 0;
1188 
1189  // perform the evaluation
1190  theMethod->TestClassification();
1191 
1192 
1193  // evaluate the classifier
1194  mname[isel].push_back( theMethod->GetMethodName() );
1195  sig[isel].push_back ( theMethod->GetSignificance() );
1196  sep[isel].push_back ( theMethod->GetSeparation() );
1197  roc[isel].push_back ( theMethod->GetROCIntegral() );
1198 
1199  Double_t err;
1200  eff01[isel].push_back( theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err) );
1201  eff01err[isel].push_back( err );
1202  eff10[isel].push_back( theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err) );
1203  eff10err[isel].push_back( err );
1204  eff30[isel].push_back( theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err) );
1205  eff30err[isel].push_back( err );
1206  effArea[isel].push_back( theMethod->GetEfficiency("", Types::kTesting, err) ); // computes the area (average)
1207 
1208  trainEff01[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.01") ); // the first pass takes longer
1209  trainEff10[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.10") );
1210  trainEff30[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.30") );
1211 
1212  nmeth_used[isel]++;
1213 
1214  if(!IsSilentFile())
1215  {
1216  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
// NOTE(review): original lines 1217-1218 (presumably the histogram-writing calls) are missing here
1219  }
1220  }
1221  }
1222  if (doRegression) {
1223 
// sort all regression results by the test-sample deviation (first vector)
1224  std::vector<TString> vtemps = mname[0];
1225  std::vector< std::vector<Double_t> > vtmp;
1226  vtmp.push_back( devtest[0] ); // this is the vector that is ranked
1227  vtmp.push_back( devtrain[0] );
1228  vtmp.push_back( biastest[0] );
1229  vtmp.push_back( biastrain[0] );
1230  vtmp.push_back( rmstest[0] );
1231  vtmp.push_back( rmstrain[0] );
1232  vtmp.push_back( minftest[0] );
1233  vtmp.push_back( minftrain[0] );
1234  vtmp.push_back( rhotest[0] );
1235  vtmp.push_back( rhotrain[0] );
1236  vtmp.push_back( devtestT[0] ); // this is the vector that is ranked
1237  vtmp.push_back( devtrainT[0] );
1238  vtmp.push_back( biastestT[0] );
1239  vtmp.push_back( biastrainT[0]);
1240  vtmp.push_back( rmstestT[0] );
1241  vtmp.push_back( rmstrainT[0] );
1242  vtmp.push_back( minftestT[0] );
1243  vtmp.push_back( minftrainT[0]);
1244  gTools().UsefulSortAscending( vtmp, &vtemps );
1245  mname[0] = vtemps;
1246  devtest[0] = vtmp[0];
1247  devtrain[0] = vtmp[1];
1248  biastest[0] = vtmp[2];
1249  biastrain[0] = vtmp[3];
1250  rmstest[0] = vtmp[4];
1251  rmstrain[0] = vtmp[5];
1252  minftest[0] = vtmp[6];
1253  minftrain[0] = vtmp[7];
1254  rhotest[0] = vtmp[8];
1255  rhotrain[0] = vtmp[9];
1256  devtestT[0] = vtmp[10];
1257  devtrainT[0] = vtmp[11];
1258  biastestT[0] = vtmp[12];
1259  biastrainT[0] = vtmp[13];
1260  rmstestT[0] = vtmp[14];
1261  rmstrainT[0] = vtmp[15];
1262  minftestT[0] = vtmp[16];
1263  minftrainT[0] = vtmp[17];
1264  }
1265  else if (doMulticlass) {
1266  // TODO: fill in something meaningfull
1267 
1268  }
1269  else {
1270  // now sort the variables according to the best 'eff at Beff=0.10'
1271  for (Int_t k=0; k<2; k++) {
1272  std::vector< std::vector<Double_t> > vtemp;
1273  vtemp.push_back( effArea[k] ); // this is the vector that is ranked
1274  vtemp.push_back( eff10[k] );
1275  vtemp.push_back( eff01[k] );
1276  vtemp.push_back( eff30[k] );
1277  vtemp.push_back( eff10err[k] );
1278  vtemp.push_back( eff01err[k] );
1279  vtemp.push_back( eff30err[k] );
1280  vtemp.push_back( trainEff10[k] );
1281  vtemp.push_back( trainEff01[k] );
1282  vtemp.push_back( trainEff30[k] );
1283  vtemp.push_back( sig[k] );
1284  vtemp.push_back( sep[k] );
1285  vtemp.push_back( roc[k] );
1286  std::vector<TString> vtemps = mname[k];
1287  gTools().UsefulSortDescending( vtemp, &vtemps );
1288  effArea[k] = vtemp[0];
1289  eff10[k] = vtemp[1];
1290  eff01[k] = vtemp[2];
1291  eff30[k] = vtemp[3];
1292  eff10err[k] = vtemp[4];
1293  eff01err[k] = vtemp[5];
1294  eff30err[k] = vtemp[6];
1295  trainEff10[k] = vtemp[7];
1296  trainEff01[k] = vtemp[8];
1297  trainEff30[k] = vtemp[9];
1298  sig[k] = vtemp[10];
1299  sep[k] = vtemp[11];
1300  roc[k] = vtemp[12];
1301  mname[k] = vtemps;
1302  }
1303  }
1304 
1305  // -----------------------------------------------------------------------
1306  // Second part of evaluation process
1307  // --> compute correlations among MVAs
1308  // --> compute correlations between input variables and MVA (determines importsance)
1309  // --> count overlaps
1310  // -----------------------------------------------------------------------
1311  if(fCorrelations)
1312  {
1313  const Int_t nmeth = methodsNoCuts.size();
// NOTE(review): methods[0][0] dereferences the MVector* -- i.e. (*methods)[0], the
// first booked method; its fDataSetInfo is used as the common dataset throughout
1314  MethodBase* method = dynamic_cast<MethodBase*>(methods[0][0]);
1315  const Int_t nvar = method->fDataSetInfo.GetNVariables();
1316  if (!doRegression && !doMulticlass ) {
1317 
1318  if (nmeth > 0) {
1319 
1320  // needed for correlations
1321  Double_t *dvec = new Double_t[nmeth+nvar];
1322  std::vector<Double_t> rvec;
1323 
1324  // for correlations
1325  TPrincipal* tpSig = new TPrincipal( nmeth+nvar, "" );
1326  TPrincipal* tpBkg = new TPrincipal( nmeth+nvar, "" );
1327 
1328  // set required tree branch references
1329  Int_t ivar = 0;
1330  std::vector<TString>* theVars = new std::vector<TString>;
1331  std::vector<ResultsClassification*> mvaRes;
1332  for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); itrMethod++, ivar++) {
1333  MethodBase* m = dynamic_cast<MethodBase*>(*itrMethod);
1334  if(m==0) continue;
1335  theVars->push_back( m->GetTestvarName() );
1336  rvec.push_back( m->GetSignalReferenceCut() );
1337  theVars->back().ReplaceAll( "MVA_", "" );
1338  mvaRes.push_back( dynamic_cast<ResultsClassification*>( m->Data()->GetResults( m->GetMethodName(),
1339  Types::kTesting,
// NOTE(review): original line 1340 (the remaining GetResults() argument and
// closing parentheses of this call) is missing from this scraped listing
1341  }
1342 
1343  // for overlap study
1344  TMatrixD* overlapS = new TMatrixD( nmeth, nmeth );
1345  TMatrixD* overlapB = new TMatrixD( nmeth, nmeth );
1346  (*overlapS) *= 0; // init...
1347  (*overlapB) *= 0; // init...
1348 
1349  // loop over test tree
1350  DataSet* defDs = method->fDataSetInfo.GetDataSet();
// NOTE(review): original line 1351 (presumably selecting the test-event type on defDs) is missing here
1352  for (Int_t ievt=0; ievt<defDs->GetNEvents(); ievt++) {
1353  const Event* ev = defDs->GetEvent(ievt);
1354 
1355  // for correlations
1356  TMatrixD* theMat = 0;
1357  for (Int_t im=0; im<nmeth; im++) {
1358  // check for NaN value
1359  Double_t retval = (Double_t)(*mvaRes[im])[ievt][0];
1360  if (TMath::IsNaN(retval)) {
1361  Log() << kWARNING << "Found NaN return value in event: " << ievt
1362  << " for method \"" << methodsNoCuts[im]->GetName() << "\"" << Endl;
1363  dvec[im] = 0;
1364  }
1365  else dvec[im] = retval;
1366  }
1367  for (Int_t iv=0; iv<nvar; iv++) dvec[iv+nmeth] = (Double_t)ev->GetValue(iv);
1368  if (method->fDataSetInfo.IsSignal(ev)) { tpSig->AddRow( dvec ); theMat = overlapS; }
1369  else { tpBkg->AddRow( dvec ); theMat = overlapB; }
1370 
1371  // count overlaps
1372  for (Int_t im=0; im<nmeth; im++) {
1373  for (Int_t jm=im; jm<nmeth; jm++) {
1374  if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1375  (*theMat)(im,jm)++;
1376  if (im != jm) (*theMat)(jm,im)++;
1377  }
1378  }
1379  }
1380  }
1381 
1382  // renormalise overlap matrix
1383  (*overlapS) *= (1.0/defDs->GetNEvtSigTest()); // init...
1384  (*overlapB) *= (1.0/defDs->GetNEvtBkgdTest()); // init...
1385 
1386  tpSig->MakePrincipals();
1387  tpBkg->MakePrincipals();
1388 
1389  const TMatrixD* covMatS = tpSig->GetCovarianceMatrix();
1390  const TMatrixD* covMatB = tpBkg->GetCovarianceMatrix();
1391 
1392  const TMatrixD* corrMatS = gTools().GetCorrelationMatrix( covMatS );
1393  const TMatrixD* corrMatB = gTools().GetCorrelationMatrix( covMatB );
1394 
1395  // print correlation matrices
1396  if (corrMatS != 0 && corrMatB != 0) {
1397 
1398  // extract MVA matrix
1399  TMatrixD mvaMatS(nmeth,nmeth);
1400  TMatrixD mvaMatB(nmeth,nmeth);
1401  for (Int_t im=0; im<nmeth; im++) {
1402  for (Int_t jm=0; jm<nmeth; jm++) {
1403  mvaMatS(im,jm) = (*corrMatS)(im,jm);
1404  mvaMatB(im,jm) = (*corrMatB)(im,jm);
1405  }
1406  }
1407 
1408  // extract variables - to MVA matrix
1409  std::vector<TString> theInputVars;
1410  TMatrixD varmvaMatS(nvar,nmeth);
1411  TMatrixD varmvaMatB(nvar,nmeth);
1412  for (Int_t iv=0; iv<nvar; iv++) {
1413  theInputVars.push_back( method->fDataSetInfo.GetVariableInfo( iv ).GetLabel() );
1414  for (Int_t jm=0; jm<nmeth; jm++) {
1415  varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1416  varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1417  }
1418  }
1419 
1420  if (nmeth > 1) {
1421  Log() << kINFO << Endl;
1422  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (signal):" << Endl;
1423  gTools().FormattedOutput( mvaMatS, *theVars, Log() );
1424  Log() << kINFO << Endl;
1425 
1426  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA correlation matrix (background):" << Endl;
1427  gTools().FormattedOutput( mvaMatB, *theVars, Log() );
1428  Log() << kINFO << Endl;
1429  }
1430 
1431  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (signal):" << Endl;
1432  gTools().FormattedOutput( varmvaMatS, theInputVars, *theVars, Log() );
1433  Log() << kINFO << Endl;
1434 
1435  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Correlations between input variables and MVA response (background):" << Endl;
1436  gTools().FormattedOutput( varmvaMatB, theInputVars, *theVars, Log() );
1437  Log() << kINFO << Endl;
1438  }
1439  else Log() << kWARNING <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "<TestAllMethods> cannot compute correlation matrices" << Endl;
1440 
1441  // print overlap matrices
1442  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
1443  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" << Endl;
1444  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "An event is signal-like, if its MVA output exceeds the following value:" << Endl;
1445  gTools().FormattedOutput( rvec, *theVars, "Method" , "Cut value", Log() );
1446  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;
1447 
1448  // give notice that cut method has been excluded from this test
1449  if (nmeth != (Int_t)methods->size())
1450  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Note: no correlations and overlap with cut method are provided at present" << Endl;
1451 
1452  if (nmeth > 1) {
1453  Log() << kINFO << Endl;
1454  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (signal):" << Endl;
1455  gTools().FormattedOutput( *overlapS, *theVars, Log() );
1456  Log() << kINFO << Endl;
1457 
1458  Log() << kINFO <<Form("Dataset[%s] : ",method->fDataSetInfo.GetName())<< "Inter-MVA overlap matrix (background):" << Endl;
1459  gTools().FormattedOutput( *overlapB, *theVars, Log() );
1460  }
1461 
1462  // cleanup
1463  delete tpSig;
1464  delete tpBkg;
1465  delete corrMatS;
1466  delete corrMatB;
1467  delete theVars;
1468  delete overlapS;
1469  delete overlapB;
1470  delete [] dvec;
1471  }
1472  }
1473  }
1474  // -----------------------------------------------------------------------
1475  // Third part of evaluation process
1476  // --> output
1477  // -----------------------------------------------------------------------
1478 
1479  if (doRegression) {
1480 
1481  Log() << kINFO << Endl;
1482  TString hLine = "--------------------------------------------------------------------------------------------------";
1483  Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
1484  Log() << kINFO << "(\"Bias\" quotes the mean deviation of the regression from true target." << Endl;
1485  Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
1486  Log() << kINFO << " Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" << Endl;
1487  Log() << kINFO << " tained when removing events deviating more than 2sigma from average.)" << Endl;
1488  Log() << kINFO << hLine << Endl;
1489  //Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1490  Log() << kINFO << hLine << Endl;
1491 
1492  for (Int_t i=0; i<nmeth_used[0]; i++) {
1493  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1494  if(theMethod==0) continue;
1495 
1496  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1497  theMethod->fDataSetInfo.GetName(),
1498  (const char*)mname[0][i],
1499  biastest[0][i], biastestT[0][i],
1500  rmstest[0][i], rmstestT[0][i],
1501  minftest[0][i], minftestT[0][i] )
1502  << Endl;
1503  }
1504  Log() << kINFO << hLine << Endl;
1505  Log() << kINFO << Endl;
1506  Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;
1507  Log() << kINFO << "(overtraining check)" << Endl;
1508  Log() << kINFO << hLine << Endl;
1509  Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1510  Log() << kINFO << hLine << Endl;
1511 
1512  for (Int_t i=0; i<nmeth_used[0]; i++) {
1513  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1514  if(theMethod==0) continue;
1515  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1516  theMethod->fDataSetInfo.GetName(),
1517  (const char*)mname[0][i],
1518  biastrain[0][i], biastrainT[0][i],
1519  rmstrain[0][i], rmstrainT[0][i],
1520  minftrain[0][i], minftrainT[0][i] )
1521  << Endl;
1522  }
1523  Log() << kINFO << hLine << Endl;
1524  Log() << kINFO << Endl;
1525  }
1526  else if( doMulticlass ){
1527  Log() << Endl;
1528  TString hLine = "-------------------------------------------------------------------------------------------------------";
1529  Log() << kINFO << "Evaluation results ranked by best signal efficiency times signal purity " << Endl;
1530  Log() << kINFO << hLine << Endl;
1531  // iterate over methods and evaluate
1532  for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1533  MethodBase* theMethod = dynamic_cast<MethodBase*>(*itrMethod);
1534  if(theMethod==0) continue;
1535 
1536  TString header= "DataSet Name MVA Method ";
1537  for(UInt_t icls = 0; icls<theMethod->fDataSetInfo.GetNClasses(); ++icls){
1538  header += Form("%-12s ",theMethod->fDataSetInfo.GetClassInfo(icls)->GetName());
1539  }
1540  Log() << kINFO << header << Endl;
1541  Log() << kINFO << hLine << Endl;
1542  for (Int_t i=0; i<nmeth_used[0]; i++) {
1543  TString res = Form("[%-14s] %-15s",theMethod->fDataSetInfo.GetName(),(const char*)mname[0][i]);
1544  for(UInt_t icls = 0; icls<theMethod->fDataSetInfo.GetNClasses(); ++icls){
1545  res += Form("%#1.3f ",(multiclass_testEff[i][icls])*(multiclass_testPur[i][icls]));
1546  }
1547  Log() << kINFO << res << Endl;
1548  }
1549  Log() << kINFO << hLine << Endl;
1550  Log() << kINFO << Endl;
1551  }
1552  }
1553  else {
1554  if(fROC)
1555  {
1556  Log().EnableOutput();
// NOTE(review): original line 1557 is missing from this scraped listing
1558  Log() << Endl;
1559  TString hLine = "-------------------------------------------------------------------------------------------------------------------";
1560  Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
1561  Log() << kINFO << hLine << Endl;
1562  Log() << kINFO << "DataSet MVA " << Endl;
1563  Log() << kINFO << "Name: Method: ROC-integ" << Endl;
1564 
1565 // Log() << kDEBUG << "DataSet MVA Signal efficiency at bkg eff.(error): | Sepa- Signifi- " << Endl;
1566 // Log() << kDEBUG << "Name: Method: @B=0.01 @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl;
1567  Log() << kDEBUG << hLine << Endl;
1568  for (Int_t k=0; k<2; k++) {
1569  if (k == 1 && nmeth_used[k] > 0) {
1570  Log() << kINFO << hLine << Endl;
1571  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
1572  }
1573  for (Int_t i=0; i<nmeth_used[k]; i++) {
1574  if (k == 1) mname[k][i].ReplaceAll( "Variable_", "" );
1575 
1576  MethodBase* theMethod = dynamic_cast<MethodBase*>(GetMethod(itrMap->first,mname[k][i]));
1577  if(theMethod==0) continue;
1578  TMVA::Results *results=theMethod->Data()->GetResults(mname[k][i],Types::kTesting,Types::kClassification);
1579  std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
1580  std::vector<Bool_t> *mvaResType = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
1581  Double_t fROCalcValue = 0;
1582  TMVA::ROCCurve *fROCCurve = nullptr;
// only build a ROC curve when test results exist for this method
1583  if (mvaResType->size() != 0) {
1584  fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType);
1585  fROCalcValue = fROCCurve->GetROCIntegral();
1586  }
1587 
1588  if (sep[k][i] < 0 || sig[k][i] < 0) {
1589  // cannot compute separation/significance -> no MVA (usually for Cuts)
1590  Log() << kINFO << Form("%-13s %-15s: %#1.3f",
1591  itrMap->first.Data(),
1592  (const char*)mname[k][i],
1593  effArea[k][i]) << Endl;
1594 
1595 // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f %#1.3f | -- --",
1596 // itrMap->first.Data(),
1597 // (const char*)mname[k][i],
1598 // eff01[k][i], Int_t(1000*eff01err[k][i]),
1599 // eff10[k][i], Int_t(1000*eff10err[k][i]),
1600 // eff30[k][i], Int_t(1000*eff30err[k][i]),
1601 // effArea[k][i],fROCalcValue) << Endl;
1602  }
1603  else {
1604  Log() << kINFO << Form("%-13s %-15s: %#1.3f",
1605  itrMap->first.Data(),
1606  (const char*)mname[k][i],
1607  fROCalcValue) << Endl;
1608 // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f %#1.3f | %#1.3f %#1.3f",
1609 // itrMap->first.Data(),
1610 // (const char*)mname[k][i],
1611 // eff01[k][i], Int_t(1000*eff01err[k][i]),
1612 // eff10[k][i], Int_t(1000*eff10err[k][i]),
1613 // eff30[k][i], Int_t(1000*eff30err[k][i]),
1614 // effArea[k][i],fROCalcValue,
1615 // sep[k][i], sig[k][i]) << Endl;
1616  }
1617  if (fROCCurve) delete fROCCurve;
1618  }
1619  }
1620  Log() << kINFO << hLine << Endl;
1621  Log() << kINFO << Endl;
1622  Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
1623  Log() << kINFO << hLine << Endl;
1624  Log() << kINFO << "DataSet MVA Signal efficiency: from test sample (from training sample) " << Endl;
1625  Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 " << Endl;
1626  Log() << kINFO << hLine << Endl;
1627  for (Int_t k=0; k<2; k++) {
1628  if (k == 1 && nmeth_used[k] > 0) {
1629  Log() << kINFO << hLine << Endl;
1630  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
1631  }
1632  for (Int_t i=0; i<nmeth_used[k]; i++) {
1633  if (k == 1) mname[k][i].ReplaceAll( "Variable_", "" );
// NOTE(review): this indexes (*methods)[i] in both k-passes, while the k==1 pass
// enumerates input variables rather than methods -- looks questionable; verify upstream
1634  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1635  if(theMethod==0) continue;
1636 
1637  Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
1638  theMethod->fDataSetInfo.GetName(),
1639  (const char*)mname[k][i],
1640  eff01[k][i],trainEff01[k][i],
1641  eff10[k][i],trainEff10[k][i],
1642  eff30[k][i],trainEff30[k][i]) << Endl;
1643  }
1644  }
1645  Log() << kINFO << hLine << Endl;
1646  Log() << kINFO << Endl;
1647 
1648  if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput();
1649  }//end fROC
1650  }
1651  if(!IsSilentFile())
1652  {
1653  std::list<TString> datasets;
1654  for (Int_t k=0; k<2; k++) {
1655  for (Int_t i=0; i<nmeth_used[k]; i++) {
1656  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1657  if(theMethod==0) continue;
1658  // write test/training trees
1659  RootBaseDir()->cd(theMethod->fDataSetInfo.GetName());
1660  if(std::find(datasets.begin(), datasets.end(), theMethod->fDataSetInfo.GetName()) == datasets.end())
1661  {
// NOTE(review): original lines 1662-1663 (presumably the tree-writing calls) are missing here
1664  datasets.push_back(theMethod->fDataSetInfo.GetName());
1665  }
1666  }
1667  }
1668  }
1669  }//end for MethodsMap
1670  // references for citation
// NOTE(review): original line 1671 (presumably the citation/reference output call) is missing here
1672 }
1673 
1674 ////////////////////////////////////////////////////////////////////////////////
1675 /// Evaluate Variable Importance
1676 
1677 TH1F* TMVA::Factory::EvaluateImportance(DataLoader *loader,VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption)
1678 {
1680  fSilentFile=kTRUE;//we need silent file here beause we need fast classification results
1681 
1682  //getting number of variables and variable names from loader
1683  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
1684  if(vitype==VIType::kShort)
1685  return EvaluateImportanceShort(loader,theMethod,methodTitle,theOption);
1686  else if(vitype==VIType::kAll)
1687  return EvaluateImportanceAll(loader,theMethod,methodTitle,theOption);
1688  else if(vitype==VIType::kRandom&&nbits>10)
1689  {
1690  return EvaluateImportanceRandom(loader,pow(2,nbits),theMethod,methodTitle,theOption);
1691  }else
1692  {
1693  std::cerr<<"Error in Variable Importance: Random mode require more that 10 variables in the dataset."<<std::endl;
1694  return nullptr;
1695  }
1696 }
1697 
////////////////////////////////////////////////////////////////////////////////
/// Exhaustive variable importance: train/test the method on every non-empty
/// subset ("seed") of the input variables, then attribute to each variable the
/// ROC-integral change caused by removing it from every seed that contains it.
///
/// \param loader      dataset loader holding the input variables
/// \param theMethod   MVA method type trained for every subset
/// \param methodTitle title under which the method is booked
/// \param theOption   option string forwarded to the booked method
/// \return histogram of per-variable importances (built by GetImportance)

TH1F* TMVA::Factory::EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
{

   uint64_t x = 0;   // current seed: bit i set <=> variable i is included
   uint64_t y = 0;   // subseed: x with exactly one bit cleared

   // getting number of variables and variable names from loader
   const int nbits = loader->DefaultDataSetInfo().GetNVariables();
   std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();

   // number of subsets of nbits variables (2^nbits)
   uint64_t range = pow(2, nbits);

   // vector to save importances, one entry per variable
   std::vector<Double_t> importances(nbits);
   // vector to save the ROC integral obtained for every seed
   std::vector<Double_t> ROC(range);
   ROC[0]=0.5;   // empty variable set: no discrimination, ROC integral is 0.5
   for (int i = 0; i < nbits; i++)importances[i] = 0;

   Double_t SROC, SSROC; // ROC integral of the seed and of the subseed
   // First pass: compute the ROC integral for every non-empty variable subset.
   for ( x = 1; x <range ; x++) {

      std::bitset<VIBITS> xbitset(x);
      if (x == 0) continue; // dataloader needs at least one variable (dead check: loop starts at 1)

      // create a fresh DataLoader holding only this seed's variables
      TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());

      // add the variables selected by the seed's bit pattern
      for (int index = 0; index < nbits; index++) {
         if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
      }

      DataLoaderCopy(seedloader,loader);
      seedloader->PrepareTrainingAndTestTree(loader->DefaultDataSetInfo().GetCut("Signal"), loader->DefaultDataSetInfo().GetCut("Background"), loader->DefaultDataSetInfo().GetSplitOptions());

      // book the method for this seed
      BookMethod(seedloader, theMethod, methodTitle, theOption);

      // train and test the booked method
      TrainAllMethods();
      TestAllMethods();
      // NOTE(review): an evaluation call between TestAllMethods() and reading
      // the ROC appears to be missing from this chunk (original line 1740 was
      // lost in extraction) -- verify against the upstream source.

      // store the ROC integral of this seed
      ROC[x] = GetROCIntegral(xbitset.to_string(), methodTitle);

      // cleaning information to process the next seed: delete the per-seed
      // results, which would otherwise hold a large amount of RAM across all
      // 2^nbits seeds
      TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
      TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
      delete sresults;
      delete seedloader;
      this->DeleteAllMethods();

      fMethodsMap.clear();
   }


   // Second pass: for every seed x and every variable i contained in it,
   // credit ROC(x) - ROC(x without i) to variable i's importance.
   for ( x = 0; x <range ; x++)
   {
      SROC=ROC[x];
      for (uint32_t i = 0; i < VIBITS; ++i) {
         if (x & (1 << i)) {
            y = x & ~(1 << i);   // subseed: seed with variable i removed
            std::bitset<VIBITS> ybitset(y);
            // need at least one variable
            // NOTE: if the subseed is zero we are in the special case where
            // the seed contains exactly one set bit (a single variable)
            // log2(x - y) recovers the index of the removed bit
            // (0.693147 ~ ln 2); the Double_t is implicitly truncated to an
            // integer when used as a vector index below
            Double_t ny = log(x - y) / 0.693147;
            if (y == 0) {
               importances[ny] = SROC - 0.5;
               continue;
            }

            // credit the ROC loss caused by removing variable i
            SSROC = ROC[y];
            importances[ny] += SROC - SSROC;
         }

      }
   }
   std::cout<<"--- Variable Importance Results (All)"<<std::endl;
   return GetImportance(nbits,importances,varNames);
}
1784 
////////////////////////////////////////////////////////////////////////////////
/// Sum of the geometric series 2^0 + 2^1 + ... + 2^(i-1), i.e. 2^i - 1:
/// the bit mask with the lowest \p i bits set (used as the all-variables seed
/// in EvaluateImportanceShort).
///
/// \param i number of terms (bits); non-positive values yield 0
/// \return  2^i - 1, computed with integer arithmetic
///
/// The previous implementation accumulated std::pow(2, n) results — a
/// floating-point round-trip that loses exactness once the sum exceeds 2^53.
static long int sum(long int i)
{
   // Empty series for non-positive i (the original loop body never executed),
   // and this also guards against an undefined negative shift count below.
   if (i <= 0) return 0;
   // (1 << i) - 1 sets the lowest i bits in one step.
   return (1L << i) - 1;
}
1791 
1792 
////////////////////////////////////////////////////////////////////////////////
/// Fast ("short") variable importance: train/test the method once on the full
/// variable set (the seed with all nbits bits set), then once per variable on
/// the set with that single variable removed; each variable's importance is
/// the resulting drop in ROC integral.
///
/// \param loader      dataset loader holding the input variables
/// \param theMethod   MVA method type trained for every subset
/// \param methodTitle title under which the method is booked
/// \param theOption   option string forwarded to the booked method
/// \return histogram of per-variable importances (built by GetImportance)

TH1F* TMVA::Factory::EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
{
   uint64_t x = 0;   // seed: bit i set <=> variable i is included
   uint64_t y = 0;   // subseed: x with exactly one bit cleared

   // getting number of variables and variable names from loader
   const int nbits = loader->DefaultDataSetInfo().GetNVariables();
   std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();

   // sum(nbits) == 2^nbits - 1: the mask with all variable bits set
   long int range = sum(nbits);
// std::cout<<range<<std::endl;
   // vector to save importances, one entry per variable
   std::vector<Double_t> importances(nbits);
   for (int i = 0; i < nbits; i++)importances[i] = 0;

   Double_t SROC, SSROC; // ROC integral of the full seed and of each subseed

   x = range;   // the single seed used here: all variables included

   std::bitset<VIBITS> xbitset(x);
   if (x == 0) Log()<<kFATAL<<"Error: need at least one variable."; //dataloader needs at least one variable


   // create a fresh DataLoader holding the full variable set
   TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());

   // add the variables selected by the seed's bit pattern (all of them)
   for (int index = 0; index < nbits; index++) {
      if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
   }

   // copy the dataset into the seed loader
   DataLoaderCopy(seedloader,loader);

   // book the method for the full seed
   BookMethod(seedloader, theMethod, methodTitle, theOption);

   // train and test the booked method
   TrainAllMethods();
   TestAllMethods();
   // NOTE(review): an evaluation call appears to be missing from this chunk
   // here (original line 1833 was lost in extraction) -- verify upstream.

   // ROC integral of the full variable set
   SROC = GetROCIntegral(xbitset.to_string(), methodTitle);

   // cleaning information to process subseeds: delete the per-seed results,
   // which would otherwise hold a large amount of RAM across all seeds
   TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
   TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
   delete sresults;
   delete seedloader;
   this->DeleteAllMethods();
   fMethodsMap.clear();

   // For each variable i: retrain on the set with i removed and credit the
   // ROC loss SROC - SSROC to variable i.
   for (uint32_t i = 0; i < VIBITS; ++i) {
      if (x & (1 << i)) {
         y = x & ~(1 << i);   // subseed: full set with variable i removed
         std::bitset<VIBITS> ybitset(y);
         // need at least one variable
         // NOTE: if the subseed is zero we are in the special case where
         // the seed contains exactly one set bit (a single variable)
         // log2(x - y) recovers the index of the removed bit
         // (0.693147 ~ ln 2); implicitly truncated when used as index
         Double_t ny = log(x - y) / 0.693147;
         if (y == 0) {
            importances[ny] = SROC - 0.5;
            continue;
         }

         // create a DataLoader for the subseed
         TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
         // add the variables selected by the subseed's bit pattern
         for (int index = 0; index < nbits; index++) {
            if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
         }

         // copy the dataset into the subseed loader
         DataLoaderCopy(subseedloader,loader);

         // book the method for this subseed
         BookMethod(subseedloader, theMethod, methodTitle, theOption);

         // train and test the booked method
         TrainAllMethods();
         TestAllMethods();
         // NOTE(review): evaluation call possibly missing here as well
         // (original line 1877 lost in extraction) -- verify upstream.

         // ROC integral of the subseed; the difference is variable i's share
         SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
         importances[ny] += SROC - SSROC;

         // cleaning information for the next subseed
         TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
         // NOTE(review): the declaration of 'ssresults' (original line 1885,
         // presumably fetched from ssmethod->Data()->GetResults(...)) was lost
         // in extraction; 'delete ssresults' below depends on it -- verify
         // against the upstream source.
         delete ssresults;
         delete subseedloader;
         this->DeleteAllMethods();
         fMethodsMap.clear();
      }
   }
   std::cout<<"--- Variable Importance Results (Short)"<<std::endl;
   return GetImportance(nbits,importances,varNames);
}
1895 
////////////////////////////////////////////////////////////////////////////////
/// Randomized variable importance: draw \p nseeds random variable subsets
/// ("seeds"), train/test the method on each seed and on each of its one-
/// variable-removed subseeds, and credit the ROC-integral differences to the
/// removed variables.
///
/// \param loader      dataset loader holding the input variables
/// \param nseeds      number of random subsets to draw
/// \param theMethod   MVA method type trained for every subset
/// \param methodTitle title under which the method is booked
/// \param theOption   option string forwarded to the booked method
/// \return histogram of per-variable importances (built by GetImportance)

TH1F* TMVA::Factory::EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption)
{
   TRandom3 *rangen = new TRandom3(0); // random generator, seeded from the clock
                                       // NOTE(review): never deleted -- leaks

   uint64_t x = 0;   // current seed: bit i set <=> variable i is included
   uint64_t y = 0;   // subseed: x with exactly one bit cleared

   // getting number of variables and variable names from loader
   const int nbits = loader->DefaultDataSetInfo().GetNVariables();
   std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();

   // size of the subset space (2^nbits); seeds are drawn uniformly from it
   long int range = pow(2, nbits);

   // vector to save importances, one entry per variable
   std::vector<Double_t> importances(nbits);
   // accumulated normalisation (only updated in the single-variable case)
   Double_t importances_norm = 0;
   for (int i = 0; i < nbits; i++)importances[i] = 0;

   Double_t SROC, SSROC; // ROC integral of the seed and of the subseed
   for (UInt_t n = 0; n < nseeds; n++) {
      // draw a random seed in [0, range)
      x = rangen -> Integer(range);

      std::bitset<32> xbitset(x);
      if (x == 0) continue; // dataloader needs at least one variable


      // create a fresh DataLoader holding only this seed's variables
      TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());

      // add the variables selected by the seed's bit pattern
      for (int index = 0; index < nbits; index++) {
         if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
      }

      // copy the dataset into the seed loader
      DataLoaderCopy(seedloader,loader);

      // book the method for this seed
      BookMethod(seedloader, theMethod, methodTitle, theOption);

      // train and test the booked method
      TrainAllMethods();
      TestAllMethods();
      // NOTE(review): an evaluation call appears to be missing from this chunk
      // here (original line 1939 was lost in extraction) -- verify upstream.

      // ROC integral of this seed
      SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
// std::cout << "Seed: n " << n << " x " << x << " xbitset:" << xbitset << " ROC " << SROC << std::endl;

      // cleaning information to process subseeds: delete the per-seed results,
      // which would otherwise hold a large amount of RAM across all seeds
      TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
      TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
      delete sresults;
      delete seedloader;
      this->DeleteAllMethods();
      fMethodsMap.clear();

      // For each variable i present in the seed: retrain without it and
      // credit the ROC loss SROC - SSROC to variable i.
      for (uint32_t i = 0; i < 32; ++i) {
         if (x & (1 << i)) {
            y = x & ~(1 << i);   // subseed: seed with variable i removed
            std::bitset<32> ybitset(y);
            // need at least one variable
            // NOTE: if the subseed is zero we are in the special case where
            // the seed contains exactly one set bit (a single variable)
            // log2(x - y) recovers the index of the removed bit
            // (0.693147 ~ ln 2); implicitly truncated when used as index
            Double_t ny = log(x - y) / 0.693147;
            if (y == 0) {
               importances[ny] = SROC - 0.5;
               importances_norm += importances[ny];
               // std::cout << "SubSeed: " << y << " y:" << ybitset << "ROC " << 0.5 << std::endl;
               continue;
            }

            // create a DataLoader for the subseed
            TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
            // add the variables selected by the subseed's bit pattern
            for (int index = 0; index < nbits; index++) {
               if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
            }

            // copy the dataset into the subseed loader
            DataLoaderCopy(subseedloader,loader);

            // book the method for this subseed
            BookMethod(subseedloader, theMethod, methodTitle, theOption);

            // train and test the booked method
            TrainAllMethods();
            TestAllMethods();
            // NOTE(review): evaluation call possibly missing here as well
            // (original line 1986 lost in extraction) -- verify upstream.

            // ROC integral of the subseed; the difference is variable i's share
            SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
            importances[ny] += SROC - SSROC;
            //std::cout << "SubSeed: " << y << " y:" << ybitset << " x-y " << x - y << " " << std::bitset<32>(x - y) << " ny " << ny << " SROC " << SROC << " SSROC " << SSROC << " Importance = " << importances[ny] << std::endl;
            // cleaning information for the next subseed
            TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
            // NOTE(review): the declaration of 'ssresults' (original line 1994,
            // presumably fetched via ssmethod->Data()->GetResults(...)) was
            // lost in extraction; 'delete ssresults' below depends on it --
            // verify against the upstream source.
            delete ssresults;
            delete subseedloader;
            this->DeleteAllMethods();
            fMethodsMap.clear();
         }
      }
   }
   std::cout<<"--- Variable Importance Results (Random)"<<std::endl;
   return GetImportance(nbits,importances,varNames);
}
2005 
2006 
2007 
2008 TH1F* TMVA::Factory::GetImportance(const int nbits,std::vector<Double_t> importances,std::vector<TString> varNames)
2009 {
2010  TH1F *vih1 = new TH1F("vih1", "", nbits, 0, nbits);
2011 
2012  gStyle->SetOptStat(000000);
2013 
2014  Float_t normalization = 0.0;
2015  for (int i = 0; i < nbits; i++) {
2016  normalization = normalization + importances[i];
2017  }
2018 
2019  Float_t roc = 0.0;
2020 
2021  gStyle->SetTitleXOffset(0.4);
2022  gStyle->SetTitleXOffset(1.2);
2023 
2024 
2025  Double_t x_ie[nbits], y_ie[nbits];
2026  for (Int_t i = 1; i < nbits + 1; i++) {
2027  x_ie[i - 1] = (i - 1) * 1.;
2028  roc = 100.0 * importances[i - 1] / normalization;
2029  y_ie[i - 1] = roc;
2030  std::cout<<"--- "<<varNames[i-1]<<" = "<<roc<<" %"<<std::endl;
2031  vih1->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
2032  vih1->SetBinContent(i, roc);
2033  }
2034  TGraph *g_ie = new TGraph(nbits + 2, x_ie, y_ie);
2035  g_ie->SetTitle("");
2036 
2037  vih1->LabelsOption("v >", "X");
2038  vih1->SetBarWidth(0.97);
2039  Int_t ca = TColor::GetColor("#006600");
2040  vih1->SetFillColor(ca);
2041  //Int_t ci = TColor::GetColor("#990000");
2042 
2043  vih1->GetYaxis()->SetTitle("Importance (%)");
2044  vih1->GetYaxis()->SetTitleSize(0.045);
2045  vih1->GetYaxis()->CenterTitle();
2046  vih1->GetYaxis()->SetTitleOffset(1.24);
2047 
2048  vih1->GetYaxis()->SetRangeUser(-7, 50);
2049  vih1->SetDirectory(0);
2050 
2051 // vih1->Draw("B");
2052  return vih1;
2053 }
2054 
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
Config & gConfig()
Definition: Config.cxx:43
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title Offset is a correction factor with respect to the "s...
Definition: TAttAxis.cxx:262
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:378
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:830
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
Definition: Factory.cxx:2008
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName)
Definition: Factory.cxx:639
DataSetManager * fDataSetManager
Definition: DataLoader.h:197
static long int sum(long int i)
Definition: Factory.cxx:1785
Principal Components Analysis (PCA)
Definition: TPrincipal.h:28
void UsefulSortDescending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:573
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Definition: Factory.cxx:337
Random number generator class based on M.
Definition: TRandom3.h:29
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
This class displays a legend box (TPaveText) containing several legend entries.
Definition: TLegend.h:27
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
ROOT output file.
Definition: Factory.h:195
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
Definition: TH1.cxx:4908
void EvaluateAllVariables(DataLoader *loader, TString options="")
iterates over all MVA input varables and evaluates them
Definition: Factory.cxx:1043
Bool_t fROC
enable to calculate corelations
Definition: Factory.h:202
float Float_t
Definition: RtypesCore.h:53
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8008
void ROOTVersionMessage(MsgLogger &logger)
prints the ROOT release number and date
Definition: Tools.cxx:1333
VIType
Definition: Types.h:75
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
const char * GetName() const
Definition: MethodBase.h:330
R__EXTERN TStyle * gStyle
Definition: TStyle.h:418
static Types & Instance()
the the single instance of "Types" if existin already, or create it (Signleton)
Definition: Types.cxx:64
void CheckForUnusedOptions() const
checks for unused options in option string
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
UInt_t GetNClasses() const
Definition: DataSetInfo.h:154
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:1793
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimzier with the set of paremeters and ranges that are meant to be tuned.
Definition: MethodBase.cxx:617
virtual void Draw(Option_t *option="")
Draw this legend with its current attributes.
Definition: TLegend.cxx:373
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:302
TH1 * h
Definition: legend2.C:5
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:50
DataSet * Data() const
Definition: MethodBase.h:405
EAnalysisType
Definition: Types.h:128
virtual int MakeDirectory(const char *name)
Make a directory.
Definition: TSystem.cxx:822
DataSetInfo & DefaultDataSetInfo()
Definition: DataLoader.cxx:491
UInt_t GetNTargets() const
Definition: DataSetInfo.h:129
#define gROOT
Definition: TROOT.h:364
TString fTransformations
option string given by construction (presently only "V")
Definition: Factory.h:199
Basic string class.
Definition: TString.h:137
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:575
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1089
int Int_t
Definition: RtypesCore.h:41
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory and return a pointer to the created directory.
Definition: TDirectory.cxx:957
bool Bool_t
Definition: RtypesCore.h:59
void TrainAllMethods()
iterates through all booked methods and calls training
Definition: Factory.cxx:822
const Bool_t kFALSE
Definition: Rtypes.h:92
virtual void TestMulticlass()
test multiclass classification
virtual void SetTitle(const char *title="")
Set graph title.
Definition: TGraph.cxx:2176
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
Definition: DataLoader.cxx:794
void WriteDataInformation(DataSetInfo &fDataSetInfo)
Definition: Factory.cxx:499
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:374
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
Definition: TAxis.h:190
UInt_t GetNVariables() const
Definition: DataSetInfo.h:128
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:558
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Definition: Factory.cxx:481
DataSet * GetDataSet() const
returns data set
MsgLogger * fLogger
Definition: Configurable.h:134
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
Definition: DataLoader.cxx:455
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:1896
overwrite existing object with same name
Definition: TObject.h:84
const TString & GetMethodName() const
Definition: MethodBase.h:327
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1220
TString GetWeightFileName() const
retrieve weight file name
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:233
UInt_t GetEntries(const TString &name) const
virtual void SetBarWidth(Float_t width=0.5)
Definition: TH1.h:362
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:85
const char * Data() const
Definition: TString.h:349
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
Definition: TAxis.cxx:925
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:433
static void InhibitOutput()
Definition: MsgLogger.cxx:69
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:388
Tools & gTools()
Definition: Tools.cxx:79
#define READXML
Definition: Factory.cxx:102
Double_t x[n]
Definition: legend1.C:17
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:601
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
Definition: Factory.cxx:1677
Bool_t IsSignal(const Event *ev) const
const int ny
Definition: kalman.C:17
virtual void SetGrid(Int_t valuex=1, Int_t valuey=1)
Definition: TPad.h:318
Bool_t fModelPersistence
the training type
Definition: Factory.h:208
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:602
TString fWeightFileDir
Definition: Config.h:100
void ReadStateFromFile()
Function to write options and weights to file.
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
double pow(double, double)
Bool_t DoMulticlass() const
Definition: MethodBase.h:435
std::vector< std::vector< double > > Data
IONames & GetIONames()
Definition: Config.h:78
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:403
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:76
void SetDrawProgressBar(Bool_t d)
Definition: Config.h:70
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Definition: Factory.cxx:1698
Types::EMVA GetMethodType() const
Definition: MethodBase.h:329
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TNamed.cxx:65
Bool_t IsModelPersistence()
Definition: Factory.cxx:284
std::map< TString, MVector * > fMethodsMap
Definition: Factory.h:91
Bool_t fSilentFile
enable to calculate ROC values
Definition: Factory.h:203
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier iterate over methods and test.
Definition: Factory.cxx:1016
virtual void MakeClass(const TString &classFileName=TString("")) const =0
const TMatrixD * GetCovarianceMatrix() const
Definition: TPrincipal.h:66
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:24
static void DestroyInstance()
Definition: Tools.cxx:95
const Int_t MinNoTrainingEvents
Definition: Factory.cxx:97
virtual ~Factory()
destructor delete fATreeEvent;
Definition: Factory.cxx:293
virtual void PrintHelpMessage() const =0
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
iterates through all booked methods and sees if they use parameter tuning and if so.
Definition: Factory.cxx:597
TDirectory * RootBaseDir()
Definition: Factory.h:153
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9001
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
Definition: DataSet.cxx:286
std::string GetMethodName(TCppMethod_t)
Definition: Cppyy.cxx:707
DataSetManager * GetDataSetManager()
Definition: DataSetInfo.h:193
Service class for 2-Dim histogram classes.
Definition: TH2.h:36
Double_t GetROCIntegral()
ROC Integral (AUC)
Definition: ROCCurve.cxx:72
R__EXTERN TSystem * gSystem
Definition: TSystem.h:549
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
Definition: TPrincipal.cxx:410
SVector< double, 2 > v
Definition: Dict.h:5
void TMVAWelcomeMessage()
direct output, eg, when starting ROOT session -> no use of Logger here
Definition: Tools.cxx:1310
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:419
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition: TAttFill.h:42
Bool_t fCorrelations
verbose mode
Definition: Factory.h:201
void EvaluateAllMethods(void)
iterates over all MVAs that have been booked, and calls their evaluation methods
Definition: Factory.cxx:1058
ClassInfo * GetClassInfo(Int_t clNum) const
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8280
void TestAllMethods()
Definition: Factory.cxx:957
const TMatrixD * CorrelationMatrix(const TString &className) const
unsigned int UInt_t
Definition: RtypesCore.h:42
TMarker * m
Definition: textangle.C:8
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb"...
Definition: TColor.cxx:1706
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
virtual Double_t GetSignificance() const
compute significance of mean difference significance = |<S> - |/Sqrt(RMS_S2 + RMS_B2) ...
TAxis * GetYaxis()
Definition: TH1.h:325
void Greetings()
print welcome message options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg ...
Definition: Factory.cxx:270
const TCut & GetCut(Int_t i) const
Definition: DataSetInfo.h:167
virtual void MakePrincipals()
Perform the principal components analysis.
Definition: TPrincipal.cxx:862
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:85
void SetVerbose(Bool_t v=kTRUE)
Definition: Factory.cxx:331
TFile * fgTargetFile
Definition: Factory.h:192
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:427
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title The size is expressed in per cent of the pad width.
Definition: TAttAxis.cxx:272
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
const TString & GetSplitOptions() const
Definition: DataSetInfo.h:185
const Event * GetEvent() const
Definition: DataSet.cxx:211
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:114
The Canvas class.
Definition: TCanvas.h:41
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
Definition: DataLoader.cxx:579
virtual const char * GetName() const
Returns name of object.
Definition: Factory.h:102
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:430
#define ClassImp(name)
Definition: Rtypes.h:279
double Double_t
Definition: RtypesCore.h:55
TLegendEntry * AddEntry(const TObject *obj, const char *label="", Option_t *option="lpf")
Add a new entry to this legend.
Definition: TLegend.cxx:280
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
const TMatrixD * GetCorrelationMatrix(const TMatrixD *covMat)
turns covariance into correlation matrix
Definition: Tools.cxx:337
void SetFile(TFile *file)
Definition: MethodBase.h:371
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:229
Double_t y[n]
Definition: legend1.C:17
static void DestroyInstance()
static function: destroy TMVA instance
Definition: Config.cxx:81
MsgLogger & Log() const
Definition: Configurable.h:128
DataSetInfo & DataInfo() const
Definition: MethodBase.h:406
void UsefulSortAscending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:547
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:114
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
Definition: TAxis.cxx:808
void AddPreDefVal(const T &)
Definition: Configurable.h:174
void TMVAVersionMessage(MsgLogger &logger)
prints the TMVA release number and date
Definition: Tools.cxx:1324
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:420
void PrintVariableRanking() const
prints ranking of input variables
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:896
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
void SetUseColor(Bool_t uc)
Definition: Config.h:61
void SetConfigName(const char *n)
Definition: Configurable.h:69
void SetSource(const std::string &source)
Definition: MsgLogger.h:74
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t fLegend=kTRUE)
Definition: Factory.cxx:687
#define VIBITS
Definition: Factory.cxx:105
Bool_t DoRegression() const
Definition: MethodBase.h:434
void SetTitleXOffset(Float_t offset=1)
Definition: TStyle.h:398
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
Definition: Factory.cxx:988
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
TGraph * GetROCCurve(const UInt_t points=100)
Definition: ROCCurve.cxx:120
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample ...
Definition: MethodBase.cxx:963
DataSetManager * fDataSetManager
Definition: MethodBoost.h:192
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:435
Bool_t Verbose(void) const
Definition: Factory.h:138
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
standard constructor jobname : this name will appear in all weight file names produced by the MVAs th...
Definition: Factory.cxx:116
DataInputHandler & DataInput()
void PrintHelpMessage() const
prints out method-specific help method
void SetSilent(Bool_t s)
Definition: Config.h:64
const TString & GetOptions() const
Definition: Configurable.h:90
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:567
Int_t IsNaN(Double_t x)
Definition: TMath.h:613
void SetWeightFileDir(TString fileDir)
set directory of weight file
TString fJobName
used in constructor without file
Definition: Factory.h:205
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:53
void DeleteAllMethods(void)
delete methods
Definition: Factory.cxx:311
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1257
#define NULL
Definition: Rtypes.h:82
virtual Double_t GetTrainingEfficiency(const TString &)
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:239
Bool_t IsSilentFile()
Definition: Factory.cxx:278
std::vector< TString > GetListOfVariables() const
returns list of variables
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:93
Types::EAnalysisType fAnalysisType
jobname, used as extension in weight file names
Definition: Factory.h:207
void TMVACitation(MsgLogger &logger, ECitation citType=kPlainText)
kinds of TMVA citation
Definition: Tools.cxx:1449
std::vector< IMethod * > MVector
Definition: Factory.h:90
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:356
virtual void Print() const
get maximum length of variable names
Definition: Ranking.cxx:111
static void EnableOutput()
Definition: MsgLogger.cxx:70
TString GetMethodTypeName() const
Definition: MethodBase.h:328
const TString & GetLabel() const
Definition: VariableInfo.h:67
const Bool_t kTRUE
Definition: Rtypes.h:91
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:155
void SetTestvarName(const TString &v="")
Definition: MethodBase.h:337
const Int_t n
Definition: legend1.C:16
virtual void TestClassification()
initialization
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings ...
Definition: Tools.cxx:1207
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:432
char name[80]
Definition: TGX11.cxx:109
double log(double)
TAxis * GetXaxis()
Definition: TH1.h:324
void SetConfigDescription(const char *d)
Definition: Configurable.h:70
Bool_t fVerbose
List of transformations to test.
Definition: Factory.h:200
const TString & GetTestvarName() const
Definition: MethodBase.h:331