ROOT 6.08/07 Reference Guide
Factory.cxx
// @(#)Root/tmva $Id$
// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
// Updated by: Omar Zapata
/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : Factory                                                               *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation (see header for description)                               *
 *                                                                                *
 * Authors :                                                                      *
 *      Andreas Hoecker   <Andreas.Hocker@cern.ch>   - CERN, Switzerland          *
 *      Joerg Stelzer     <stelzer@cern.ch>          - DESY, Germany              *
 *      Peter Speckmayer  <peter.speckmayer@cern.ch> - CERN, Switzerland          *
 *      Jan Therhaag      <Jan.Therhaag@cern.ch>     - U of Bonn, Germany         *
 *      Eckhard v. Toerne <evt@uni-bonn.de>          - U of Bonn, Germany         *
 *      Helge Voss        <Helge.Voss@cern.ch>       - MPI-K Heidelberg, Germany  *
 *      Kai Voss          <Kai.Voss@cern.ch>         - U. of Victoria, Canada     *
 *      Omar Zapata       <Omar.Zapata@cern.ch>      - UdeA/ITM Colombia          *
 *      Lorenzo Moneta    <Lorenzo.Moneta@cern.ch>   - CERN, Switzerland          *
 *      Sergei Gleyzer    <Sergei.Gleyzer@cern.ch>   - U of Florida & CERN        *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *      UdeA/ITM, Colombia                                                        *
 *      U. of Florida, USA                                                        *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                         *
 **********************************************************************************/

//_______________________________________________________________________
//
// This is the main MVA steering class: it creates all MVA methods,
// and guides them through the training, testing and evaluation
// phases
//_______________________________________________________________________

#include "TMVA/Factory.h"

#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/Configurable.h"
#include "TMVA/Tools.h"
#include "TMVA/Ranking.h"
#include "TMVA/DataSet.h"
#include "TMVA/IMethod.h"
#include "TMVA/MethodBase.h"
#include "TMVA/DataInputHandler.h"
#include "TMVA/DataSetManager.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/DataLoader.h"
#include "TMVA/MethodBoost.h"
#include "TMVA/MethodCategory.h"
#include "TMVA/ROCCalc.h"
#include "TMVA/ROCCurve.h"
#include "TMVA/MsgLogger.h"

#include "TMVA/VariableInfo.h"
#include "TMVA/VariableTransform.h"

#include "TMVA/Results.h"
#include "TMVA/ResultsClassification.h"
#include "TMVA/ResultsRegression.h"
#include "TMVA/ResultsMulticlass.h"
#include <list>
#include <bitset>

#include "TMVA/Types.h"

#include "TROOT.h"
#include "TFile.h"
#include "TTree.h"
#include "TLeaf.h"
#include "TEventList.h"
#include "TH2.h"
#include "TText.h"
#include "TLegend.h"
#include "TGraph.h"
#include "TStyle.h"
#include "TMatrixF.h"
#include "TMatrixDSym.h"
#include "TPaletteAxis.h"
#include "TPrincipal.h"
#include "TMath.h"
#include "TObjString.h"
#include "TSystem.h"
#include "TCanvas.h"

const Int_t MinNoTrainingEvents = 10;
//const Int_t MinNoTestEvents = 1;

TFile* TMVA::Factory::fgTargetFile = 0;

#define READXML kTRUE

// number of bits for bitset
#define VIBITS 32


////////////////////////////////////////////////////////////////////////////////
/// Standard constructor.
///   jobname       : this name will appear in all weight file names produced by the MVAs
///   theTargetFile : output ROOT file; the test tree and all evaluation plots
///                   will be stored here
///   theOption     : option string; currently: "V" for verbose

TMVA::Factory::Factory( TString jobName, TFile* theTargetFile, TString theOption )
: Configurable        ( theOption ),
  fTransformations    ( "I" ),
  fVerbose            ( kFALSE ),
  fCorrelations       ( kFALSE ),
  fROC                ( kTRUE ),
  fSilentFile         ( kFALSE ),
  fJobName            ( jobName ),
  fAnalysisType       ( Types::kClassification ),
  fModelPersistence   ( kTRUE )
{
   fgTargetFile = theTargetFile;

   // render silent
   if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure is silent if wanted to

   // init configurable
   SetConfigDescription( "Configuration options for Factory running" );
   SetConfigName( GetName() );

   // histograms are not automatically associated with the current
   // directory and hence don't go out of scope when closing the file
   // TH1::AddDirectory(kFALSE);
   Bool_t silent = kFALSE;
#ifdef WIN32
   // under Windows, switch progress bar and color off by default, as the typical
   // Windows shell doesn't handle these (would need different sequences..)
   Bool_t color = kFALSE;
   Bool_t drawProgressBar = kFALSE;
#else
   Bool_t color = !gROOT->IsBatch();
   Bool_t drawProgressBar = kTRUE;
#endif
   DeclareOptionRef( fVerbose, "V", "Verbose flag" );
   DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
   DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, uniform and Gaussianisation followed by decorrelation transformations" );
   DeclareOptionRef( fCorrelations, "Correlations", "Boolean to show correlation in output" );
   DeclareOptionRef( fROC, "ROC", "Boolean to show ROC in output" );
   DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
   DeclareOptionRef( drawProgressBar,
                     "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
   DeclareOptionRef( fModelPersistence,
                     "ModelPersistence",
                     "Option to save the trained model in xml file or using serialization" );

   TString analysisType("Auto");
   DeclareOptionRef( analysisType,
                     "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
   AddPreDefVal(TString("Classification"));
   AddPreDefVal(TString("Regression"));
   AddPreDefVal(TString("Multiclass"));
   AddPreDefVal(TString("Auto"));

   ParseOptions();
   CheckForUnusedOptions();

   if (Verbose()) Log().SetMinType( kVERBOSE );

   // global settings
   gConfig().SetUseColor( color );
   gConfig().SetSilent( silent );
   gConfig().SetDrawProgressBar( drawProgressBar );

   analysisType.ToLower();
   if     ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
   else if( analysisType == "regression" )     fAnalysisType = Types::kRegression;
   else if( analysisType == "multiclass" )     fAnalysisType = Types::kMulticlass;
   else if( analysisType == "auto" )           fAnalysisType = Types::kNoAnalysisType;

   // Greetings();
}
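
// Usage sketch (illustrative only, not part of this file). The option names
// follow the DeclareOptionRef calls above; the job and file names are
// hypothetical:
//
//    TFile* outputFile = TFile::Open("TMVA.root", "RECREATE");
//    TMVA::Factory factory("TMVAClassification", outputFile,
//                          "V:Color:DrawProgressBar:AnalysisType=Auto");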


TMVA::Factory::Factory( TString jobName, TString theOption )
: Configurable        ( theOption ),
  fTransformations    ( "I" ),
  fVerbose            ( kFALSE ),
  fCorrelations       ( kFALSE ),
  fROC                ( kTRUE ),
  fSilentFile         ( kTRUE ),
  fJobName            ( jobName ),
  fAnalysisType       ( Types::kClassification ),
  fModelPersistence   ( kTRUE )
{
   fgTargetFile = 0;

   // render silent
   if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); // make sure is silent if wanted to

   // init configurable
   SetConfigDescription( "Configuration options for Factory running" );
   SetConfigName( GetName() );

   // histograms are not automatically associated with the current
   // directory and hence don't go out of scope when closing the file
   // TH1::AddDirectory(kFALSE);
   Bool_t silent = kFALSE;
#ifdef WIN32
   // under Windows, switch progress bar and color off by default, as the typical
   // Windows shell doesn't handle these (would need different sequences..)
   Bool_t color = kFALSE;
   Bool_t drawProgressBar = kFALSE;
#else
   Bool_t color = !gROOT->IsBatch();
   Bool_t drawProgressBar = kTRUE;
#endif
   DeclareOptionRef( fVerbose, "V", "Verbose flag" );
   DeclareOptionRef( color, "Color", "Flag for coloured screen output (default: True, if in batch mode: False)" );
   DeclareOptionRef( fTransformations, "Transformations", "List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, uniform and Gaussianisation followed by decorrelation transformations" );
   DeclareOptionRef( fCorrelations, "Correlations", "Boolean to show correlation in output" );
   DeclareOptionRef( fROC, "ROC", "Boolean to show ROC in output" );
   DeclareOptionRef( silent, "Silent", "Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
   DeclareOptionRef( drawProgressBar,
                     "DrawProgressBar", "Draw progress bar to display training, testing and evaluation schedule (default: True)" );
   DeclareOptionRef( fModelPersistence,
                     "ModelPersistence",
                     "Option to save the trained model in xml file or using serialization" );

   TString analysisType("Auto");
   DeclareOptionRef( analysisType,
                     "AnalysisType", "Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
   AddPreDefVal(TString("Classification"));
   AddPreDefVal(TString("Regression"));
   AddPreDefVal(TString("Multiclass"));
   AddPreDefVal(TString("Auto"));

   ParseOptions();
   CheckForUnusedOptions();

   if (Verbose()) Log().SetMinType( kVERBOSE );

   // global settings
   gConfig().SetUseColor( color );
   gConfig().SetSilent( silent );
   gConfig().SetDrawProgressBar( drawProgressBar );

   analysisType.ToLower();
   if     ( analysisType == "classification" ) fAnalysisType = Types::kClassification;
   else if( analysisType == "regression" )     fAnalysisType = Types::kRegression;
   else if( analysisType == "multiclass" )     fAnalysisType = Types::kMulticlass;
   else if( analysisType == "auto" )           fAnalysisType = Types::kNoAnalysisType;

   Greetings();
}

////////////////////////////////////////////////////////////////////////////////
/// Print welcome message.
/// Options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg

void TMVA::Factory::Greetings()
{
   gTools().ROOTVersionMessage( Log() );
   gTools().TMVAWelcomeMessage( Log(), gTools().kLogoWelcomeMsg );
   gTools().TMVAVersionMessage( Log() ); Log() << Endl;
}

//_______________________________________________________________________
Bool_t TMVA::Factory::IsSilentFile()
{
   return fSilentFile;
}

//_______________________________________________________________________
Bool_t TMVA::Factory::IsModelPersistence()
{
   return fModelPersistence;
}

////////////////////////////////////////////////////////////////////////////////
/// Destructor.
///   delete fATreeEvent;

TMVA::Factory::~Factory( void )
{
   std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
   for (;trfIt != fDefaultTrfs.end(); trfIt++) delete (*trfIt);

   this->DeleteAllMethods();


   // problem with call of REGISTER_METHOD macro ...
   // ClassifierFactory::DestroyInstance();
   // Types::DestroyInstance();
}

////////////////////////////////////////////////////////////////////////////////
/// Delete all booked methods.

void TMVA::Factory::DeleteAllMethods( void )
{
   std::map<TString,MVector*>::iterator itrMap;

   for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
   {
      MVector *methods = itrMap->second;
      // delete methods
      MVector::iterator itrMethod = methods->begin();
      for (; itrMethod != methods->end(); itrMethod++) {
         Log() << kDEBUG << "Delete method: " << (*itrMethod)->GetName() << Endl;
         delete (*itrMethod);
      }
      methods->clear();
      delete methods;
   }
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::Factory::SetVerbose( Bool_t v )
{
   fVerbose = v;
}

//_______________________________________________________________________
TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption )
{
   // Book a classifier or regression method.
   if(fModelPersistence) gSystem->MakeDirectory(loader->GetName()); // creating directory for DataLoader output

   TString datasetname = loader->GetName();

   if( fAnalysisType == Types::kNoAnalysisType ){
      if( loader->DefaultDataSetInfo().GetNClasses()==2
          && loader->DefaultDataSetInfo().GetClassInfo("Signal") != NULL
          && loader->DefaultDataSetInfo().GetClassInfo("Background") != NULL
          ){
         fAnalysisType = Types::kClassification; // default is classification
      } else if( loader->DefaultDataSetInfo().GetNClasses() >= 2 ){
         fAnalysisType = Types::kMulticlass;     // if two classes, but not named "Signal" and "Background"
      } else
         Log() << kFATAL << "No analysis type for " << loader->DefaultDataSetInfo().GetNClasses() << " classes and "
               << loader->DefaultDataSetInfo().GetNTargets() << " regression targets." << Endl;
   }

   // booking via name; the names are translated into enums and the
   // corresponding overloaded BookMethod is called

   if(fMethodsMap.find(datasetname)!=fMethodsMap.end())
   {
      if (GetMethod( datasetname, methodTitle ) != 0) {
         Log() << kFATAL << "Booking failed since method with title <"
               << methodTitle << "> already exists in dataset with name <" << loader->GetName() << "> "
               << Endl;
      }
   }


   Log() << kHEADER << "Booking method: " << gTools().Color("bold") << methodTitle
         // << gTools().Color("reset")<<" DataSet Name: "<<gTools().Color("bold")<<loader->GetName()
         << gTools().Color("reset") << Endl << Endl;

   // interpret option string with respect to a request for boosting (i.e., Boost_num > 0)
   Int_t boostNum = 0;
   TMVA::Configurable* conf = new TMVA::Configurable( theOption );
   conf->DeclareOptionRef( boostNum = 0, "Boost_num",
                           "Number of times the classifier will be boosted" );
   conf->ParseOptions();
   delete conf;
   TString fFileDir;
   if(fModelPersistence)
   {
      fFileDir  = loader->GetName();
      fFileDir += "/" + gConfig().GetIONames().fWeightFileDir;
   }
   // initialize methods
   IMethod* im;
   if (!boostNum) {
      im = ClassifierFactory::Instance().Create( std::string(theMethodName),
                                                 fJobName,
                                                 methodTitle,
                                                 loader->DefaultDataSetInfo(),
                                                 theOption );
   }
   else {
      // boosted classifier, requires a specific definition, making it transparent for the user
      Log() << kDEBUG << "Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;
      im = ClassifierFactory::Instance().Create( std::string("Boost"),
                                                 fJobName,
                                                 methodTitle,
                                                 loader->DefaultDataSetInfo(),
                                                 theOption );
      MethodBoost* methBoost = dynamic_cast<MethodBoost*>(im); // DSMTEST divided into two lines
      if (!methBoost) // DSMTEST
         Log() << kFATAL << "Method with type kBoost cannot be casted to MethodBoost. /Factory" << Endl; // DSMTEST

      if(fModelPersistence) methBoost->SetWeightFileDir(fFileDir);
      methBoost->SetModelPersistence(fModelPersistence);
      methBoost->SetBoostedMethodName( theMethodName ); // DSMTEST divided into two lines
      methBoost->fDataSetManager = loader->fDataSetManager; // DSMTEST
      methBoost->SetFile(fgTargetFile);
      methBoost->SetSilentFile(IsSilentFile());
   }

   MethodBase *method = dynamic_cast<MethodBase*>(im);
   if (method==0) return 0; // could not create method

   // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
   if (method->GetMethodType() == Types::kCategory) { // DSMTEST
      MethodCategory *methCat = (dynamic_cast<MethodCategory*>(im)); // DSMTEST
      if (!methCat) // DSMTEST
         Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /Factory" << Endl; // DSMTEST

      if(fModelPersistence) methCat->SetWeightFileDir(fFileDir);
      methCat->SetModelPersistence(fModelPersistence);
      methCat->fDataSetManager = loader->fDataSetManager; // DSMTEST
      methCat->SetFile(fgTargetFile);
      methCat->SetSilentFile(IsSilentFile());
   } // DSMTEST


   if (!method->HasAnalysisType( fAnalysisType,
                                 loader->DefaultDataSetInfo().GetNClasses(),
                                 loader->DefaultDataSetInfo().GetNTargets() )) {
      Log() << kWARNING << "Method " << method->GetMethodTypeName() << " is not capable of handling ";
      if (fAnalysisType == Types::kRegression) {
         Log() << "regression with " << loader->DefaultDataSetInfo().GetNTargets() << " targets." << Endl;
      }
      else if (fAnalysisType == Types::kMulticlass ) {
         Log() << "multiclass classification with " << loader->DefaultDataSetInfo().GetNClasses() << " classes." << Endl;
      }
      else {
         Log() << "classification with " << loader->DefaultDataSetInfo().GetNClasses() << " classes." << Endl;
      }
      return 0;
   }

   if(fModelPersistence) method->SetWeightFileDir(fFileDir);
   method->SetModelPersistence(fModelPersistence);
   method->SetAnalysisType( fAnalysisType );
   method->SetupMethod();
   method->ParseOptions();
   method->ProcessSetup();
   method->SetFile(fgTargetFile);
   method->SetSilentFile(IsSilentFile());

   // check-for-unused-options is performed; may be overridden by derived classes
   method->CheckSetup();

   if(fMethodsMap.find(datasetname)==fMethodsMap.end())
   {
      MVector *mvector = new MVector;
      fMethodsMap[datasetname] = mvector;
   }
   fMethodsMap[datasetname]->push_back( method );
   return method;
}
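
// Booking sketch (illustrative only, not part of this file). The DataLoader
// setup is abbreviated, and the BDT option string is an assumption taken from
// typical TMVA tutorials rather than from this file:
//
//    TMVA::DataLoader* loader = new TMVA::DataLoader("dataset");
//    // ... loader->AddVariable(...), loader->AddSignalTree(...), etc. ...
//    factory.BookMethod(loader, "BDT", "myBDT",
//                       "NTrees=400:MaxDepth=3:BoostType=AdaBoost");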

//_______________________________________________________________________
TMVA::MethodBase* TMVA::Factory::BookMethod( TMVA::DataLoader *loader, Types::EMVA theMethod, TString methodTitle, TString theOption )
{
   // Books MVA method; the option configuration string is custom for each MVA.
   // The TString field "methodTitle" serves to define (and distinguish)
   // several instances of a given MVA, e.g. when one wants to compare the
   // performance of various configurations.
   return BookMethod( loader, Types::Instance().GetMethodName( theMethod ), methodTitle, theOption );
}

//_______________________________________________________________________
TMVA::IMethod* TMVA::Factory::GetMethod(const TString& datasetname, const TString &methodTitle ) const
{
   // returns pointer to MVA that corresponds to given method title
   if(fMethodsMap.find(datasetname)==fMethodsMap.end()) return 0;

   MVector *methods = fMethodsMap.find(datasetname)->second;

   MVector::const_iterator itrMethod;
   for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
      MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
      if ( (mva->GetMethodName())==methodTitle ) return mva;
   }
   return 0;
}

//_______________________________________________________________________
void TMVA::Factory::WriteDataInformation(DataSetInfo& fDataSetInfo)
{
   RootBaseDir()->cd();

   if(!RootBaseDir()->GetDirectory(fDataSetInfo.GetName())) RootBaseDir()->mkdir(fDataSetInfo.GetName());
   else return; // loader is now in the output file, we don't need to save again

   RootBaseDir()->cd(fDataSetInfo.GetName());
   fDataSetInfo.GetDataSet(); // builds dataset (including calculation of correlation matrix)


   // correlation matrix of the default DS
   const TMatrixD* m(0);
   const TH2* h(0);

   if(fAnalysisType == Types::kMulticlass){
      for (UInt_t cls = 0; cls < fDataSetInfo.GetNClasses() ; cls++) {
         m = fDataSetInfo.CorrelationMatrix(fDataSetInfo.GetClassInfo(cls)->GetName());
         h = fDataSetInfo.CreateCorrelationMatrixHist(m, TString("CorrelationMatrix")+fDataSetInfo.GetClassInfo(cls)->GetName(),
                                                         TString("Correlation Matrix (")+ fDataSetInfo.GetClassInfo(cls)->GetName() +TString(")"));
         if (h!=0) {
            h->Write();
            delete h;
         }
      }
   }
   else{
      m = fDataSetInfo.CorrelationMatrix( "Signal" );
      h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixS", "Correlation Matrix (signal)");
      if (h!=0) {
         h->Write();
         delete h;
      }

      m = fDataSetInfo.CorrelationMatrix( "Background" );
      h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrixB", "Correlation Matrix (background)");
      if (h!=0) {
         h->Write();
         delete h;
      }

      m = fDataSetInfo.CorrelationMatrix( "Regression" );
      h = fDataSetInfo.CreateCorrelationMatrixHist(m, "CorrelationMatrix", "Correlation Matrix");
      if (h!=0) {
         h->Write();
         delete h;
      }
   }

   // some default transformations to evaluate
   // NOTE: all transformations are destroyed after this test
   TString processTrfs = "I"; //"I;N;D;P;U;G,D;"

   // plus some user defined transformations
   processTrfs = fTransformations;

   // remove any trace of identity transform - if given (avoid to apply it twice)
   std::vector<TMVA::TransformationHandler*> trfs;
   TransformationHandler* identityTrHandler = 0;

   std::vector<TString> trfsDef = gTools().SplitString(processTrfs,';');
   std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
   for (; trfsDefIt!=trfsDef.end(); trfsDefIt++) {
      trfs.push_back(new TMVA::TransformationHandler(fDataSetInfo, "Factory"));
      TString trfS = (*trfsDefIt);

      //Log() << kINFO << Endl;
      Log() << kDEBUG << "current transformation string: '" << trfS.Data() << "'" << Endl;
      TMVA::CreateVariableTransforms( trfS,
                                      fDataSetInfo,
                                      *(trfs.back()),
                                      Log() );

      if (trfS.BeginsWith('I')) identityTrHandler = trfs.back();
   }

   const std::vector<Event*>& inputEvents = fDataSetInfo.GetDataSet()->GetEventCollection();

   // apply all transformations
   std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();

   for (;trfIt != trfs.end(); trfIt++) {
      // setting a ROOT dir causes the variables distributions to be saved to the root file
      (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.GetName())); // every dataloader has its own dir
      (*trfIt)->CalcTransformations(inputEvents);
   }
   if(identityTrHandler) identityTrHandler->PrintVariableRanking();

   // clean up
   for (trfIt = trfs.begin(); trfIt != trfs.end(); trfIt++) delete *trfIt;
}

////////////////////////////////////////////////////////////////////////////////
/// Iterates through all booked methods and sees if they use parameter tuning and if so
/// does just that, i.e. calls "Method::Train()" for different parameter settings and
/// keeps in mind the "optimal one"... and that's the one that will later on be used
/// in the main training loop.

std::map<TString,Double_t> TMVA::Factory::OptimizeAllMethods(TString fomType, TString fitType)
{
   std::map<TString,MVector*>::iterator itrMap;
   std::map<TString,Double_t> TunedParameters;
   for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
   {
      MVector *methods = itrMap->second;

      MVector::iterator itrMethod;

      // iterate over methods and optimize
      for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
         Event::SetIsTraining(kTRUE);
         MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
         if (!mva) {
            Log() << kFATAL << "Dynamic cast to MethodBase failed" << Endl;
            return TunedParameters;
         }

         if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
            Log() << kWARNING << "Method " << mva->GetMethodName()
                  << " not trained (training tree has less entries ["
                  << mva->Data()->GetNTrainingEvents()
                  << "] than required [" << MinNoTrainingEvents << "]" << Endl;
            continue;
         }

         Log() << kINFO << "Optimize method: " << mva->GetMethodName() << " for "
               << (fAnalysisType == Types::kRegression ? "Regression" :
                   (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl;

         TunedParameters = mva->OptimizeTuningParameters(fomType,fitType);
         Log() << kINFO << "Optimization of tuning parameters finished for Method:" << mva->GetName() << Endl;
      }
   }

   return TunedParameters;
}
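
// Call sketch (illustrative only, not part of this file). The argument values
// "ROCIntegral" and "FitGA" are assumptions (commonly used figure-of-merit and
// fitter names for OptimizeTuningParameters), not taken from this file:
//
//    std::map<TString,Double_t> tuned = factory.OptimizeAllMethods("ROCIntegral", "FitGA");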

//_______________________________________________________________________
Double_t TMVA::Factory::GetROCIntegral(TMVA::DataLoader *loader, TString theMethodName)
{
   return GetROCIntegral((TString)loader->GetName(), theMethodName);
}

//_______________________________________________________________________
Double_t TMVA::Factory::GetROCIntegral(TString datasetname, TString theMethodName)
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
      return 0;
   }
   MVector *methods = fMethodsMap[datasetname.Data()];
   MVector::iterator itrMethod = methods->begin();
   TMVA::MethodBase *method = 0;
   while (itrMethod != methods->end()) {
      TMVA::MethodBase *cmethod = dynamic_cast<TMVA::MethodBase *>(*itrMethod);
      if (!cmethod) {
         // msg of error here
         itrMethod++;
         continue;
      }
      if (cmethod->GetMethodName() == theMethodName) {
         method = cmethod;
         break;
      }
      itrMethod++;
   }

   if (!method) {
      Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
      return 0;
   }

   TMVA::Results *results = method->Data()->GetResults(method->GetMethodName(), Types::kTesting, Types::kClassification);

   std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
   std::vector<Bool_t> *mvaResType = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();

   TMVA::ROCCurve *fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType);
   if (!fROCCurve) Log() << kFATAL << Form("ROCCurve object was not created for Method = %s with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;

   Double_t fROCalcValue = fROCCurve->GetROCIntegral();
   delete fROCCurve; // release the helper object before returning

   return fROCalcValue;
}
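
// Call sketch (illustrative only, not part of this file); "dataset" and
// "myBDT" are the hypothetical names used in the booking sketch above:
//
//    Double_t rocInt = factory.GetROCIntegral("dataset", "myBDT");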

//_______________________________________________________________________
TGraph* TMVA::Factory::GetROCCurve(TMVA::DataLoader *loader, TString theMethodName, Bool_t fLegend)
{
   return GetROCCurve((TString)loader->GetName(), theMethodName, fLegend);
}

//_______________________________________________________________________
TGraph* TMVA::Factory::GetROCCurve(TString datasetname, TString theMethodName, Bool_t fLegend)
{
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
      return 0;
   }
   MVector *methods = fMethodsMap[datasetname.Data()];
   MVector::iterator itrMethod = methods->begin();
   TMVA::MethodBase *method = 0;
   while (itrMethod != methods->end()) {
      TMVA::MethodBase *cmethod = dynamic_cast<TMVA::MethodBase *>(*itrMethod);
      if (!cmethod) {
         // msg of error here
         itrMethod++;
         continue;
      }
      if (cmethod->GetMethodName() == theMethodName) {
         method = cmethod;
         break;
      }
      itrMethod++;
   }

   if (!method) {
      Log() << kERROR << Form("Method = %s not found with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;
      return 0;
   }

   TMVA::Results *results = method->Data()->GetResults(method->GetMethodName(), Types::kTesting, Types::kClassification);

   std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
   std::vector<Bool_t> *mvaResType = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();

   TMVA::ROCCurve *fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType);
   if (!fROCCurve) Log() << kFATAL << Form("ROCCurve object was not created for Method = %s with Dataset = %s ", theMethodName.Data(), datasetname.Data()) << Endl;

   TGraph *fGraph = (TGraph *)fROCCurve->GetROCCurve()->Clone();
   if(fLegend)
   {
      fGraph->GetYaxis()->SetTitle("Background Rejection");
      fGraph->GetXaxis()->SetTitle("Signal Efficiency");
      fGraph->SetTitle(Form("Background Rejection vs. Signal Efficiency (%s)", method->GetMethodName().Data()));
   }
   delete fROCCurve;
   return fGraph;
}
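
// Call sketch (illustrative only, not part of this file); clones the ROC graph
// of the hypothetical method "myBDT" so the caller owns it and may write it out:
//
//    TGraph* rocGraph = factory.GetROCCurve(loader, "myBDT", kTRUE);
//    rocGraph->Write("ROC_myBDT");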


//_______________________________________________________________________
TCanvas* TMVA::Factory::GetROCCurve(TMVA::DataLoader *loader)
{
   return GetROCCurve((TString)loader->GetName());
}

//_______________________________________________________________________
TCanvas* TMVA::Factory::GetROCCurve(TString datasetname)
{
   // Lookup dataset.
   if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
      Log() << kERROR << Form("DataSet = %s not found in methods map.", datasetname.Data()) << Endl;
      return 0;
   }

   // Create canvas.
   TString name("ROCCurve ");
   name += datasetname;
   TCanvas *fCanvas = new TCanvas(name,"ROC Curve",200,10,700,500);
   fCanvas->SetGrid();
   UInt_t line_color = 0; // count line colors in canvas

   TLegend *fLegend = new TLegend(0.15, 0.15, 0.35, 0.3, "MVA Method");
   TGraph *fGraph = nullptr;

   // Loop over dataset.
   MVector *methods = fMethodsMap[datasetname.Data()];
   MVector::iterator itr = methods->begin();

   while (itr != methods->end()) {

      TMVA::MethodBase *method = dynamic_cast<TMVA::MethodBase *>(*itr);
      itr++;
      if (!method) {
         continue;
      }
      // Get results.
      TMVA::Results *results = method->Data()->GetResults(method->GetMethodName(),
                                                          Types::kTesting,
                                                          Types::kClassification);

      std::vector<Float_t> *mvaRes =
         dynamic_cast<ResultsClassification *>(results)->GetValueVector();
      std::vector<Bool_t> *mvaResType =
         dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();

      // Generate ROCCurve.
      TMVA::ROCCurve *fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType);
      if (!fROCCurve)
         Log() << kFATAL << Form("ROCCurve object was not created for Method = %s with Dataset = %s ", method->GetMethodName().Data(), datasetname.Data()) << Endl;
      fGraph = (TGraph*)fROCCurve->GetROCCurve()->Clone();
      delete fROCCurve;
      // Draw axes.
      if (line_color == 0)
      {
         fGraph->GetYaxis()->SetTitle("Background Rejection");
         fGraph->GetXaxis()->SetTitle("Signal Efficiency");
         fGraph->SetTitle("Background Rejection vs. Signal Efficiency");
         fGraph->Draw("AC");
      }
      else
         fGraph->Draw("C");

      fGraph->SetLineWidth(2);
      fGraph->SetLineColor(++line_color);

      fLegend->AddEntry(fGraph, method->GetMethodName(), "l");
   }

   // Draw legend.
   fLegend->Draw();

   return fCanvas;
}
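
// Call sketch (illustrative only, not part of this file); draws the ROC curves
// of all methods booked on one dataset into a single canvas (the output file
// name is hypothetical):
//
//    TCanvas* rocCanvas = factory.GetROCCurve(loader);
//    rocCanvas->SaveAs("ROCCurves.png");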



////////////////////////////////////////////////////////////////////////////////
/// Iterates through all booked methods and calls training.

void TMVA::Factory::TrainAllMethods()
{
   Log() << kHEADER << gTools().Color("bold") << "Train all methods" << gTools().Color("reset") << Endl;
   // iterates over all MVAs that have been booked, and calls their training methods

   // don't do anything if no method booked
   if (fMethodsMap.empty()) {
      Log() << kINFO << "...nothing found to train" << Endl;
      return;
   }

   // here the training starts
   //Log() << kINFO << " " << Endl;
   Log() << kDEBUG << "Train all methods for "
         << (fAnalysisType == Types::kRegression ? "Regression" :
             (fAnalysisType == Types::kMulticlass ? "Multiclass" : "Classification") ) << " ..." << Endl;

   std::map<TString,MVector*>::iterator itrMap;

   for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
   {
      MVector *methods = itrMap->second;
      MVector::iterator itrMethod;

      // iterate over methods and train
      for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
         Event::SetIsTraining(kTRUE);
         MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);

         if(mva==0) continue;

         if(mva->DataInfo().GetDataSetManager()->DataInput().GetEntries() <= 1) { // 0 entries --> 0 events, 1 entry --> dynamical dataset (or one entry)
            Log() << kFATAL << "No input data for the training provided!" << Endl;
         }

         if(fAnalysisType == Types::kRegression && mva->DataInfo().GetNTargets() < 1 )
            Log() << kFATAL << "You want to do regression training without specifying a target." << Endl;
         else if( (fAnalysisType == Types::kMulticlass || fAnalysisType == Types::kClassification)
                  && mva->DataInfo().GetNClasses() < 2 )
            Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;

         // first print some information about the default dataset
         if(!IsSilentFile()) WriteDataInformation(mva->fDataSetInfo);

         if (mva->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
            Log() << kWARNING << "Method " << mva->GetMethodName()
                  << " not trained (training tree has less entries ["
                  << mva->Data()->GetNTrainingEvents()
                  << "] than required [" << MinNoTrainingEvents << "]" << Endl;
            continue;
         }

         Log() << kHEADER << "Train method: " << mva->GetMethodName() << " for "
               << (fAnalysisType == Types::kRegression ? "Regression" :
                   (fAnalysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << Endl << Endl;
         mva->TrainMethod();
         Log() << kHEADER << "Training finished" << Endl << Endl;
      }

      if (fAnalysisType != Types::kRegression) {

         // variable ranking
         //Log() << Endl;
         Log() << kINFO << "Ranking input variables (method specific)..." << Endl;
         for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
            MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
            if (mva && mva->Data()->GetNTrainingEvents() >= MinNoTrainingEvents) {

               // create and print ranking
               const Ranking* ranking = (*itrMethod)->CreateRanking();
               if (ranking != 0) ranking->Print();
               else Log() << kINFO << "No variable ranking supplied by classifier: "
                          << dynamic_cast<MethodBase*>(*itrMethod)->GetMethodName() << Endl;
            }
         }
      }

      // delete all methods and recreate them from weight file - this ensures that the application
      // of the methods (in TMVAClassificationApplication) is consistent with the results obtained
      // in the testing
      //Log() << Endl;
      if (fModelPersistence) {

         Log() << kHEADER << "=== Destroy and recreate all methods via weight files for testing ===" << Endl << Endl;

         if(!IsSilentFile()) RootBaseDir()->cd();

         // iterate through all booked methods
         for (UInt_t i=0; i<methods->size(); i++) {

            MethodBase* m = dynamic_cast<MethodBase*>((*methods)[i]);
            if(m==0) continue;

            TMVA::Types::EMVA methodType = m->GetMethodType();
            TString weightfile = m->GetWeightFileName();

            // decide if .txt or .xml file should be read:
            if (READXML) weightfile.ReplaceAll(".txt",".xml");

            DataSetInfo& dataSetInfo = m->DataInfo();
            TString testvarName = m->GetTestvarName();
            delete m; // itrMethod[i];

            // recreate
            m = dynamic_cast<MethodBase*>( ClassifierFactory::Instance()
                                           .Create( std::string(Types::Instance().GetMethodName(methodType)),
                                                    dataSetInfo, weightfile ) );
            if( m->GetMethodType() == Types::kCategory ){
               MethodCategory *methCat = (dynamic_cast<MethodCategory*>(m));
               if( !methCat ) Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /Factory" << Endl;
               else methCat->fDataSetManager = m->DataInfo().GetDataSetManager();
            }
            //ToDo, Do we need to fill the DataSetManager of MethodBoost here too?


            TString fFileDir = m->DataInfo().GetName();
            fFileDir += "/" + gConfig().GetIONames().fWeightFileDir;
            m->SetWeightFileDir(fFileDir);
            m->SetModelPersistence(fModelPersistence);
            m->SetSilentFile(IsSilentFile());
            m->SetAnalysisType(fAnalysisType);
            m->SetupMethod();
            m->ReadStateFromFile();
            m->SetTestvarName(testvarName);

            // replace trained method by newly created one (from weight file) in methods vector
            (*methods)[i] = m;
         }
      }
   }
}

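// Usage sketch (illustrative only, not part of this file): after booking,
// the usual sequence is train, then test, then evaluate:
//
//    factory.TrainAllMethods();
//    factory.TestAllMethods();
//    factory.EvaluateAllMethods();
//    outputFile->Close();
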
////////////////////////////////////////////////////////////////////////////////

void TMVA::Factory::TestAllMethods()
{
   Log() << kHEADER << gTools().Color("bold") << "Test all methods" << gTools().Color("reset") << Endl;

   // don't do anything if no method booked
   if (fMethodsMap.empty()) {
      Log() << kINFO << "...nothing found to test" << Endl;
      return;
   }
   std::map<TString,MVector*>::iterator itrMap;

   for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
   {
      MVector *methods = itrMap->second;
      MVector::iterator itrMethod;

      // iterate over methods and test
      for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
         Event::SetIsTraining(kFALSE);
         MethodBase* mva = dynamic_cast<MethodBase*>(*itrMethod);
         if(mva==0) continue;
         Types::EAnalysisType analysisType = mva->GetAnalysisType();
         Log() << kHEADER << "Test method: " << mva->GetMethodName() << " for "
               << (analysisType == Types::kRegression ? "Regression" :
                   (analysisType == Types::kMulticlass ? "Multiclass classification" : "Classification")) << " performance" << Endl << Endl;
         mva->AddOutput( Types::kTesting, analysisType );
      }
   }
}

//_______________________________________________________________________
void TMVA::Factory::MakeClass(const TString& datasetname, const TString& methodTitle ) const
{
   if (methodTitle != "") {
      IMethod* method = GetMethod(datasetname, methodTitle);
      if (method) method->MakeClass();
      else {
         Log() << kWARNING << "<MakeClass> Could not find classifier \"" << methodTitle
               << "\" in list" << Endl;
      }
   }
   else {

      // no classifier specified, print all help messages
      MVector *methods = fMethodsMap.find(datasetname)->second;
      MVector::const_iterator itrMethod;
      for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
         MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
         if(method==0) continue;
         Log() << kINFO << "Make response class for classifier: " << method->GetMethodName() << Endl;
         method->MakeClass();
      }
   }
}
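
// Call sketch (illustrative only, not part of this file); writes a standalone
// C++ response class for the hypothetical method "myBDT":
//
//    factory.MakeClass("dataset", "myBDT");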

////////////////////////////////////////////////////////////////////////////////
/// Print predefined help message of classifier;
/// iterate over methods and test.

void TMVA::Factory::PrintHelpMessage(const TString& datasetname, const TString& methodTitle ) const
{
   if (methodTitle != "") {
      IMethod* method = GetMethod(datasetname, methodTitle );
      if (method) method->PrintHelpMessage();
      else {
         Log() << kWARNING << "<PrintHelpMessage> Could not find classifier \"" << methodTitle
               << "\" in list" << Endl;
      }
   }
   else {

      // no classifier specified, print all help messages
      MVector *methods = fMethodsMap.find(datasetname)->second;
      MVector::const_iterator itrMethod;
      for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
         MethodBase* method = dynamic_cast<MethodBase*>(*itrMethod);
         if(method==0) continue;
         Log() << kINFO << "Print help message for classifier: " << method->GetMethodName() << Endl;
         method->PrintHelpMessage();
      }
   }
}
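
// Call sketch (illustrative only, not part of this file):
//
//    factory.PrintHelpMessage("dataset", "myBDT");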

////////////////////////////////////////////////////////////////////////////////
/// Iterates over all MVA input variables and evaluates them.

void TMVA::Factory::EvaluateAllVariables(DataLoader *loader, TString options )
{
   Log() << kINFO << "Evaluating all variables..." << Endl;
   Event::SetIsTraining(kFALSE);

   for (UInt_t i=0; i<loader->DefaultDataSetInfo().GetNVariables(); i++) {
      TString s = loader->DefaultDataSetInfo().GetVariableInfo(i).GetLabel();
      if (options.Contains("V")) s += ":V";
      this->BookMethod(loader, "Variable", s );
   }
}
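
// Call sketch (illustrative only, not part of this file); books one "Variable"
// method per input variable so the variables themselves can be ranked in the
// evaluation stage:
//
//    factory.EvaluateAllVariables(loader, "");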

////////////////////////////////////////////////////////////////////////////////
/// Iterates over all MVAs that have been booked, and calls their evaluation methods.

void TMVA::Factory::EvaluateAllMethods( void )
{
   Log() << kHEADER << gTools().Color("bold") << "Evaluate all methods" << gTools().Color("reset") << Endl;

   // don't do anything if no method booked
   if (fMethodsMap.empty()) {
      Log() << kINFO << "...nothing found to evaluate" << Endl;
      return;
   }
   std::map<TString,MVector*>::iterator itrMap;

   for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
   {
      MVector *methods = itrMap->second;

      // -----------------------------------------------------------------------
      // First part of evaluation process
      // --> compute efficiencies, and other separation estimators
      // -----------------------------------------------------------------------

      // although equal, we now want to separate the output for the variables
      // and the real methods
      Int_t isel;                  // will be 0 for a Method; 1 for a Variable
      Int_t nmeth_used[2] = {0,0}; // 0 Method; 1 Variable

      std::vector<std::vector<TString> >  mname(2);
      std::vector<std::vector<Double_t> > sig(2), sep(2), roc(2);
      std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
      std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
      std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);

      std::vector<std::vector<Float_t> > multiclass_testEff;
      std::vector<std::vector<Float_t> > multiclass_trainEff;
      std::vector<std::vector<Float_t> > multiclass_testPur;
      std::vector<std::vector<Float_t> > multiclass_trainPur;

      std::vector<std::vector<Double_t> > biastrain(1);  // "bias" of the regression on the training data
      std::vector<std::vector<Double_t> > biastest(1);   // "bias" of the regression on test data
      std::vector<std::vector<Double_t> > devtrain(1);   // "dev" of the regression on the training data
      std::vector<std::vector<Double_t> > devtest(1);    // "dev" of the regression on test data
      std::vector<std::vector<Double_t> > rmstrain(1);   // "rms" of the regression on the training data
      std::vector<std::vector<Double_t> > rmstest(1);    // "rms" of the regression on test data
      std::vector<std::vector<Double_t> > minftrain(1);  // "minf" of the regression on the training data
      std::vector<std::vector<Double_t> > minftest(1);   // "minf" of the regression on test data
      std::vector<std::vector<Double_t> > rhotrain(1);   // correlation of the regression on the training data
      std::vector<std::vector<Double_t> > rhotest(1);    // correlation of the regression on test data

      // same as above but for 'truncated' quantities (computed for events within 2sigma of RMS)
      std::vector<std::vector<Double_t> > biastrainT(1);
      std::vector<std::vector<Double_t> > biastestT(1);
      std::vector<std::vector<Double_t> > devtrainT(1);
      std::vector<std::vector<Double_t> > devtestT(1);
      std::vector<std::vector<Double_t> > rmstrainT(1);
      std::vector<std::vector<Double_t> > rmstestT(1);
      std::vector<std::vector<Double_t> > minftrainT(1);
      std::vector<std::vector<Double_t> > minftestT(1);

      // following vector contains all methods - with the exception of Cuts, which are special
      MVector methodsNoCuts;

      Bool_t doRegression = kFALSE;
      Bool_t doMulticlass = kFALSE;

      // iterate over methods and evaluate
      for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
         Event::SetIsTraining(kFALSE);
         MethodBase* theMethod = dynamic_cast<MethodBase*>(*itrMethod);
         if(theMethod==0) continue;
         theMethod->SetFile(fgTargetFile);
         theMethod->SetSilentFile(IsSilentFile());
         if (theMethod->GetMethodType() != Types::kCuts) methodsNoCuts.push_back( *itrMethod );

         if (theMethod->DoRegression()) {
            doRegression = kTRUE;

            Log() << kINFO << "Evaluate regression method: " << theMethod->GetMethodName() << Endl;
            Double_t bias, dev, rms, mInf;
            Double_t biasT, devT, rmsT, mInfT;
            Double_t rho;

            theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTesting );
            biastest[0]  .push_back( bias );
            devtest[0]   .push_back( dev );
            rmstest[0]   .push_back( rms );
            minftest[0]  .push_back( mInf );
            rhotest[0]   .push_back( rho );
            biastestT[0] .push_back( biasT );
            devtestT[0]  .push_back( devT );
            rmstestT[0]  .push_back( rmsT );
            minftestT[0] .push_back( mInfT );

            theMethod->TestRegression( bias, biasT, dev, devT, rms, rmsT, mInf, mInfT, rho, TMVA::Types::kTraining );
            biastrain[0] .push_back( bias );
            devtrain[0]  .push_back( dev );
            rmstrain[0]  .push_back( rms );
            minftrain[0] .push_back( mInf );
            rhotrain[0]  .push_back( rho );
            biastrainT[0].push_back( biasT );
            devtrainT[0] .push_back( devT );
            rmstrainT[0] .push_back( rmsT );
            minftrainT[0].push_back( mInfT );

            mname[0].push_back( theMethod->GetMethodName() );
            nmeth_used[0]++;
            if(!IsSilentFile())
            {
               Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
               theMethod->WriteEvaluationHistosToFile(Types::kTesting);
               theMethod->WriteEvaluationHistosToFile(Types::kTraining);
            }
         }
         else if (theMethod->DoMulticlass()) {
            doMulticlass = kTRUE;
            Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl;
            if(!IsSilentFile())
            {
               Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
               theMethod->WriteEvaluationHistosToFile(Types::kTesting);
               theMethod->WriteEvaluationHistosToFile(Types::kTraining);
            }
            theMethod->TestMulticlass();
            multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur));

            nmeth_used[0]++;
            mname[0].push_back( theMethod->GetMethodName() );
         }
         else {

            Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl;
            isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 1 : 0;

            // perform the evaluation
            theMethod->TestClassification();


            // evaluate the classifier
            mname[isel].push_back( theMethod->GetMethodName() );
            sig[isel].push_back  ( theMethod->GetSignificance() );
            sep[isel].push_back  ( theMethod->GetSeparation() );
            roc[isel].push_back  ( theMethod->GetROCIntegral() );

            Double_t err;
            eff01[isel].push_back( theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err) );
            eff01err[isel].push_back( err );
            eff10[isel].push_back( theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err) );
            eff10err[isel].push_back( err );
            eff30[isel].push_back( theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err) );
            eff30err[isel].push_back( err );
            effArea[isel].push_back( theMethod->GetEfficiency("", Types::kTesting, err) ); // computes the area (average)

            trainEff01[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.01") ); // the first pass takes longer
            trainEff10[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.10") );
            trainEff30[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.30") );

            nmeth_used[isel]++;

            if(!IsSilentFile())
            {
               Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
               theMethod->WriteEvaluationHistosToFile(Types::kTesting);
               theMethod->WriteEvaluationHistosToFile(Types::kTraining);
            }
         }
      }
      if (doRegression) {

         std::vector<TString> vtemps = mname[0];
         std::vector< std::vector<Double_t> > vtmp;
         vtmp.push_back( devtest[0]   );  // this is the vector that is ranked
         vtmp.push_back( devtrain[0]  );
         vtmp.push_back( biastest[0]  );
         vtmp.push_back( biastrain[0] );
         vtmp.push_back( rmstest[0]   );
         vtmp.push_back( rmstrain[0]  );
         vtmp.push_back( minftest[0]  );
         vtmp.push_back( minftrain[0] );
         vtmp.push_back( rhotest[0]   );
         vtmp.push_back( rhotrain[0]  );
         vtmp.push_back( devtestT[0]  );  // this is the vector that is ranked
         vtmp.push_back( devtrainT[0] );
         vtmp.push_back( biastestT[0] );
         vtmp.push_back( biastrainT[0]);
         vtmp.push_back( rmstestT[0]  );
         vtmp.push_back( rmstrainT[0] );
         vtmp.push_back( minftestT[0] );
         vtmp.push_back( minftrainT[0]);
         gTools().UsefulSortAscending( vtmp, &vtemps );
         mname[0]      = vtemps;
         devtest[0]    = vtmp[0];
         devtrain[0]   = vtmp[1];
         biastest[0]   = vtmp[2];
         biastrain[0]  = vtmp[3];
         rmstest[0]    = vtmp[4];
         rmstrain[0]   = vtmp[5];
         minftest[0]   = vtmp[6];
         minftrain[0]  = vtmp[7];
         rhotest[0]    = vtmp[8];
         rhotrain[0]   = vtmp[9];
         devtestT[0]   = vtmp[10];
         devtrainT[0]  = vtmp[11];
         biastestT[0]  = vtmp[12];
         biastrainT[0] = vtmp[13];
         rmstestT[0]   = vtmp[14];
         rmstrainT[0]  = vtmp[15];
         minftestT[0]  = vtmp[16];
         minftrainT[0] = vtmp[17];
      }
      else if (doMulticlass) {
         // TODO: fill in something meaningful

      }
      else {
         // now sort the variables according to the best 'eff at Beff=0.10'
         for (Int_t k=0; k<2; k++) {
            std::vector< std::vector<Double_t> > vtemp;
            vtemp.push_back( effArea[k] );  // this is the vector that is ranked
            vtemp.push_back( eff10[k] );
            vtemp.push_back( eff01[k] );
            vtemp.push_back( eff30[k] );
            vtemp.push_back( eff10err[k] );
            vtemp.push_back( eff01err[k] );
            vtemp.push_back( eff30err[k] );
            vtemp.push_back( trainEff10[k] );
            vtemp.push_back( trainEff01[k] );
            vtemp.push_back( trainEff30[k] );
            vtemp.push_back( sig[k] );
            vtemp.push_back( sep[k] );
            vtemp.push_back( roc[k] );
            std::vector<TString> vtemps = mname[k];
            gTools().UsefulSortDescending( vtemp, &vtemps );
            effArea[k]    = vtemp[0];
            eff10[k]      = vtemp[1];
            eff01[k]      = vtemp[2];
            eff30[k]      = vtemp[3];
            eff10err[k]   = vtemp[4];
            eff01err[k]   = vtemp[5];
            eff30err[k]   = vtemp[6];
            trainEff10[k] = vtemp[7];
            trainEff01[k] = vtemp[8];
            trainEff30[k] = vtemp[9];
            sig[k]        = vtemp[10];
            sep[k]        = vtemp[11];
            roc[k]        = vtemp[12];
            mname[k]      = vtemps;
         }
      }

      // -----------------------------------------------------------------------
      // Second part of evaluation process
      // --> compute correlations among MVAs
      // --> compute correlations between input variables and MVA (determines importance)
      // --> count overlaps
      // -----------------------------------------------------------------------
      if(fCorrelations)
      {
         const Int_t nmeth = methodsNoCuts.size();
         MethodBase* method = dynamic_cast<MethodBase*>(methods[0][0]);
         const Int_t nvar  = method->fDataSetInfo.GetNVariables();
         if (!doRegression && !doMulticlass ) {

            if (nmeth > 0) {

               // needed for correlations
               Double_t *dvec = new Double_t[nmeth+nvar];
               std::vector<Double_t> rvec;

               // for correlations
               TPrincipal* tpSig = new TPrincipal( nmeth+nvar, "" );
               TPrincipal* tpBkg = new TPrincipal( nmeth+nvar, "" );

               // set required tree branch references
               Int_t ivar = 0;
               std::vector<TString>* theVars = new std::vector<TString>;
               std::vector<ResultsClassification*> mvaRes;
               for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); itrMethod++, ivar++) {
                  MethodBase* m = dynamic_cast<MethodBase*>(*itrMethod);
                  if(m==0) continue;
                  theVars->push_back( m->GetTestvarName() );
                  rvec.push_back( m->GetSignalReferenceCut() );
                  theVars->back().ReplaceAll( "MVA_", "" );
                  mvaRes.push_back( dynamic_cast<ResultsClassification*>( m->Data()->GetResults( m->GetMethodName(),
                                                                                                 Types::kTesting,
                                                                                                 Types::kMaxAnalysisType) ) );
               }

               // for overlap study
               TMatrixD* overlapS = new TMatrixD( nmeth, nmeth );
               TMatrixD* overlapB = new TMatrixD( nmeth, nmeth );
               (*overlapS) *= 0; // init...
               (*overlapB) *= 0; // init...

               // loop over test tree
               DataSet* defDs = method->fDataSetInfo.GetDataSet();
               defDs->SetCurrentType(Types::kTesting);
               for (Int_t ievt=0; ievt<defDs->GetNEvents(); ievt++) {
                  const Event* ev = defDs->GetEvent(ievt);

                  // for correlations
                  TMatrixD* theMat = 0;
                  for (Int_t im=0; im<nmeth; im++) {
                     // check for NaN value
                     Double_t retval = (Double_t)(*mvaRes[im])[ievt][0];
                     if (TMath::IsNaN(retval)) {
                        Log() << kWARNING << "Found NaN return value in event: " << ievt
                              << " for method \"" << methodsNoCuts[im]->GetName() << "\"" << Endl;
                        dvec[im] = 0;
                     }
                     else dvec[im] = retval;
                  }
                  for (Int_t iv=0; iv<nvar; iv++) dvec[iv+nmeth] = (Double_t)ev->GetValue(iv);
                  if (method->fDataSetInfo.IsSignal(ev)) { tpSig->AddRow( dvec ); theMat = overlapS; }
                  else                                   { tpBkg->AddRow( dvec ); theMat = overlapB; }

                  // count overlaps
                  for (Int_t im=0; im<nmeth; im++) {
                     for (Int_t jm=im; jm<nmeth; jm++) {
                        if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
                           (*theMat)(im,jm)++;
                           if (im != jm) (*theMat)(jm,im)++;
                        }
                     }
                  }
               }

               // renormalise overlap matrix
               (*overlapS) *= (1.0/defDs->GetNEvtSigTest());  // init...
               (*overlapB) *= (1.0/defDs->GetNEvtBkgdTest()); // init...

               tpSig->MakePrincipals();
               tpBkg->MakePrincipals();

               const TMatrixD* covMatS = tpSig->GetCovarianceMatrix();
               const TMatrixD* covMatB = tpBkg->GetCovarianceMatrix();

               const TMatrixD* corrMatS = gTools().GetCorrelationMatrix( covMatS );
               const TMatrixD* corrMatB = gTools().GetCorrelationMatrix( covMatB );

               // print correlation matrices
               if (corrMatS != 0 && corrMatB != 0) {

                  // extract MVA matrix
                  TMatrixD mvaMatS(nmeth,nmeth);
                  TMatrixD mvaMatB(nmeth,nmeth);
                  for (Int_t im=0; im<nmeth; im++) {
                     for (Int_t jm=0; jm<nmeth; jm++) {
                        mvaMatS(im,jm) = (*corrMatS)(im,jm);
                        mvaMatB(im,jm) = (*corrMatB)(im,jm);
                     }
                  }

                  // extract variables - to MVA matrix
                  std::vector<TString> theInputVars;
                  TMatrixD varmvaMatS(nvar,nmeth);
                  TMatrixD varmvaMatB(nvar,nmeth);
                  for (Int_t iv=0; iv<nvar; iv++) {
                     theInputVars.push_back( method->fDataSetInfo.GetVariableInfo( iv ).GetLabel() );
                     for (Int_t jm=0; jm<nmeth; jm++) {
                        varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
                        varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
                     }
                  }

                  if (nmeth > 1) {
                     Log() << kINFO << Endl;
                     Log() << kINFO << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "Inter-MVA correlation matrix (signal):" << Endl;
                     gTools().FormattedOutput( mvaMatS, *theVars, Log() );
                     Log() << kINFO << Endl;

                     Log() << kINFO << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "Inter-MVA correlation matrix (background):" << Endl;
                     gTools().FormattedOutput( mvaMatB, *theVars, Log() );
                     Log() << kINFO << Endl;
                  }

                  Log() << kINFO << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "Correlations between input variables and MVA response (signal):" << Endl;
                  gTools().FormattedOutput( varmvaMatS, theInputVars, *theVars, Log() );
                  Log() << kINFO << Endl;

                  Log() << kINFO << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "Correlations between input variables and MVA response (background):" << Endl;
                  gTools().FormattedOutput( varmvaMatB, theInputVars, *theVars, Log() );
                  Log() << kINFO << Endl;
               }
               else Log() << kWARNING << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "<TestAllMethods> cannot compute correlation matrices" << Endl;

               // print overlap matrices
               Log() << kINFO << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
               Log() << kINFO << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" << Endl;
               Log() << kINFO << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "An event is signal-like, if its MVA output exceeds the following value:" << Endl;
               gTools().FormattedOutput( rvec, *theVars, "Method" , "Cut value", Log() );
               Log() << kINFO << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;

               // give notice that cut method has been excluded from this test
               if (nmeth != (Int_t)methods->size())
                  Log() << kINFO << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "Note: no correlations and overlap with cut method are provided at present" << Endl;

               if (nmeth > 1) {
                  Log() << kINFO << Endl;
                  Log() << kINFO << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "Inter-MVA overlap matrix (signal):" << Endl;
                  gTools().FormattedOutput( *overlapS, *theVars, Log() );
                  Log() << kINFO << Endl;

                  Log() << kINFO << Form("Dataset[%s] : ",method->fDataSetInfo.GetName()) << "Inter-MVA overlap matrix (background):" << Endl;
                  gTools().FormattedOutput( *overlapB, *theVars, Log() );
               }

               // cleanup
               delete tpSig;
               delete tpBkg;
               delete corrMatS;
               delete corrMatB;
               delete theVars;
               delete overlapS;
               delete overlapB;
               delete [] dvec;
            }
         }
      }
1475  // -----------------------------------------------------------------------
1476  // Third part of evaluation process
1477  // --> output
1478  // -----------------------------------------------------------------------
1479 
1480  if (doRegression) {
1481 
1482  Log() << kINFO << Endl;
1483  TString hLine = "--------------------------------------------------------------------------------------------------";
1484  Log() << kINFO << "Evaluation results ranked by smallest RMS on test sample:" << Endl;
1485  Log() << kINFO << "(\"Bias\" denotes the mean deviation of the regression from the true target." << Endl;
1486  Log() << kINFO << " \"MutInf\" is the \"Mutual Information\" between regression and target." << Endl;
1487  Log() << kINFO << " Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" << Endl;
1488  Log() << kINFO << " tained when removing events deviating more than 2sigma from average.)" << Endl;
1489  Log() << kINFO << hLine << Endl;
1490  Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1491  Log() << kINFO << hLine << Endl;
1492 
1493  for (Int_t i=0; i<nmeth_used[0]; i++) {
1494  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1495  if(theMethod==0) continue;
1496 
1497  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1498  theMethod->fDataSetInfo.GetName(),
1499  (const char*)mname[0][i],
1500  biastest[0][i], biastestT[0][i],
1501  rmstest[0][i], rmstestT[0][i],
1502  minftest[0][i], minftestT[0][i] )
1503  << Endl;
1504  }
1505  Log() << kINFO << hLine << Endl;
1506  Log() << kINFO << Endl;
1507  Log() << kINFO << "Evaluation results ranked by smallest RMS on training sample:" << Endl;
1508  Log() << kINFO << "(overtraining check)" << Endl;
1509  Log() << kINFO << hLine << Endl;
1510  Log() << kINFO << "DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" << Endl;
1511  Log() << kINFO << hLine << Endl;
1512 
1513  for (Int_t i=0; i<nmeth_used[0]; i++) {
1514  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1515  if(theMethod==0) continue;
1516  Log() << kINFO << Form("%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1517  theMethod->fDataSetInfo.GetName(),
1518  (const char*)mname[0][i],
1519  biastrain[0][i], biastrainT[0][i],
1520  rmstrain[0][i], rmstrainT[0][i],
1521  minftrain[0][i], minftrainT[0][i] )
1522  << Endl;
1523  }
1524  Log() << kINFO << hLine << Endl;
1525  Log() << kINFO << Endl;
1526  }
1527  else if( doMulticlass ){
1528  Log() << Endl;
1529  TString hLine = "-------------------------------------------------------------------------------------------------------";
1530  Log() << kINFO << "Evaluation results ranked by best signal efficiency times signal purity " << Endl;
1531  Log() << kINFO << hLine << Endl;
1532  // iterate over methods and evaluate
1533  for (MVector::iterator itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1534  MethodBase* theMethod = dynamic_cast<MethodBase*>(*itrMethod);
1535  if(theMethod==0) continue;
1536 
1537  TString header= "DataSet Name MVA Method ";
1538  for(UInt_t icls = 0; icls<theMethod->fDataSetInfo.GetNClasses(); ++icls){
1539  header += Form("%-12s ",theMethod->fDataSetInfo.GetClassInfo(icls)->GetName());
1540  }
1541  Log() << kINFO << header << Endl;
1542  Log() << kINFO << hLine << Endl;
1543  for (Int_t i=0; i<nmeth_used[0]; i++) {
1544  TString res = Form("[%-14s] %-15s",theMethod->fDataSetInfo.GetName(),(const char*)mname[0][i]);
1545  for(UInt_t icls = 0; icls<theMethod->fDataSetInfo.GetNClasses(); ++icls){
1546  res += Form("%#1.3f ",(multiclass_testEff[i][icls])*(multiclass_testPur[i][icls]));
1547  }
1548  Log() << kINFO << res << Endl;
1549  }
1550  Log() << kINFO << hLine << Endl;
1551  Log() << kINFO << Endl;
1552  }
1553  }
1554  else {
1555  if(fROC)
1556  {
1557  Log().EnableOutput();
1558  gConfig().SetSilent(kFALSE);
1559  Log() << Endl;
1560  TString hLine = "-------------------------------------------------------------------------------------------------------------------";
1561  Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
1562  Log() << kINFO << hLine << Endl;
1563  Log() << kINFO << "DataSet MVA " << Endl;
1564  Log() << kINFO << "Name: Method: ROC-integ" << Endl;
1565 
1566 // Log() << kDEBUG << "DataSet MVA Signal efficiency at bkg eff.(error): | Sepa- Signifi- " << Endl;
1567 // Log() << kDEBUG << "Name: Method: @B=0.01 @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl;
1568  Log() << kDEBUG << hLine << Endl;
1569  for (Int_t k=0; k<2; k++) {
1570  if (k == 1 && nmeth_used[k] > 0) {
1571  Log() << kINFO << hLine << Endl;
1572  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
1573  }
1574  for (Int_t i=0; i<nmeth_used[k]; i++) {
1575  if (k == 1) mname[k][i].ReplaceAll( "Variable_", "" );
1576 
1577  MethodBase* theMethod = dynamic_cast<MethodBase*>(GetMethod(itrMap->first,mname[k][i]));
1578  if(theMethod==0) continue;
1579  TMVA::Results *results=theMethod->Data()->GetResults(mname[k][i],Types::kTesting,Types::kClassification);
1580  std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
1581  std::vector<Bool_t> *mvaResType = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
1582  Double_t fROCalcValue = 0;
1583  TMVA::ROCCurve *fROCCurve = nullptr;
1584  if (mvaResType->size() != 0) {
1585  fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType);
1586  fROCalcValue = fROCCurve->GetROCIntegral();
1587  }
1588 
1589  if (sep[k][i] < 0 || sig[k][i] < 0) {
1590  // cannot compute separation/significance -> no MVA (usually for Cuts)
1591  Log() << kINFO << Form("%-13s %-15s: %#1.3f",
1592  itrMap->first.Data(),
1593  (const char*)mname[k][i],
1594  effArea[k][i]) << Endl;
1595 
1596 // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f %#1.3f | -- --",
1597 // itrMap->first.Data(),
1598 // (const char*)mname[k][i],
1599 // eff01[k][i], Int_t(1000*eff01err[k][i]),
1600 // eff10[k][i], Int_t(1000*eff10err[k][i]),
1601 // eff30[k][i], Int_t(1000*eff30err[k][i]),
1602 // effArea[k][i],fROCalcValue) << Endl;
1603  }
1604  else {
1605  Log() << kINFO << Form("%-13s %-15s: %#1.3f",
1606  itrMap->first.Data(),
1607  (const char*)mname[k][i],
1608  fROCalcValue) << Endl;
1609 // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f %#1.3f | %#1.3f %#1.3f",
1610 // itrMap->first.Data(),
1611 // (const char*)mname[k][i],
1612 // eff01[k][i], Int_t(1000*eff01err[k][i]),
1613 // eff10[k][i], Int_t(1000*eff10err[k][i]),
1614 // eff30[k][i], Int_t(1000*eff30err[k][i]),
1615 // effArea[k][i],fROCalcValue,
1616 // sep[k][i], sig[k][i]) << Endl;
1617  }
1618  if (fROCCurve) delete fROCCurve;
1619  }
1620  }
1621  Log() << kINFO << hLine << Endl;
1622  Log() << kINFO << Endl;
1623  Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
1624  Log() << kINFO << hLine << Endl;
1625  Log() << kINFO << "DataSet MVA Signal efficiency: from test sample (from training sample) " << Endl;
1626  Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 " << Endl;
1627  Log() << kINFO << hLine << Endl;
1628  for (Int_t k=0; k<2; k++) {
1629  if (k == 1 && nmeth_used[k] > 0) {
1630  Log() << kINFO << hLine << Endl;
1631  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
1632  }
1633  for (Int_t i=0; i<nmeth_used[k]; i++) {
1634  if (k == 1) mname[k][i].ReplaceAll( "Variable_", "" );
1635  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1636  if(theMethod==0) continue;
1637 
1638  Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
1639  theMethod->fDataSetInfo.GetName(),
1640  (const char*)mname[k][i],
1641  eff01[k][i],trainEff01[k][i],
1642  eff10[k][i],trainEff10[k][i],
1643  eff30[k][i],trainEff30[k][i]) << Endl;
1644  }
1645  }
1646  Log() << kINFO << hLine << Endl;
1647  Log() << kINFO << Endl;
1648 
1649  if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput();
1650  }//end fROC
1651  }
1652  if(!IsSilentFile())
1653  {
1654  std::list<TString> datasets;
1655  for (Int_t k=0; k<2; k++) {
1656  for (Int_t i=0; i<nmeth_used[k]; i++) {
1657  MethodBase* theMethod = dynamic_cast<MethodBase*>((*methods)[i]);
1658  if(theMethod==0) continue;
1659  // write test/training trees
1660  RootBaseDir()->cd(theMethod->fDataSetInfo.GetName());
1661  if(std::find(datasets.begin(), datasets.end(), theMethod->fDataSetInfo.GetName()) == datasets.end())
1662  {
1663  theMethod->Data()->GetTree(Types::kTesting)->Write( "", TObject::kOverwrite );
1664  theMethod->Data()->GetTree(Types::kTraining)->Write( "", TObject::kOverwrite );
1665  datasets.push_back(theMethod->fDataSetInfo.GetName());
1666  }
1667  }
1668  }
1669  }
1670  }//end for MethodsMap
1671  // references for citation
1672  gTools().TMVACitation( Log(), Tools::kHtmlLink );
1673 }
1674 
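// --- Editor's usage sketch (not part of Factory.cxx) ------------------------
// EvaluateAllMethods() is the last step of the usual Factory workflow. A
// minimal classification job might look like the function below; the file
// name "TMVA.root", the dataset name, the variables and the BDT options are
// placeholder assumptions.
static void Factory_UsageSketch(TTree *sigTree, TTree *bkgTree)
{
   TFile *outFile = TFile::Open("TMVA.root", "RECREATE");
   TMVA::Factory factory("TMVAClassification", outFile, "!V:Color:AnalysisType=Classification");
   TMVA::DataLoader *loader = new TMVA::DataLoader("dataset");
   loader->AddVariable("var1", 'F');
   loader->AddVariable("var2", 'F');
   loader->AddSignalTree(sigTree, 1.0);
   loader->AddBackgroundTree(bkgTree, 1.0);
   loader->PrepareTrainingAndTestTree("", "SplitMode=Random:!V");
   factory.BookMethod(loader, TMVA::Types::kBDT, "BDT", "NTrees=200");
   factory.TrainAllMethods();
   factory.TestAllMethods();
   factory.EvaluateAllMethods(); // prints the evaluation tables shown above
   outFile->Close();
}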
1675 ////////////////////////////////////////////////////////////////////////////////
1676 /// Evaluate Variable Importance
1677 
1678 TH1F* TMVA::Factory::EvaluateImportance(DataLoader *loader,VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption)
1679 {
1680  fModelPersistence=kFALSE;
1681  fSilentFile=kTRUE; // we need a silent file here because we need fast classification results
1682 
1683  //getting number of variables and variable names from loader
1684  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
1685  if(vitype==VIType::kShort)
1686  return EvaluateImportanceShort(loader,theMethod,methodTitle,theOption);
1687  else if(vitype==VIType::kAll)
1688  return EvaluateImportanceAll(loader,theMethod,methodTitle,theOption);
1689  else if(vitype==VIType::kRandom&&nbits>10)
1690  {
1691  return EvaluateImportanceRandom(loader,pow(2,nbits),theMethod,methodTitle,theOption);
1692  }else
1693  {
1694  std::cerr<<"Error in Variable Importance: Random mode requires more than 10 variables in the dataset."<<std::endl;
1695  return nullptr;
1696  }
1697 }
1698 
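// --- Editor's usage sketch (not part of Factory.cxx) ------------------------
// Minimal call of the variable-importance driver defined above, assuming a
// factory and a filled DataLoader as in the previous sketch; the method type
// and its option string are placeholder assumptions.
static void VI_UsageSketch(TMVA::Factory &factory, TMVA::DataLoader *loader)
{
   TH1F *vi = factory.EvaluateImportance(loader, TMVA::VIType::kShort,
                                         TMVA::Types::kBDT, "BDT", "NTrees=50");
   if (vi) vi->Draw("B"); // bar chart of the per-variable importance in %
}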
1699 TH1F* TMVA::Factory::EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
1700 {
1701 
1702  uint64_t x = 0;
1703  uint64_t y = 0;
1704 
1705  //getting number of variables and variable names from loader
1706  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
1707  std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();
1708 
1709  uint64_t range = pow(2, nbits);
1710 
1711  //vector to save importances
1712  std::vector<Double_t> importances(nbits);
1713  //vector to save ROC
1714  std::vector<Double_t> ROC(range);
1715  ROC[0]=0.5;
1716  for (int i = 0; i < nbits; i++) importances[i] = 0;
1717 
1718  Double_t SROC, SSROC; // computed ROC values
1719  for ( x = 1; x <range ; x++) {
1720 
1721  std::bitset<VIBITS> xbitset(x);
1722  if (x == 0) continue; // the DataLoader needs at least one variable
1723 
1724  //creating loader for seed
1725  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
1726 
1727  //adding variables from seed
1728  for (int index = 0; index < nbits; index++) {
1729  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
1730  }
1731 
1732  DataLoaderCopy(seedloader,loader);
1733  seedloader->PrepareTrainingAndTestTree(loader->DefaultDataSetInfo().GetCut("Signal"), loader->DefaultDataSetInfo().GetCut("Background"), loader->DefaultDataSetInfo().GetSplitOptions());
1734 
1735  //Booking Seed
1736  BookMethod(seedloader, theMethod, methodTitle, theOption);
1737 
1738  //Train/Test/Evaluation
1739  TrainAllMethods();
1740  TestAllMethods();
1741  EvaluateAllMethods();
1742 
1743  //getting ROC
1744  ROC[x] = GetROCIntegral(xbitset.to_string(), methodTitle);
1745 
1746  //cleaning information to process subseeds
1747  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
1748  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
1749  delete sresults;
1750  delete seedloader;
1751  this->DeleteAllMethods();
1752 
1753  fMethodsMap.clear();
1754  // remove the global results: keeping them for all seeds would require a large amount of RAM
1755  }
1756 
1757 
1758  for ( x = 0; x <range ; x++)
1759  {
1760  SROC=ROC[x];
1761  for (uint32_t i = 0; i < VIBITS; ++i) {
1762  if (x & (1 << i)) {
1763  y = x & ~(1 << i);
1764  std::bitset<VIBITS> ybitset(y);
1765  // need at least one variable
1766  // NOTE: a subseed of zero is the special case in which
1767  // xbitset has exactly one bit set
1768  Double_t ny = log(x - y) / 0.693147; // log2(x - y) = index of the removed bit (0.693147 ~ ln 2)
1769  if (y == 0) {
1770  importances[ny] = SROC - 0.5;
1771  continue;
1772  }
1773 
1774  //getting ROC
1775  SSROC = ROC[y];
1776  importances[ny] += SROC - SSROC;
1777  //cleaning information
1778  }
1779 
1780  }
1781  }
1782  std::cout<<"--- Variable Importance Results (All)"<<std::endl;
1783  return GetImportance(nbits,importances,varNames);
1784 }
1785 
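// --- Editor's sketch (not part of Factory.cxx) ------------------------------
// Illustrates the seed/subseed bookkeeping used above: each seed x is a
// bitmask over the input variables, clearing bit i yields the subseed y, and
// since x - y is a power of two, log2(x - y) recovers the index of the
// removed variable.
static int VI_RemovedBitIndexSketch()
{
   uint64_t x = 0xB;                             // binary 1011: variables 0, 1 and 3
   uint32_t i = 3;
   uint64_t y = x & ~(uint64_t(1) << i);         // binary 0011: variable 3 removed
   int idx = int(log(double(x - y)) / 0.693147); // log2(8) = 3 (0.693147 ~ ln 2)
   return idx;                                   // importances[idx] += ROC[x] - ROC[y]
}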
1786 static long int sum(long int i)
1787 {
1788  long int _sum=0;
1789  for(long int n=0;n<i;n++) _sum+=pow(2,n);
1790  return _sum; // = 2^i - 1: a mask with the lowest i bits set
1791 }
1792 
1793 
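// --- Editor's note (not part of Factory.cxx) --------------------------------
// sum(i) evaluates the geometric series 2^0 + 2^1 + ... + 2^(i-1) = 2^i - 1,
// i.e. a mask with the lowest i bits set, e.g.
//
//    sum(4) == 15   // binary 1111: all four variables selected
//
// EvaluateImportanceShort below uses sum(nbits) as the seed that selects
// every input variable at once.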
1794 TH1F* TMVA::Factory::EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption)
1795 {
1796  uint64_t x = 0;
1797  uint64_t y = 0;
1798 
1799  //getting number of variables and variable names from loader
1800  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
1801  std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();
1802 
1803  long int range = sum(nbits);
1804 // std::cout<<range<<std::endl;
1805  //vector to save importances
1806  std::vector<Double_t> importances(nbits);
1807  for (int i = 0; i < nbits; i++) importances[i] = 0;
1808 
1809  Double_t SROC, SSROC; // computed ROC values
1810 
1811  x = range; // seed with all variables: the lowest nbits bits set
1812 
1813  std::bitset<VIBITS> xbitset(x);
1814  if (x == 0) Log() << kFATAL << "Error: need at least one variable." << Endl; // the DataLoader needs at least one variable
1815 
1816 
1817  //creating loader for seed
1818  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
1819 
1820  //adding variables from seed
1821  for (int index = 0; index < nbits; index++) {
1822  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
1823  }
1824 
1825  //Loading Dataset
1826  DataLoaderCopy(seedloader,loader);
1827 
1828  //Booking Seed
1829  BookMethod(seedloader, theMethod, methodTitle, theOption);
1830 
1831  //Train/Test/Evaluation
1832  TrainAllMethods();
1833  TestAllMethods();
1834  EvaluateAllMethods();
1835 
1836  //getting ROC
1837  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
1838 
1839  //cleaning information to process subseeds
1840  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
1841  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
1842  delete sresults;
1843  delete seedloader;
1844  this->DeleteAllMethods();
1845  fMethodsMap.clear();
1846 
1847  // remove the global results: keeping them for all seeds would require a large amount of RAM
1848 
1849  for (uint32_t i = 0; i < VIBITS; ++i) {
1850  if (x & (1 << i)) {
1851  y = x & ~(1 << i);
1852  std::bitset<VIBITS> ybitset(y);
1853  // need at least one variable
1854  // NOTE: a subseed of zero is the special case in which
1855  // xbitset has exactly one bit set
1856  Double_t ny = log(x - y) / 0.693147; // log2(x - y) = index of the removed bit (0.693147 ~ ln 2)
1857  if (y == 0) {
1858  importances[ny] = SROC - 0.5;
1859  continue;
1860  }
1861 
1862  //creating loader for subseed
1863  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
1864  //adding variables from subseed
1865  for (int index = 0; index < nbits; index++) {
1866  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
1867  }
1868 
1869  //Loading Dataset
1870  DataLoaderCopy(subseedloader,loader);
1871 
1872  //Booking SubSeed
1873  BookMethod(subseedloader, theMethod, methodTitle, theOption);
1874 
1875  //Train/Test/Evaluation
1876  TrainAllMethods();
1877  TestAllMethods();
1878  EvaluateAllMethods();
1879 
1880  //getting ROC
1881  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
1882  importances[ny] += SROC - SSROC;
1883 
1884  //cleaning information
1885  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
1886  TMVA::ResultsClassification *ssresults = (TMVA::ResultsClassification*)ssmethod->Data()->GetResults(ssmethod->GetMethodName(), Types::kTesting, Types::kClassification);
1887  delete ssresults;
1888  delete subseedloader;
1889  this->DeleteAllMethods();
1890  fMethodsMap.clear();
1891  }
1892  }
1893  std::cout<<"--- Variable Importance Results (Short)"<<std::endl;
1894  return GetImportance(nbits,importances,varNames);
1895 }
1896 
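// --- Editor's note (not part of Factory.cxx) --------------------------------
// In the "short" mode above, only the full seed and its one-bit-removed
// subseeds are trained, so each importance reduces to a single ROC
// difference instead of an average over all subsets:
//
//    importance_i = ROC(all variables) - ROC(all variables except i)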
1897 TH1F* TMVA::Factory::EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption)
1898 {
1899  TRandom3 *rangen = new TRandom3(0); // random generator (seed 0: unique, time-based seed)
1900 
1901  uint64_t x = 0;
1902  uint64_t y = 0;
1903 
1904  //getting number of variables and variable names from loader
1905  const int nbits = loader->DefaultDataSetInfo().GetNVariables();
1906  std::vector<TString> varNames = loader->DefaultDataSetInfo().GetListOfVariables();
1907 
1908  long int range = pow(2, nbits);
1909 
1910  //vector to save importances
1911  std::vector<Double_t> importances(nbits);
1912  Double_t importances_norm = 0;
1913  for (int i = 0; i < nbits; i++) importances[i] = 0;
1914 
1915  Double_t SROC, SSROC; // computed ROC values
1916  for (UInt_t n = 0; n < nseeds; n++) {
1917  x = rangen->Integer(range);
1918 
1919  std::bitset<32> xbitset(x);
1920  if (x == 0) continue; // the DataLoader needs at least one variable
1921 
1922 
1923  //creating loader for seed
1924  TMVA::DataLoader *seedloader = new TMVA::DataLoader(xbitset.to_string());
1925 
1926  //adding variables from seed
1927  for (int index = 0; index < nbits; index++) {
1928  if (xbitset[index]) seedloader->AddVariable(varNames[index], 'F');
1929  }
1930 
1931  //Loading Dataset
1932  DataLoaderCopy(seedloader,loader);
1933 
1934  //Booking Seed
1935  BookMethod(seedloader, theMethod, methodTitle, theOption);
1936 
1937  //Train/Test/Evaluation
1938  TrainAllMethods();
1939  TestAllMethods();
1940  EvaluateAllMethods();
1941 
1942  //getting ROC
1943  SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
1944 // std::cout << "Seed: n " << n << " x " << x << " xbitset:" << xbitset << " ROC " << SROC << std::endl;
1945 
1946  //cleaning information to process subseeds
1947  TMVA::MethodBase *smethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[xbitset.to_string().c_str()][0][0]);
1948  TMVA::ResultsClassification *sresults = (TMVA::ResultsClassification*)smethod->Data()->GetResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification);
1949  delete sresults;
1950  delete seedloader;
1951  this->DeleteAllMethods();
1952  fMethodsMap.clear();
1953 
1954  // remove the global results: keeping them for all seeds would require a large amount of RAM
1955 
1956  for (uint32_t i = 0; i < 32; ++i) {
1957  if (x & (1 << i)) {
1958  y = x & ~(1 << i);
1959  std::bitset<32> ybitset(y);
1960  // need at least one variable
1961  // NOTE: a subseed of zero is the special case in which
1962  // xbitset has exactly one bit set
1963  Double_t ny = log(x - y) / 0.693147; // log2(x - y) = index of the removed bit (0.693147 ~ ln 2)
1964  if (y == 0) {
1965  importances[ny] = SROC - 0.5;
1966  importances_norm += importances[ny];
1967  // std::cout << "SubSeed: " << y << " y:" << ybitset << "ROC " << 0.5 << std::endl;
1968  continue;
1969  }
1970 
1971  //creating loader for subseed
1972  TMVA::DataLoader *subseedloader = new TMVA::DataLoader(ybitset.to_string());
1973  //adding variables from subseed
1974  for (int index = 0; index < nbits; index++) {
1975  if (ybitset[index]) subseedloader->AddVariable(varNames[index], 'F');
1976  }
1977 
1978  //Loading Dataset
1979  DataLoaderCopy(subseedloader,loader);
1980 
1981  //Booking SubSeed
1982  BookMethod(subseedloader, theMethod, methodTitle, theOption);
1983 
1984  //Train/Test/Evaluation
1985  TrainAllMethods();
1986  TestAllMethods();
1987  EvaluateAllMethods();
1988 
1989  //getting ROC
1990  SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
1991  importances[ny] += SROC - SSROC;
1992  //std::cout << "SubSeed: " << y << " y:" << ybitset << " x-y " << x - y << " " << std::bitset<32>(x - y) << " ny " << ny << " SROC " << SROC << " SSROC " << SSROC << " Importance = " << importances[ny] << std::endl;
1993  //cleaning information
1994  TMVA::MethodBase *ssmethod=dynamic_cast<TMVA::MethodBase*>(fMethodsMap[ybitset.to_string().c_str()][0][0]);
1995  TMVA::ResultsClassification *ssresults = (TMVA::ResultsClassification*)ssmethod->Data()->GetResults(ssmethod->GetMethodName(), Types::kTesting, Types::kClassification);
1996  delete ssresults;
1997  delete subseedloader;
1998  this->DeleteAllMethods();
1999  fMethodsMap.clear();
2000  }
2001  }
2002  }
2003  std::cout<<"--- Variable Importance Results (Random)"<<std::endl;
2004  return GetImportance(nbits,importances,varNames);
2005 }
2006 
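// --- Editor's note (not part of Factory.cxx) --------------------------------
// Random mode samples nseeds random variable subsets instead of enumerating
// all 2^nbits of them; note that the public dispatcher EvaluateImportance()
// selects this mode only for nbits > 10 and currently passes
// nseeds = pow(2, nbits), so a smaller budget requires calling
// EvaluateImportanceRandom() directly, e.g. with nseeds = 200.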
2007 
2008 
2009 TH1F* TMVA::Factory::GetImportance(const int nbits,std::vector<Double_t> importances,std::vector<TString> varNames)
2010 {
2011  TH1F *vih1 = new TH1F("vih1", "", nbits, 0, nbits);
2012 
2013  gStyle->SetOptStat(000000);
2014 
2015  Float_t normalization = 0.0;
2016  for (int i = 0; i < nbits; i++) {
2017  normalization = normalization + importances[i];
2018  }
2019 
2020  Float_t roc = 0.0;
2021 
2022  gStyle->SetTitleXOffset(0.4);
2023  gStyle->SetTitleXOffset(1.2);
2024 
2025 
2026  Double_t x_ie[nbits], y_ie[nbits];
2027  for (Int_t i = 1; i < nbits + 1; i++) {
2028  x_ie[i - 1] = (i - 1) * 1.;
2029  roc = 100.0 * importances[i - 1] / normalization;
2030  y_ie[i - 1] = roc;
2031  std::cout<<"--- "<<varNames[i-1]<<" = "<<roc<<" %"<<std::endl;
2032  vih1->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
2033  vih1->SetBinContent(i, roc);
2034  }
2035  TGraph *g_ie = new TGraph(nbits, x_ie, y_ie); // x_ie/y_ie hold exactly nbits points
2036  g_ie->SetTitle("");
2037 
2038  vih1->LabelsOption("v >", "X");
2039  vih1->SetBarWidth(0.97);
2040  Int_t ca = TColor::GetColor("#006600");
2041  vih1->SetFillColor(ca);
2042  //Int_t ci = TColor::GetColor("#990000");
2043 
2044  vih1->GetYaxis()->SetTitle("Importance (%)");
2045  vih1->GetYaxis()->SetTitleSize(0.045);
2046  vih1->GetYaxis()->CenterTitle();
2047  vih1->GetYaxis()->SetTitleOffset(1.24);
2048 
2049  vih1->GetYaxis()->SetRangeUser(-7, 50);
2050  vih1->SetDirectory(0);
2051 
2052 // vih1->Draw("B");
2053  return vih1;
2054 }
2055 
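// --- Editor's note (not part of Factory.cxx) --------------------------------
// GetImportance() rescales the accumulated ROC differences to percentages,
// so the bin contents of the returned histogram sum to 100:
//
//    importance_i [%] = 100 * importances[i] / (importances[0] + ... + importances[nbits-1])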