ROOT  6.06/09
Reference Guide
MethodBase.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Virtual base class for all MVA method *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005-2011: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * U. of Bonn, Germany *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  **********************************************************************************/
32 
33 #ifndef ROOT_TMVA_MethodBase
34 #define ROOT_TMVA_MethodBase
35 
36 //////////////////////////////////////////////////////////////////////////
37 // //
38 // MethodBase //
39 // //
40 // Virtual base class for all TMVA method //
41 // //
42 //////////////////////////////////////////////////////////////////////////
43 
44 #include <iosfwd>
45 #include <vector>
46 #include <map>
47 #include "assert.h"
48 
49 #ifndef ROOT_TString
50 #include "TString.h"
51 #endif
52 
53 #ifndef ROOT_TMVA_IMethod
54 #include "TMVA/IMethod.h"
55 #endif
56 #ifndef ROOT_TMVA_Configurable
57 #include "TMVA/Configurable.h"
58 #endif
59 #ifndef ROOT_TMVA_Types
60 #include "TMVA/Types.h"
61 #endif
62 #ifndef ROOT_TMVA_DataSet
63 #include "TMVA/DataSet.h"
64 #endif
65 #ifndef ROOT_TMVA_Event
66 #include "TMVA/Event.h"
67 #endif
68 #ifndef ROOT_TMVA_TransformationHandler
70 #endif
71 #ifndef ROOT_TMVA_OptimizeConfigParameters
73 #endif
74 
75 class TGraph;
76 class TTree;
77 class TDirectory;
78 class TSpline;
79 class TH1F;
80 class TH1D;
81 
82 namespace TMVA {
83 
84  class Ranking;
85  class PDF;
86  class TSpline1;
87  class MethodCuts;
88  class MethodBoost;
89  class DataSetInfo;
90 
91  class MethodBase : virtual public IMethod, public Configurable {
92 
93  friend class Factory;
94 
95  public:
96 
98 
99  // standard constructor
100  MethodBase( const TString& jobName,
101  Types::EMVA methodType,
102  const TString& methodTitle,
103  DataSetInfo& dsi,
104  const TString& theOption = "",
105  TDirectory* theBaseDir = 0 );
106 
107  // constructor used for Testing + Application of the MVA, only (no training),
108  // using given weight file
109  MethodBase( Types::EMVA methodType,
110  DataSetInfo& dsi,
111  const TString& weightFile,
112  TDirectory* theBaseDir = 0 );
113 
114  // destructor
115  virtual ~MethodBase();
116 
117  // declaration, processing and checking of configuration options
118  void SetupMethod();
119  void ProcessSetup();
120  virtual void CheckSetup(); // may be overwritten by derived classes
121 
122  // ---------- main training and testing methods ------------------------------
123 
124  // prepare tree branch with the method's discriminating variable
125  void AddOutput( Types::ETreeType type, Types::EAnalysisType analysisType );
126 
127  // performs classifier training
128  // calls methods Train() implemented by derived classes
129  void TrainMethod();
130 
131  // optimize tuning parameters
132  virtual std::map<TString,Double_t> OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA");
133  virtual void SetTuneParameters(std::map<TString,Double_t> tuneParameters);
134 
135  virtual void Train() = 0;
136 
137  // store and retrieve time used for training
138  void SetTrainTime( Double_t trainTime ) { fTrainTime = trainTime; }
139  Double_t GetTrainTime() const { return fTrainTime; }
140 
141  // store and retrieve time used for testing
142  void SetTestTime ( Double_t testTime ) { fTestTime = testTime; }
143  Double_t GetTestTime () const { return fTestTime; }
144 
145  // performs classifier testing
146  virtual void TestClassification();
147  virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X");
148 
149  // performs multiclass classifier testing
150  virtual void TestMulticlass();
151 
152  // performs regression testing
153  virtual void TestRegression( Double_t& bias, Double_t& biasT,
154  Double_t& dev, Double_t& devT,
155  Double_t& rms, Double_t& rmsT,
156  Double_t& mInf, Double_t& mInfT, // mutual information
157  Double_t& corr,
159 
160  // options treatment
161  virtual void Init() = 0;
162  virtual void DeclareOptions() = 0;
163  virtual void ProcessOptions() = 0;
164  virtual void DeclareCompatibilityOptions(); // declaration of past options
165 
166  // reset the Method --> As if it was not yet trained, just instantiated
167  // virtual void Reset() = 0;
168  //for the moment, I provide a dummy (that would not work) default, just to make
169  // compilation/running w/o parameter optimisation still possible
170  virtual void Reset(){return;}
171 
172  // classifier response:
173  // some methods may return a per-event error estimate
174  // error calculation is skipped if err==0
175  virtual Double_t GetMvaValue( Double_t* errLower = 0, Double_t* errUpper = 0) = 0;
176 
177  // signal/background classification response
178  Double_t GetMvaValue( const TMVA::Event* const ev, Double_t* err = 0, Double_t* errUpper = 0 );
179 
180  protected:
181  // helper function to set errors to -1
182  void NoErrorCalc(Double_t* const err, Double_t* const errUpper);
183 
184  public:
185  // regression response
186  const std::vector<Float_t>& GetRegressionValues(const TMVA::Event* const ev){
187  fTmpEvent = ev; // publish ev as the temporary event for the duration of the call below
188  const std::vector<Float_t>& response = GetRegressionValues();
189  fTmpEvent = 0; // clear the temporary event again before handing back the result
190  return response;
191  }
192 
193  virtual const std::vector<Float_t>& GetRegressionValues() { // default for methods without regression support: an empty result
194  static const std::vector<Float_t> noValues; // one shared empty vector; the former per-call 'new' could never be freed by callers
195  return noValues;
196  }
197 
198  // multiclass classification response
199  virtual const std::vector<Float_t>& GetMulticlassValues() { // default for methods without multiclass support: an empty result
200  static const std::vector<Float_t> noValues; // one shared empty vector; the former per-call 'new' could never be freed by callers
201  return noValues;
202  }
203 
204  // probability of classifier response (mvaval) to be signal (requires "CreateMvaPdf" option set)
205  virtual Double_t GetProba( const Event *ev); // the simple one: automatically calculates the mvaVal and uses the SAME sig/bkg ratio as given in the training sample (typically 50/50 (NormMode=EqualNumEvents), but can be different)
206  virtual Double_t GetProba( Double_t mvaVal, Double_t ap_sig ); // takes an explicit mvaVal; ap_sig is presumably the a-priori signal fraction -- TODO confirm against the implementation
207 
208  // Rarity of classifier response (signal or background (default) is uniform in [0,1])
209  virtual Double_t GetRarity( Double_t mvaVal, Types::ESBType reftype = Types::kBackground ) const;
210 
211  // create ranking
212  virtual const Ranking* CreateRanking() = 0;
213 
214  // make ROOT-independent C++ class
215  virtual void MakeClass( const TString& classFileName = TString("") ) const;
216 
217  // print help message
218  void PrintHelpMessage() const;
219 
220  //
221  // streamer methods for training information (creates "weight" files) --------
222  //
223  public:
224  void WriteStateToFile () const;
225  void ReadStateFromFile ();
226 
227  protected:
228  // the actual "weights"
229  virtual void AddWeightsXMLTo ( void* parent ) const = 0;
230  virtual void ReadWeightsFromXML ( void* wghtnode ) = 0;
231  virtual void ReadWeightsFromStream( std::istream& ) = 0; // backward compatibility
232  virtual void ReadWeightsFromStream( TFile& ) {} // backward compatibility
233 
234  private:
235  friend class MethodCategory;
236  friend class MethodCompositeBase;
237  void WriteStateToXML ( void* parent ) const;
238  void ReadStateFromXML ( void* parent );
239  void WriteStateToStream ( std::ostream& tf ) const; // needed for MakeClass
240  void WriteVarsToStream ( std::ostream& tf, const TString& prefix = "" ) const; // needed for MakeClass
241 
242 
243  public: // these two need to be public, they are used to read in-memory weight-files
244  void ReadStateFromStream ( std::istream& tf ); // backward compatibility
245  void ReadStateFromStream ( TFile& rf ); // backward compatibility
246  void ReadStateFromXMLString( const char* xmlstr ); // for reading from memory
247 
248  private:
249  // the variable information
250  void AddVarsXMLTo ( void* parent ) const;
251  void AddSpectatorsXMLTo ( void* parent ) const;
252  void AddTargetsXMLTo ( void* parent ) const;
253  void AddClassesXMLTo ( void* parent ) const;
254  void ReadVariablesFromXML ( void* varnode );
255  void ReadSpectatorsFromXML( void* specnode);
256  void ReadTargetsFromXML ( void* tarnode );
257  void ReadClassesFromXML ( void* clsnode );
258  void ReadVarsFromStream ( std::istream& istr ); // backward compatibility
259 
260  public:
261  // ---------------------------------------------------------------------------
262 
263  // write evaluation histograms into target file
264  virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
265 
266  // write classifier-specific monitoring information to target file
267  virtual void WriteMonitoringHistosToFile() const;
268 
269  // ---------- public evaluation methods --------------------------------------
270 
271  // individual initialisation for testing of each method
272  // overload this one for individual initialisation of the testing,
273  // it is then called automatically within the global "TestInit"
274 
275  // variables (and private member functions) for the evaluation:
276  // get the efficiency. It fills a histogram for efficiency/vs/bkg
277  // and returns the one value of the efficiency demanded for
278  // in the TString argument. (Watch the string format)
279  virtual Double_t GetEfficiency( const TString&, Types::ETreeType, Double_t& err );
280  virtual Double_t GetTrainingEfficiency(const TString& );
281  virtual std::vector<Float_t> GetMulticlassEfficiency( std::vector<std::vector<Float_t> >& purity );
282  virtual std::vector<Float_t> GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity );
283  virtual Double_t GetSignificance() const;
284  virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const;
285  virtual Double_t GetROCIntegral(PDF *pdfS=0, PDF *pdfB=0) const;
286  virtual Double_t GetMaximumSignificance( Double_t SignalEvents, Double_t BackgroundEvents,
287  Double_t& optimal_significance_value ) const;
288  virtual Double_t GetSeparation( TH1*, TH1* ) const;
289  virtual Double_t GetSeparation( PDF* pdfS = 0, PDF* pdfB = 0 ) const;
290 
291  virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t& stddev,Double_t& stddev90Percent ) const;
292  // ---------- public accessors -----------------------------------------------
293 
294  // classifier naming (a lot of names ... aren't they ;-)
295  const TString& GetJobName () const { return fJobName; }
296  const TString& GetMethodName () const { return fMethodName; }
298  Types::EMVA GetMethodType () const { return fMethodType; }
299  const char* GetName () const { return fMethodName.Data(); }
300  const TString& GetTestvarName () const { return fTestvar; }
301  const TString GetProbaName () const { return fTestvar + "_Proba"; }
302  TString GetWeightFileName() const;
303 
304  // build classifier name in Test tree
305  // MVA prefix (e.g., "TMVA_")
306  void SetTestvarName ( const TString & v="" ) { fTestvar = (v=="") ? ("MVA_" + GetMethodName()) : v; }
307 
308  // number of input variables used by classifier
309  UInt_t GetNvar() const { return DataInfo().GetNVariables(); }
310  UInt_t GetNVariables() const { return DataInfo().GetNVariables(); }
311  UInt_t GetNTargets() const { return DataInfo().GetNTargets(); };
312 
313  // internal names and expressions of input variables
314  const TString& GetInputVar ( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetInternalName(); }
315  const TString& GetInputLabel( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetLabel(); }
316  const TString& GetInputTitle( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetTitle(); }
317 
318  // normalisation and limit accessors
319  Double_t GetMean( Int_t ivar ) const { return GetTransformationHandler().GetMean(ivar); }
320  Double_t GetRMS ( Int_t ivar ) const { return GetTransformationHandler().GetRMS(ivar); }
321  Double_t GetXmin( Int_t ivar ) const { return GetTransformationHandler().GetMin(ivar); }
322  Double_t GetXmax( Int_t ivar ) const { return GetTransformationHandler().GetMax(ivar); }
323 
324  // sets the minimum requirement on the MVA output to declare an event signal-like
327 
328  // sets the orientation of the cut on the MVA output used to declare an event signal-like
330  void SetSignalReferenceCutOrientation( Double_t cutOrientation ) { fSignalReferenceCutOrientation = cutOrientation; }
331 
332  // pointers to ROOT directories
333  TDirectory* BaseDir() const;
334  TDirectory* MethodBaseDir() const;
335  void SetMethodDir ( TDirectory* methodDir ) { fBaseDir = fMethodBaseDir = methodDir; }
336  void SetBaseDir( TDirectory* methodDir ){ fBaseDir = methodDir; }
337  void SetMethodBaseDir( TDirectory* methodDir ){ fMethodBaseDir = methodDir; }
338 
339  // the TMVA version can be obtained and checked using
340  // if (GetTrainingTMVAVersionCode()>TMVA_VERSION(3,7,2)) {...}
341  // or
342  // if (GetTrainingROOTVersionCode()>ROOT_VERSION(5,15,5)) {...}
347 
349  {
350  if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation;
351  }
352  const TransformationHandler& GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const
353  {
354  return (takeReroutedIfAvailable && fTransformationPointer) ? *fTransformationPointer : fTransformation; // rerouted handler when requested and set, otherwise the method's own
355  }
356 
357  void RerouteTransformationHandler (TransformationHandler* fTargetTransformation) { fTransformationPointer=fTargetTransformation; }
358 
359  // ---------- event accessors ------------------------------------------------
360 
361  // returns reference to data set
362  // NOTE: this DataSet is the "original" dataset, i.e. the one seen by ALL Classifiers WITHOUT transformation
363  DataSet* Data() const { return DataInfo().GetDataSet(); }
364  DataSetInfo& DataInfo() const { return fDataSetInfo; }
365 
366  mutable const Event* fTmpEvent; //! temporary event when testing on a different DataSet than the own one
367 
368  // event reference and update
369  // NOTE: these Event accessors make sure that you get the events transformed according to the
370  // particular classifier's transformation chosen
371  UInt_t GetNEvents () const { return Data()->GetNEvents(); }
372  const Event* GetEvent () const;
373  const Event* GetEvent ( const TMVA::Event* ev ) const;
374  const Event* GetEvent ( Long64_t ievt ) const;
375  const Event* GetEvent ( Long64_t ievt , Types::ETreeType type ) const;
376  const Event* GetTrainingEvent( Long64_t ievt ) const;
377  const Event* GetTestingEvent ( Long64_t ievt ) const;
378  const std::vector<TMVA::Event*>& GetEventCollection( Types::ETreeType type );
379 
380  // ---------- public auxiliary methods ---------------------------------------
381 
382  // this method is used to decide whether an event is signal- or background-like
383  // the reference cut "xC" is taken to be where
384  // Int_[-oo,xC] { PDF_S(x) dx } = Int_[xC,+oo] { PDF_B(x) dx }
385  virtual Bool_t IsSignalLike();
386  virtual Bool_t IsSignalLike(Double_t mvaVal);
387 
388 
389  Bool_t HasMVAPdfs() const { return fHasMVAPdfs; }
394 
395  // setter method for suppressing writing to XML and writing of standalone classes
397 
398  protected:
399 
400  // ---------- protected accessors -------------------------------------------
401 
402  //TDirectory* LocalTDir() const { return Data().LocalRootDir(); }
403 
404  // weight file name and directory (given by global config variable)
405  void SetWeightFileName( TString );
406 
407  const TString& GetWeightFileDir() const { return fFileDir; }
408  void SetWeightFileDir( TString fileDir );
409 
410  // are input variables normalised ?
411  Bool_t IsNormalised() const { return fNormalise; }
413 
414  // set number of input variables (only used by MethodCuts, could perhaps be removed)
415  // void SetNvar( Int_t n ) { fNvar = n; }
416 
417  // verbose and help flags
418  Bool_t Verbose() const { return fVerbose; }
419  Bool_t Help () const { return fHelp; }
420 
421  // ---------- protected event and tree accessors -----------------------------
422 
423  // names of input variables (if the original names are expressions, they are
424  // transformed into regexps)
425  const TString& GetInternalVarName( Int_t ivar ) const { return (*fInputVars)[ivar]; }
426  const TString& GetOriginalVarName( Int_t ivar ) const { return DataInfo().GetVariableInfo(ivar).GetExpression(); }
427 
428  Bool_t HasTrainingTree() const { return Data()->GetNTrainingEvents() != 0; }
429 
430  // ---------- protected auxiliary methods ------------------------------------
431 
432  protected:
433 
434  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
435  virtual void MakeClassSpecific( std::ostream&, const TString& = "" ) const {}
436 
437  // header and auxiliary classes
438  virtual void MakeClassSpecificHeader( std::ostream&, const TString& = "" ) const {}
439 
440  // static pointer to this object - required for ROOT finder (to be solved differently)
441  static MethodBase* GetThisBase();
442 
443  // some basic statistical analysis
444  void Statistics( Types::ETreeType treeType, const TString& theVarName,
446  Double_t&, Double_t&, Double_t& );
447 
448  // if TRUE, write weights only to text files
449  Bool_t TxtWeightsOnly() const { return kTRUE; }
450 
451  protected:
452 
453  // access to event information that needs method-specific information
454 
456 
457 
458  private:
459 
460  // ---------- private definitions --------------------------------------------
461  // Initialisation
462  void InitBase();
463  void DeclareBaseOptions();
464  void ProcessBaseOptions();
465 
466  // used in efficiency computation
467  enum ECutOrientation { kNegative = -1, kPositive = +1 };
469 
470  // ---------- private accessors ---------------------------------------------
471 
472  // reset required for RootFinder
473  void ResetThisBase();
474 
475  // ---------- private auxiliary methods --------------------------------------
476 
477  // PDFs for classifier response (required to compute signal probability and Rarity)
478  void CreateMVAPdfs();
479 
480  // for root finder
481  static Double_t IGetEffForRoot( Double_t ); // interface
482  Double_t GetEffForRoot ( Double_t ); // implementation
483 
484  // used for file parsing
485  Bool_t GetLine( std::istream& fin, char * buf );
486 
487  // fill test tree with classification or regression results
488  virtual void AddClassifierOutput ( Types::ETreeType type );
490  virtual void AddRegressionOutput ( Types::ETreeType type );
491  virtual void AddMulticlassOutput ( Types::ETreeType type );
492 
493  private:
494 
495  void AddInfoItem( void* gi, const TString& name,
496  const TString& value) const;
497 
498  static void CreateVariableTransforms(const TString& trafoDefinition,
499  TMVA::DataSetInfo& dataInfo,
500  TMVA::TransformationHandler& transformationHandler,
501  TMVA::MsgLogger& log );
502 
503 
504  // ========== class members ==================================================
505 
506  protected:
507 
508  // direct accessors
509  Ranking* fRanking; // pointer to ranking object (created by derived classifiers)
510  std::vector<TString>* fInputVars; // vector of input variables used in MVA
511 
512  // histogram binning
513  Int_t fNbins; // number of bins in input variable histograms
514  Int_t fNbinsMVAoutput; // number of bins in MVA output histograms
515  Int_t fNbinsH; // number of bins in evaluation histograms
516 
517  Types::EAnalysisType fAnalysisType; // analysis type the method is used for (a Types::EAnalysisType value, not a boolean)
518 
519  std::vector<Float_t>* fRegressionReturnVal; // holds the return-values for the regression
520  std::vector<Float_t>* fMulticlassReturnVal; // holds the return-values for the multiclass classification
521 
522  private:
523 
524  // MethodCuts redefines some of the evaluation variables and histograms -> must access private members
525  friend class MethodCuts;
526 
527  Bool_t fDisableWriting; //! set to true in order to suppress writing to XML
528 
529  // data sets
530  DataSetInfo& fDataSetInfo; //! the data set information (sometimes needed)
531 
532  Double_t fSignalReferenceCut; // minimum requirement on the MVA output to declare an event signal-like
533  Double_t fSignalReferenceCutOrientation; // orientation of the signal reference cut (value passed to SetSignalReferenceCutOrientation)
534  Types::ESBType fVariableTransformType; // this is the event type (sig or bgd) assumed for variable transform
535 
536  // naming and versioning
537  TString fJobName; // name of job -> user defined, appears in weight files
538  TString fMethodName; // name of the method (set in derived class)
539  Types::EMVA fMethodType; // type of method (set in derived class)
540  TString fTestvar; // variable used in evaluation, etc (mostly the MVA)
541  UInt_t fTMVATrainingVersion; // TMVA version used for training
542  UInt_t fROOTTrainingVersion; // ROOT version used for training
543  Bool_t fConstructedFromWeightFile; // is it obtained from weight file?
544 
545  // Directory structure: fMethodBaseDir/fBaseDir
546  // where the first directory name is defined by the method type
547  // and the second is user supplied (the title given in Factory::BookMethod())
548  TDirectory* fBaseDir; // base directory for the instance, needed to know where to jump back from localDir
549  mutable TDirectory* fMethodBaseDir; // base directory for the method
550 
551  TString fParentDir; // method parent name, like booster name
552 
553  TString fFileDir; // unix sub-directory for weight files (default: "weights")
554  TString fWeightFile; // weight file name
555 
556  private:
557 
558  TH1* fEffS; // efficiency histogram for rootfinder
559 
560  PDF* fDefaultPDF; // default PDF definitions
561  PDF* fMVAPdfS; // signal MVA PDF
562  PDF* fMVAPdfB; // background MVA PDF
563 
564  // TH1D* fmvaS; // PDFs of MVA distribution (signal)
565  // TH1D* fmvaB; // PDFs of MVA distribution (background)
566  PDF* fSplS; // PDFs of MVA distribution (signal)
567  PDF* fSplB; // PDFs of MVA distribution (background)
568  TSpline* fSpleffBvsS; // splines for signal eff. versus background eff.
569 
570  PDF* fSplTrainS; // PDFs of training MVA distribution (signal)
571  PDF* fSplTrainB; // PDFs of training MVA distribution (background)
572  TSpline* fSplTrainEffBvsS; // splines for training signal eff. versus background eff.
573 
574  private:
575 
576  // basic statistics quantities of MVA
577  Double_t fMeanS; // mean (signal)
578  Double_t fMeanB; // mean (background)
579  Double_t fRmsS; // RMS (signal)
580  Double_t fRmsB; // RMS (background)
581  Double_t fXmin; // minimum (signal and background)
582  Double_t fXmax; // maximum (signal and background)
583 
584  // variable preprocessing
585  TString fVarTransformString; // labels variable transform method
586 
587  TransformationHandler* fTransformationPointer; // pointer to the rest of transformations
588  TransformationHandler fTransformation; // the list of transformations
589 
590 
591  // help and verbosity
592  Bool_t fVerbose; // verbose flag
593  TString fVerbosityLevelString; // verbosity level (user input string)
594  EMsgType fVerbosityLevel; // verbosity level
595  Bool_t fHelp; // help flag
596  Bool_t fHasMVAPdfs; // MVA Pdfs are created for this classifier
597 
598  Bool_t fIgnoreNegWeightsInTraining;// If true, events with negative weights are not used in training
599 
600  protected:
601 
603 
604  // for signal/background
605  UInt_t fSignalClass; // index of the Signal-class
606  UInt_t fBackgroundClass; // index of the Background-class
607 
608  private:
609 
610  // timing variables
611  Double_t fTrainTime; // for timing measurements
612  Double_t fTestTime; // for timing measurements
613 
614  // orientation of cut: depends on signal and background mean values
615  ECutOrientation fCutOrientation; // +1 if Sig>Bkg, -1 otherwise
616 
617  // for root finder
618  TSpline1* fSplRefS; // helper splines for RootFinder (signal)
619  TSpline1* fSplRefB; // helper splines for RootFinder (background)
620 
621  TSpline1* fSplTrainRefS; // helper splines for RootFinder (signal)
622  TSpline1* fSplTrainRefB; // helper splines for RootFinder (background)
623 
624  mutable std::vector<const std::vector<TMVA::Event*>*> fEventCollections; // if the method needs the complete event-collection, the transformed event coll. is stored here.
625 
626  public:
627  Bool_t fSetupCompleted; // is method setup
628 
629  private:
630 
631  // This is a workaround for OSx where static thread_local data members are
632  // not supported. The C++ solution would indeed be the following:
633  static MethodBase*& GetThisBaseThreadLocal() {TTHREAD_TLS(MethodBase*) fgThisBase(nullptr); return fgThisBase; };
634 
635  // ===== deprecated options, kept for backward compatibility =====
636  private:
637 
638  Bool_t fNormalise; // normalise input variables
639  Bool_t fUseDecorr; // synonymous for decorrelation
640  TString fVariableTransformTypeString; // labels variable transform type
641  Bool_t fTxtWeightsOnly; // if TRUE, write weights only to text files
642  Int_t fNbinsMVAPdf; // number of bins used in histogram that creates PDF
643  Int_t fNsmoothMVAPdf; // number of times a histogram is smoothed before creating the PDF
644 
645  protected:
646 
647  ClassDef(MethodBase,0) // Virtual base class for all TMVA method
648 
649  };
650 } // namespace TMVA
651 
652 
653 
654 
655 
656 
657 
658 // ========== INLINE FUNCTIONS =========================================================
659 
660 
661 //_______________________________________________________________________
662 inline const TMVA::Event* TMVA::MethodBase::GetEvent( const TMVA::Event* ev ) const
663 {
664  return GetTransformationHandler().Transform(ev); // ev passed through the active transformation handler (the rerouted one if set)
665 }
666 
668 {
669  if(fTmpEvent)
670  return GetTransformationHandler().Transform(fTmpEvent);
671  else
672  return GetTransformationHandler().Transform(Data()->GetEvent());
673 }
674 
676 {
677  assert(fTmpEvent==0);
678  return GetTransformationHandler().Transform(Data()->GetEvent(ievt));
679 }
680 
682 {
683  assert(fTmpEvent==0);
684  return GetTransformationHandler().Transform(Data()->GetEvent(ievt, type));
685 }
686 
688 {
689  assert(fTmpEvent==0);
690  return GetEvent(ievt, Types::kTraining);
691 }
692 
694 {
695  assert(fTmpEvent==0);
696  return GetEvent(ievt, Types::kTesting);
697 }
698 
699 #endif
virtual void DeclareOptions()=0
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:517
TString fMethodName
Definition: MethodBase.h:538
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:890
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument. This is just a dummy ...
Definition: MethodBase.cxx:647
virtual void ReadWeightsFromStream(TFile &)
Definition: MethodBase.h:232
const TString & GetWeightFileDir() const
Definition: MethodBase.h:407
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodBase.h:199
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
long long Long64_t
Definition: RtypesCore.h:69
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
static void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
create variable transformations
Definition: MethodBase.cxx:481
Bool_t fIgnoreNegWeightsInTraining
Definition: MethodBase.h:598
void ReadStateFromXML(void *parent)
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
TSpline1 * fSplTrainRefS
Definition: MethodBase.h:621
const TString & GetExpression() const
Definition: VariableInfo.h:62
#define assert(cond)
Definition: unittest.h:542
const char * GetName() const
Returns name of object.
Definition: MethodBase.h:299
virtual const Ranking * CreateRanking()=0
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:602
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Definition: MethodBase.cxx:626
TString fWeightFile
Definition: MethodBase.h:554
TString fVariableTransformTypeString
Definition: MethodBase.h:640
void SetMethodBaseDir(TDirectory *methodDir)
Definition: MethodBase.h:337
Base class for spline implementation containing the Draw/Paint methods.
Definition: TSpline.h:22
TransformationHandler * fTransformationPointer
Definition: MethodBase.h:587
Types::ESBType fVariableTransformType
Definition: MethodBase.h:534
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:45
const Event * GetTestingEvent(Long64_t ievt) const
Definition: MethodBase.h:693
DataSet * Data() const
Definition: MethodBase.h:363
UInt_t GetNvar() const
Definition: MethodBase.h:309
EAnalysisType
Definition: Types.h:124
const TString & GetOriginalVarName(Int_t ivar) const
Definition: MethodBase.h:426
UInt_t GetNTargets() const
Definition: DataSetInfo.h:129
const std::vector< Float_t > & GetRegressionValues(const TMVA::Event *const ev)
Definition: MethodBase.h:186
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot significance, S/Sqrt(S^2 + B^2), curve for given number of signal and background events; returns...
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:330
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodBase.h:193
Bool_t IsNormalised() const
Definition: MethodBase.h:411
Basic string class.
Definition: TString.h:137
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:570
void SetTrainTime(Double_t trainTime)
Definition: MethodBase.h:138
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:348
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="", TDirectory *theBaseDir=0)
standard constructor
Definition: MethodBase.cxx:132
virtual void TestMulticlass()
test multiclass classification
TString fJobName
Definition: MethodBase.h:537
ECutOrientation GetCutOrientation() const
Definition: MethodBase.h:468
TSpline1 * fSplRefB
Definition: MethodBase.h:619
TSpline1 * fSplRefS
Definition: MethodBase.h:618
Bool_t Verbose() const
Definition: MethodBase.h:418
std::vector< TString > * fInputVars
Definition: MethodBase.h:510
void ReadTargetsFromXML(void *tarnode)
read target info from XML
virtual void Init()=0
const TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const
Definition: MethodBase.h:352
Double_t fTrainTime
Definition: MethodBase.h:611
UInt_t GetNVariables() const
Definition: DataSetInfo.h:128
const TString & GetInputVar(Int_t i) const
Definition: MethodBase.h:314
Double_t fTestTime
Definition: MethodBase.h:612
DataSet * GetDataSet() const
returns data set
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:838
const TString & GetMethodName() const
Definition: MethodBase.h:296
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
Bool_t IsConstructedFromWeightFile() const
Definition: MethodBase.h:455
TString GetWeightFileName() const
retrieve weight file name
void SetMethodDir(TDirectory *methodDir)
Definition: MethodBase.h:335
void ReadVariablesFromXML(void *varnode)
read variable info from XML
const TString & GetInternalName() const
Definition: VariableInfo.h:63
UInt_t fSignalClass
Definition: MethodBase.h:605
const char * Data() const
Definition: TString.h:349
static Types & Instance()
the single instance of "Types" if existing already, or create it (Singleton)
Definition: Types.cxx:61
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:391
DataSetInfo & fDataSetInfo
set to true in order to suppress writing to XML
Definition: MethodBase.h:530
#define ClassDef(name, id)
Definition: Rtypes.h:254
ECutOrientation fCutOrientation
Definition: MethodBase.h:615
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables, variable transformation type etc.
virtual ~MethodBase()
destructor
Definition: MethodBase.cxx:254
UInt_t GetNTargets() const
Definition: MethodBase.h:311
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream ...
void ReadStateFromFile()
Function to read options and weights from file.
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
Double_t GetRMS(Int_t ivar, Int_t cls=-1) const
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:852
Bool_t DoMulticlass() const
Definition: MethodBase.h:393
Double_t fMeanB
Definition: MethodBase.h:578
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
void ResetThisBase()
reset required for RootFinder
std::vector< std::vector< double > > Data
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:295
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:371
TString fTestvar
Definition: MethodBase.h:540
Types::EMVA GetMethodType() const
Definition: MethodBase.h:298
Double_t GetTestTime() const
Definition: MethodBase.h:143
virtual void ProcessOptions()=0
virtual Double_t GetProba(const Event *ev)
Definition: PDF.h:71
TSpline * fSpleffBvsS
Definition: MethodBase.h:568
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
Bool_t HasMVAPdfs() const
Definition: MethodBase.h:389
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
Double_t GetEffForRoot(Double_t)
returns efficiency as function of cut
const Event * Transform(const Event *) const
the transformation
virtual void AddWeightsXMLTo(void *parent) const =0
UInt_t fTMVATrainingVersion
Definition: MethodBase.h:541
TransformationHandler fTransformation
Definition: MethodBase.h:588
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
UInt_t GetNVariables() const
Definition: MethodBase.h:310
void AddClassesXMLTo(void *parent) const
write class info to XML
Bool_t fDisableWriting
Definition: MethodBase.h:527
virtual void ReadWeightsFromXML(void *wghtnode)=0
Double_t GetMean(Int_t ivar) const
Definition: MethodBase.h:319
Bool_t HasTrainingTree() const
Definition: MethodBase.h:428
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: MethodBase.h:687
virtual void Train()=0
Double_t fRmsB
Definition: MethodBase.h:580
Double_t fXmin
Definition: MethodBase.h:581
TSpline1 * fSplTrainRefB
Definition: MethodBase.h:622
TDirectory * fMethodBaseDir
Definition: MethodBase.h:549
SVector< double, 2 > v
Definition: Dict.h:5
UInt_t fROOTTrainingVersion
Definition: MethodBase.h:542
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:127
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream...
const TString & GetTitle() const
Definition: VariableInfo.h:65
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file; dummy implementation here ...
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms, mean, xmin, xmax of the event variable; this can be either done for the variables as th...
EMsgType
Definition: Types.h:61
Bool_t Help() const
Definition: MethodBase.h:419
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fMeanS
Definition: MethodBase.h:577
const Event * GetEvent() const
Definition: MethodBase.h:667
Int_t fNsmoothMVAPdf
Definition: MethodBase.h:643
Bool_t fTxtWeightsOnly
Definition: MethodBase.h:641
TDirectory * fBaseDir
Definition: MethodBase.h:548
virtual Double_t GetSignificance() const
compute significance of mean difference: significance = |⟨S⟩ - ⟨B⟩|/Sqrt(RMS_S^2 + RMS_B^2) ...
Bool_t fHasMVAPdfs
Definition: MethodBase.h:596
TSpline * fSplTrainEffBvsS
Definition: MethodBase.h:572
static MethodBase *& GetThisBaseThreadLocal()
Definition: MethodBase.h:633
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
Definition: MethodBase.cxx:396
UInt_t GetTrainingROOTVersionCode() const
Definition: MethodBase.h:344
1-D histogram with a double per channel (see TH1 documentation)
Definition: TH1.h:613
Double_t GetTrainTime() const
Definition: MethodBase.h:139
void SetNormalised(Bool_t norm)
Definition: MethodBase.h:412
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodBase.cxx:427
Double_t GetRMS(Int_t ivar) const
Definition: MethodBase.h:320
std::vector< const std::vector< TMVA::Event * > * > fEventCollections
Definition: MethodBase.h:624
TString fVerbosityLevelString
Definition: MethodBase.h:593
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
Double_t fRmsS
Definition: MethodBase.h:579
UInt_t fBackgroundClass
Definition: MethodBase.h:606
void AddTargetsXMLTo(void *parent) const
write target info to XML
void AddVarsXMLTo(void *parent) const
write variable info to XML
virtual void MakeClassSpecific(std::ostream &, const TString &="") const
Definition: MethodBase.h:435
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:320
void RerouteTransformationHandler(TransformationHandler *fTargetTransformation)
Definition: MethodBase.h:357
void SetTestTime(Double_t testTime)
Definition: MethodBase.h:142
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:740
double Double_t
Definition: RtypesCore.h:55
void SetWeightFileName(TString)
set the weight file name (deprecated)
EMsgType fVerbosityLevel
Definition: MethodBase.h:594
Describe directory structure in memory.
Definition: TDirectory.h:41
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:520
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as an overall quality measure of the classification ...
int type
Definition: TGX11.cxx:120
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:225
virtual void Reset()
Definition: MethodBase.h:170
The TH1 histogram class.
Definition: TH1.h:80
DataSetInfo & DataInfo() const
Definition: MethodBase.h:364
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:114
const TString & GetInputLabel(Int_t i) const
Definition: MethodBase.h:315
const TString & GetInputTitle(Int_t i) const
Definition: MethodBase.h:316
Bool_t TxtWeightsOnly() const
Definition: MethodBase.h:449
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
Definition: MethodBase.cxx:720
Double_t GetSignalReferenceCutOrientation() const
Definition: MethodBase.h:326
const TString GetProbaName() const
Definition: MethodBase.h:301
TString fParentDir
Definition: MethodBase.h:551
Bool_t fConstructedFromWeightFile
Definition: MethodBase.h:543
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:310
TString fVarTransformString
Definition: MethodBase.h:585
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:779
#define name(a, b)
Definition: linkTestLib0.cpp:5
Double_t GetMin(Int_t ivar, Int_t cls=-1) const
Double_t GetMean(Int_t ivar, Int_t cls=-1) const
Types::EMVA fMethodType
Definition: MethodBase.h:539
char Char_t
Definition: RtypesCore.h:29
Bool_t DoRegression() const
Definition: MethodBase.h:392
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as ⟨s^2⟩ = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
virtual void MakeClassSpecificHeader(std::ostream &, const TString &="") const
Definition: MethodBase.h:438
Abstract ClassifierFactory template that handles arbitrary types.
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
Ranking * fRanking
Definition: MethodBase.h:509
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate the sum of squared deviations of regression output versus "true" value from test sample ...
Definition: MethodBase.cxx:931
const TString & GetJobName() const
Definition: MethodBase.h:295
Double_t GetXmax(Int_t ivar) const
Definition: MethodBase.h:322
void PrintHelpMessage() const
prints out method-specific help method
static MethodBase * GetThisBase()
return a pointer to the base class of this method
UInt_t GetTrainingTMVAVersionCode() const
Definition: MethodBase.h:343
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:599
Double_t fSignalReferenceCut
the data set information (sometimes needed)
Definition: MethodBase.h:532
const Event * fTmpEvent
Definition: MethodBase.h:366
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored ...
void SetWeightFileDir(TString fileDir)
set directory of weight file
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:53
Int_t fNbinsMVAoutput
Definition: MethodBase.h:514
virtual Double_t GetTrainingEfficiency(const TString &)
Double_t fXmax
Definition: MethodBase.h:582
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
void DisableWriting(Bool_t setter)
Definition: MethodBase.h:396
void InitBase()
default initialization called by all constructors
Definition: MethodBase.cxx:328
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:519
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:90
A TTree object has a header with a name and a title.
Definition: TTree.h:94
const TString & GetInternalVarName(Int_t ivar) const
Definition: MethodBase.h:425
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:325
Double_t GetMax(Int_t ivar, Int_t cls=-1) const
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity: R(x) = Integrate_[-oo..x] { PDF(x') dx' } where PDF(x) is the PDF of the classifier's...
static Double_t IGetEffForRoot(Double_t)
interface for RootFinder
TString GetMethodTypeName() const
Definition: MethodBase.h:297
const TString & GetLabel() const
Definition: VariableInfo.h:64
virtual void ReadWeightsFromStream(std::istream &)=0
const Bool_t kTRUE
Definition: Rtypes.h:91
void SetTestvarName(const TString &v="")
Definition: MethodBase.h:306
TString fFileDir
Definition: MethodBase.h:553
float value
Definition: math.cpp:443
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables, variable transformation type etc.
virtual void TestClassification()
initialization
void SetBaseDir(TDirectory *methodDir)
Definition: MethodBase.h:336
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:390
double log(double)
Bool_t fSetupCompleted
Definition: MethodBase.h:627
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:820
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:329
Double_t fSignalReferenceCutOrientation
Definition: MethodBase.h:533
Double_t GetXmin(Int_t ivar) const
Definition: MethodBase.h:321
const TString & GetTestvarName() const
Definition: MethodBase.h:300