Logo ROOT   6.12/07
Reference Guide
MethodBase.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Virtual base class for all MVA methods *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005-2011: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * U. of Bonn, Germany *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  **********************************************************************************/
32 
33 #ifndef ROOT_TMVA_MethodBase
34 #define ROOT_TMVA_MethodBase
35 
36 //////////////////////////////////////////////////////////////////////////
37 // //
38 // MethodBase //
39 // //
40 // Virtual base class for all TMVA methods //
41 // //
42 //////////////////////////////////////////////////////////////////////////
43 
44 #include <iosfwd>
45 #include <vector>
46 #include <map>
47 #include "assert.h"
48 
49 #include "TString.h"
50 
51 #include "TMVA/IMethod.h"
52 #include "TMVA/Configurable.h"
53 #include "TMVA/Types.h"
54 #include "TMVA/DataSet.h"
55 #include "TMVA/Event.h"
57 #include <TMVA/Results.h>
58 
59 #include <TFile.h>
60 
61 class TGraph;
62 class TTree;
63 class TDirectory;
64 class TSpline;
65 class TH1F;
66 class TH1D;
67 class TMultiGraph;
68 
69 /*! \class TMVA::IPythonInteractive
70 \ingroup TMVA
71 
72 This class is needed by JsMVA. It is a helper class for tracking errors during
73 the training in a Jupyter notebook, and it is only initialized in a Jupyter notebook context.
74 On initialization we specify some titles, and a TGraph is created for every title.
75 New data points can easily be added to all TGraphs. These graphs are added to a
76 TMultiGraph, and during an interactive training we get this TMultiGraph object
77 and plot it with JsROOT.
78 */
79 
80 namespace TMVA {
81 
82  class Ranking;
83  class PDF;
84  class TSpline1;
85  class MethodCuts;
86  class MethodBoost;
87  class DataSetInfo;
88  namespace Experimental {
89  class Classification;
90  }
91 
93  public:
96  void Init(std::vector<TString>& graphTitles);
97  void ClearGraphs();
98  void AddPoint(Double_t x, Double_t y1, Double_t y2);
99  void AddPoint(std::vector<Double_t>& dat);
100  inline TMultiGraph* Get() {return fMultiGraph;}
101  inline bool NotInitialized(){ return fNumGraphs==0;};
102  private:
103  TMultiGraph* fMultiGraph;
104  std::vector<TGraph*> fGraphs;
107  };
108 
109  class MethodBase : virtual public IMethod, public Configurable {
110 
111  friend class Factory;
112  friend class RootFinder;
113  friend class MethodBoost;
115 
116  public:
117 
118  enum EWeightFileType { kROOT=0, kTEXT };
119 
120  // standard constructor (job name, method type, method title, data set info, options)
121  MethodBase( const TString& jobName,
122  Types::EMVA methodType,
123  const TString& methodTitle,
124  DataSetInfo& dsi,
125  const TString& theOption = "" );
126 
127  // constructor used for Testing + Application of the MVA, only (no training),
128  // using given weight file
129  MethodBase( Types::EMVA methodType,
130  DataSetInfo& dsi,
131  const TString& weightFile );
132 
133  // virtual destructor
134  virtual ~MethodBase();
135 
136  // declaration, processing and checking of configuration options
137  void SetupMethod();
138  void ProcessSetup();
139  virtual void CheckSetup(); // may be overwritten by derived classes
140 
141  // ---------- main training and testing methods ------------------------------
142 
143  // prepare tree branch with the method's discriminating variable
144  void AddOutput( Types::ETreeType type, Types::EAnalysisType analysisType );
145 
146  // performs classifier training
147  // calls methods Train() implemented by derived classes
148  void TrainMethod();
149 
150  // optimize tuning parameters
151  virtual std::map<TString,Double_t> OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA");
152  virtual void SetTuneParameters(std::map<TString,Double_t> tuneParameters);
153 
154  virtual void Train() = 0;
155 
156  // store and retrieve time used for training
157  void SetTrainTime( Double_t trainTime ) { fTrainTime = trainTime; }
158  Double_t GetTrainTime() const { return fTrainTime; }
159 
160  // store and retrieve time used for testing
161  void SetTestTime ( Double_t testTime ) { fTestTime = testTime; }
162  Double_t GetTestTime () const { return fTestTime; }
163 
164  // performs classifier testing
165  virtual void TestClassification();
166  virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X");
167 
168  // performs multiclass classifier testing
169  virtual void TestMulticlass();
170 
171  // performs regression testing
172  virtual void TestRegression( Double_t& bias, Double_t& biasT,
173  Double_t& dev, Double_t& devT,
174  Double_t& rms, Double_t& rmsT,
175  Double_t& mInf, Double_t& mInfT, // mutual information
176  Double_t& corr,
177  Types::ETreeType type );
178 
179  // options treatment
180  virtual void Init() = 0;
181  virtual void DeclareOptions() = 0;
182  virtual void ProcessOptions() = 0;
183  virtual void DeclareCompatibilityOptions(); // declaration of past options
184 
185  // reset the Method --> As if it was not yet trained, just instantiated
186  // virtual void Reset() = 0;
187  //for the moment, I provide a dummy (that would not work) default, just to make
188  // compilation/running w/o parameter optimisation still possible
189  virtual void Reset(){return;}
190 
191  // classifier response:
192  // some methods may return a per-event error estimate
193  // error calculation is skipped if err==0
194  virtual Double_t GetMvaValue( Double_t* errLower = 0, Double_t* errUpper = 0) = 0;
195 
196  // signal/background classification response
197  Double_t GetMvaValue( const TMVA::Event* const ev, Double_t* err = 0, Double_t* errUpper = 0 );
198 
199  protected:
200  // helper function to set errors to -1
201  void NoErrorCalc(Double_t* const err, Double_t* const errUpper);
202 
203  // signal/background classification response for all current set of data
204  virtual std::vector<Double_t> GetMvaValues(Long64_t firstEvt = 0, Long64_t lastEvt = -1, Bool_t logProgress = false);
205 
206 
207  public:
208  // regression response
209  const std::vector<Float_t>& GetRegressionValues(const TMVA::Event* const ev){
210  fTmpEvent = ev; // publish ev as the temporary event while the virtual no-argument overload runs
211  const std::vector<Float_t>& response = GetRegressionValues();
212  fTmpEvent = nullptr; // clear the temporary event again before returning
213  return response;
214  }
215  // base-class default of the regression response: an empty vector (virtual, derived classes may override)
216  virtual const std::vector<Float_t>& GetRegressionValues() {
217  static const std::vector<Float_t> noValues; // one shared immutable instance: avoids leaking a heap allocation on every call
218  return noValues;
219  }
220 
221  // multiclass classification response; the base-class default is an empty vector (virtual, derived classes may override)
222  virtual const std::vector<Float_t>& GetMulticlassValues() {
223  static const std::vector<Float_t> noValues; // one shared immutable instance: avoids leaking a heap allocation on every call
224  return noValues;
225  }
226 
227  // probability of classifier response (mvaval) to be signal (requires "CreateMvaPdf" option set)
228  virtual Double_t GetProba( const Event *ev); // the simple one, automatically calculates the mvaVal and uses the SAME sig/bkg ratio as given in the training sample (typically 50/50 .. (NormMode=EqualNumEvents) but can be different)
229  virtual Double_t GetProba( Double_t mvaVal, Double_t ap_sig );
230 
231  // Rarity of classifier response (signal or background (default) is uniform in [0,1])
232  virtual Double_t GetRarity( Double_t mvaVal, Types::ESBType reftype = Types::kBackground ) const;
233 
234  // create ranking
235  virtual const Ranking* CreateRanking() = 0;
236 
237  // make ROOT-independent C++ class
238  virtual void MakeClass( const TString& classFileName = TString("") ) const;
239 
240  // print help message
241  void PrintHelpMessage() const;
242 
243  //
244  // streamer methods for training information (creates "weight" files) --------
245  //
246  public:
247  void WriteStateToFile () const;
248  void ReadStateFromFile ();
249 
250  protected:
251  // the actual "weights"
252  virtual void AddWeightsXMLTo ( void* parent ) const = 0;
253  virtual void ReadWeightsFromXML ( void* wghtnode ) = 0;
254  virtual void ReadWeightsFromStream( std::istream& ) = 0; // backward compatibility
255  virtual void ReadWeightsFromStream( TFile& ) {} // backward compatibility
256 
257  private:
258  friend class MethodCategory;
259  friend class MethodCompositeBase;
260  void WriteStateToXML ( void* parent ) const;
261  void ReadStateFromXML ( void* parent );
262  void WriteStateToStream ( std::ostream& tf ) const; // needed for MakeClass
263  void WriteVarsToStream ( std::ostream& tf, const TString& prefix = "" ) const; // needed for MakeClass
264 
265 
266  public: // these two need to be public, they are used to read in-memory weight-files
267  void ReadStateFromStream ( std::istream& tf ); // backward compatibility
268  void ReadStateFromStream ( TFile& rf ); // backward compatibility
269  void ReadStateFromXMLString( const char* xmlstr ); // for reading from memory
270 
271  private:
272  // the variable information
273  void AddVarsXMLTo ( void* parent ) const;
274  void AddSpectatorsXMLTo ( void* parent ) const;
275  void AddTargetsXMLTo ( void* parent ) const;
276  void AddClassesXMLTo ( void* parent ) const;
277  void ReadVariablesFromXML ( void* varnode );
278  void ReadSpectatorsFromXML( void* specnode);
279  void ReadTargetsFromXML ( void* tarnode );
280  void ReadClassesFromXML ( void* clsnode );
281  void ReadVarsFromStream ( std::istream& istr ); // backward compatibility
282 
283  public:
284  // ---------------------------------------------------------------------------
285 
286  // write evaluation histograms into target file
287  virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
288 
289  // write classifier-specific monitoring information to target file
290  virtual void WriteMonitoringHistosToFile() const;
291 
292  // ---------- public evaluation methods --------------------------------------
293 
294  // individual initialization for testing of each method
295  // overload this one for individual initialisation of the testing,
296  // it is then called automatically within the global "TestInit"
297 
298  // variables (and private member functions) for the Evaluation:
299  // get the efficiency. It fills a histogram for efficiency/vs/bkg
300  // and returns the single efficiency value requested
301  // in the TString argument. (Watch the string format)
302  virtual Double_t GetEfficiency( const TString&, Types::ETreeType, Double_t& err );
303  virtual Double_t GetTrainingEfficiency(const TString& );
304  virtual std::vector<Float_t> GetMulticlassEfficiency( std::vector<std::vector<Float_t> >& purity );
305  virtual std::vector<Float_t> GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity );
306  virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type);
307  virtual Double_t GetSignificance() const;
308  virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const;
309  virtual Double_t GetROCIntegral(PDF *pdfS=0, PDF *pdfB=0) const;
310  virtual Double_t GetMaximumSignificance( Double_t SignalEvents, Double_t BackgroundEvents,
311  Double_t& optimal_significance_value ) const;
312  virtual Double_t GetSeparation( TH1*, TH1* ) const;
313  virtual Double_t GetSeparation( PDF* pdfS = 0, PDF* pdfB = 0 ) const;
314 
315  virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t& stddev,Double_t& stddev90Percent ) const;
316  // ---------- public accessors -----------------------------------------------
317 
318  // classifier naming (a lot of names ... aren't they ;-)
319  const TString& GetJobName () const { return fJobName; }
320  const TString& GetMethodName () const { return fMethodName; }
321  TString GetMethodTypeName() const { return Types::Instance().GetMethodName(fMethodType); }
322  Types::EMVA GetMethodType () const { return fMethodType; }
323  const char* GetName () const { return fMethodName.Data(); }
324  const TString& GetTestvarName () const { return fTestvar; }
325  const TString GetProbaName () const { return fTestvar + "_Proba"; }
326  TString GetWeightFileName() const;
327 
328  // build classifier name in Test tree
329  // MVA prefix (e.g., "TMVA_")
330  void SetTestvarName ( const TString & v="" ) { fTestvar = (v=="") ? ("MVA_" + GetMethodName()) : v; }
331 
332  // number of input variables used by classifier
333  UInt_t GetNvar() const { return DataInfo().GetNVariables(); }
334  UInt_t GetNVariables() const { return DataInfo().GetNVariables(); }
335  UInt_t GetNTargets() const { return DataInfo().GetNTargets(); };
336 
337  // internal names and expressions of input variables
338  const TString& GetInputVar ( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetInternalName(); }
339  const TString& GetInputLabel( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetLabel(); }
340  const char * GetInputTitle( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetTitle(); }
341 
342  // normalisation and limit accessors
343  Double_t GetMean( Int_t ivar ) const { return GetTransformationHandler().GetMean(ivar); }
344  Double_t GetRMS ( Int_t ivar ) const { return GetTransformationHandler().GetRMS(ivar); }
345  Double_t GetXmin( Int_t ivar ) const { return GetTransformationHandler().GetMin(ivar); }
346  Double_t GetXmax( Int_t ivar ) const { return GetTransformationHandler().GetMax(ivar); }
347 
348  // gets the minimum requirement on the MVA output to declare an event signal-like
349  Double_t GetSignalReferenceCut() const { return fSignalReferenceCut; }
350  Double_t GetSignalReferenceCutOrientation() const { return fSignalReferenceCutOrientation; }
351 
352  // sets the minimum requirement on the MVA output to declare an event signal-like
353  void SetSignalReferenceCut( Double_t cut ) { fSignalReferenceCut = cut; }
354  void SetSignalReferenceCutOrientation( Double_t cutOrientation ) { fSignalReferenceCutOrientation = cutOrientation; }
355 
356  // pointers to ROOT directories
357  TDirectory* BaseDir() const;
358  TDirectory* MethodBaseDir() const;
359  TFile* GetFile() const {return fFile;}
360 
361  void SetMethodDir ( TDirectory* methodDir ) { fBaseDir = fMethodBaseDir = methodDir; }
362  void SetBaseDir( TDirectory* methodDir ){ fBaseDir = methodDir; }
363  void SetMethodBaseDir( TDirectory* methodDir ){ fMethodBaseDir = methodDir; }
364  void SetFile(TFile* file){fFile=file;}
365 
366  //Silent file
367  void SetSilentFile(Bool_t status){fSilentFile=status;}
368  Bool_t IsSilentFile(){return fSilentFile;}
369 
370  //Model Persistence
371  void SetModelPersistence(Bool_t status){fModelPersistence=status;}//added support to create/remove dir here if exits or not
372  Bool_t IsModelPersistence(){return fModelPersistence;}
373 
374  // the TMVA version can be obtained and checked using
375  // if (GetTrainingTMVAVersionCode()>TMVA_VERSION(3,7,2)) {...}
376  // or
377  // if (GetTrainingROOTVersionCode()>ROOT_VERSION(5,15,5)) {...}
378  UInt_t GetTrainingTMVAVersionCode() const { return fTMVATrainingVersion; }
379  UInt_t GetTrainingROOTVersionCode() const { return fROOTTrainingVersion; }
380  TString GetTrainingTMVAVersionString() const;
381  TString GetTrainingROOTVersionString() const;
382 
384  {
385  if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation;
386  }
387  const TransformationHandler& GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const
388  { // read-only access: the rerouted handler is used only if one is set and the caller allows it
389  return (takeReroutedIfAvailable && fTransformationPointer) ? *fTransformationPointer : fTransformation;
390  }
391 
392  void RerouteTransformationHandler (TransformationHandler* fTargetTransformation) { fTransformationPointer=fTargetTransformation; }
393 
394  // ---------- event accessors ------------------------------------------------
395 
396  // returns reference to data set
397  // NOTE: this DataSet is the "original" dataset, i.e. the one seen by ALL Classifiers WITHOUT transformation
398  DataSet* Data() const { return DataInfo().GetDataSet(); }
399  DataSetInfo& DataInfo() const { return fDataSetInfo; }
400 
401  mutable const Event* fTmpEvent; //! temporary event when testing on a different DataSet than the own one
402 
403  // event reference and update
404  // NOTE: these Event accessors make sure that you get the events transformed according to the
405  // particular classifiers transformation chosen
406  UInt_t GetNEvents () const { return Data()->GetNEvents(); }
407  const Event* GetEvent () const;
408  const Event* GetEvent ( const TMVA::Event* ev ) const;
409  const Event* GetEvent ( Long64_t ievt ) const;
410  const Event* GetEvent ( Long64_t ievt , Types::ETreeType type ) const;
411  const Event* GetTrainingEvent( Long64_t ievt ) const;
412  const Event* GetTestingEvent ( Long64_t ievt ) const;
413  const std::vector<TMVA::Event*>& GetEventCollection( Types::ETreeType type );
414 
415  // ---------- public auxiliary methods ---------------------------------------
416 
417  // this method is used to decide whether an event is signal- or background-like
418  // the reference cut "xC" is taken to be where
419  // Int_[-oo,xC] { PDF_S(x) dx } = Int_[xC,+oo] { PDF_B(x) dx }
420  virtual Bool_t IsSignalLike();
421  virtual Bool_t IsSignalLike(Double_t mvaVal);
422 
423 
424  Bool_t HasMVAPdfs() const { return fHasMVAPdfs; }
425  virtual void SetAnalysisType( Types::EAnalysisType type ) { fAnalysisType = type; }
426  Types::EAnalysisType GetAnalysisType() const { return fAnalysisType; }
427  Bool_t DoRegression() const { return fAnalysisType == Types::kRegression; }
428  Bool_t DoMulticlass() const { return fAnalysisType == Types::kMulticlass; }
429 
430  // DEPRECATED switch for suppressing writing to XML and writing of standalone classes: true turns model persistence off, false turns it on
431  void DisableWriting(Bool_t setter){ fModelPersistence = !setter; }//DEPRECATED
432 
433  protected:
434  // helper variables for JsMVA
435  IPythonInteractive *fInteractive = nullptr;
436  bool fExitFromTraining = false;
437  UInt_t fIPyMaxIter = 0, fIPyCurrentIter = 0;
438 
439  public:
440 
441  // initializing IPythonInteractive class (for JsMVA only)
442  inline void InitIPythonInteractive(){ // (re)creates the JsMVA helper, discarding any previous one
443  delete fInteractive; // deleting a null pointer is a no-op, so no guard is needed
444  fInteractive = new IPythonInteractive();
445  }
446 
447  // get training errors (for JsMVA only); returns nullptr if no interactive helper exists (not initialized, or already released)
448  inline TMultiGraph* GetInteractiveTrainingError(){return fInteractive ? fInteractive->Get() : nullptr;}
449 
450  // stops the training process (for JsMVA only)
451  inline void ExitFromTraining(){
452  fExitFromTraining = true;
453  }
454 
455  // checks if the training ended (for JsMVA only)
456  inline bool TrainingEnded(){
457  if (!fExitFromTraining) return false; // no exit requested: keep the JsMVA helper
458  // exit was requested: release the helper (deleting a null pointer is a no-op)
459  delete fInteractive;
460  fInteractive = nullptr;
461  return true;
462  }
463 
464  // get fIPyMaxIter
465  inline UInt_t GetMaxIter(){ return fIPyMaxIter; }
466 
467  // get fIPyCurrentIter
468  inline UInt_t GetCurrentIter(){ return fIPyCurrentIter; }
469 
470  protected:
471 
472  // ---------- protected accessors -------------------------------------------
473 
474  //TDirectory* LocalTDir() const { return Data().LocalRootDir(); }
475 
476  // weight file name and directory (given by global config variable)
477  void SetWeightFileName( TString );
478 
479  const TString& GetWeightFileDir() const { return fFileDir; }
480  void SetWeightFileDir( TString fileDir );
481 
482  // are input variables normalised ?
483  Bool_t IsNormalised() const { return fNormalise; }
484  void SetNormalised( Bool_t norm ) { fNormalise = norm; }
485 
486  // set number of input variables (only used by MethodCuts, could perhaps be removed)
487  // void SetNvar( Int_t n ) { fNvar = n; }
488 
489  // verbose and help flags
490  Bool_t Verbose() const { return fVerbose; }
491  Bool_t Help () const { return fHelp; }
492 
493  // ---------- protected event and tree accessors -----------------------------
494 
495  // names of input variables (if the original names are expressions, they are
496  // transformed into regexps)
497  const TString& GetInternalVarName( Int_t ivar ) const { return (*fInputVars)[ivar]; }
498  const TString& GetOriginalVarName( Int_t ivar ) const { return DataInfo().GetVariableInfo(ivar).GetExpression(); }
499 
500  Bool_t HasTrainingTree() const { return Data()->GetNTrainingEvents() != 0; }
501 
502  // ---------- protected auxiliary methods ------------------------------------
503 
504  protected:
505 
506  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
507  virtual void MakeClassSpecific( std::ostream&, const TString& = "" ) const {}
508 
509  // header and auxiliary classes
510  virtual void MakeClassSpecificHeader( std::ostream&, const TString& = "" ) const {}
511 
512  // static pointer to this object - required for ROOT finder (to be solved differently)(solved by Omar)
513  //static MethodBase* GetThisBase();
514 
515  // some basic statistical analysis
516  void Statistics( Types::ETreeType treeType, const TString& theVarName,
518  Double_t&, Double_t&, Double_t& );
519 
520  // if TRUE, write weights only to text files
521  Bool_t TxtWeightsOnly() const { return kTRUE; }
522 
523  protected:
524 
525  // access to event information that needs method-specific information
526 
527  Bool_t IsConstructedFromWeightFile() const { return fConstructedFromWeightFile; }
528 
529  private:
530 
531  // ---------- private definitions --------------------------------------------
532  // Initialisation
533  void InitBase();
534  void DeclareBaseOptions();
535  void ProcessBaseOptions();
536 
537  // used in efficiency computation
538  enum ECutOrientation { kNegative = -1, kPositive = +1 };
539  ECutOrientation GetCutOrientation() const { return fCutOrientation; }
540 
541  // ---------- private accessors ---------------------------------------------
542 
543  // reset required for RootFinder
544  void ResetThisBase();
545 
546  // ---------- private auxiliary methods --------------------------------------
547 
548  // PDFs for classifier response (required to compute signal probability and Rarity)
549  void CreateMVAPdfs();
550 
551  // for root finder
552  //virtual method to find ROOT
553  virtual Double_t GetValueForRoot ( Double_t ); // implementation
554 
555  // used for file parsing
556  Bool_t GetLine( std::istream& fin, char * buf );
557 
558  // fill test tree with classification or regression results
559  virtual void AddClassifierOutput ( Types::ETreeType type );
560  virtual void AddClassifierOutputProb( Types::ETreeType type );
561  virtual void AddRegressionOutput ( Types::ETreeType type );
562  virtual void AddMulticlassOutput ( Types::ETreeType type );
563 
564  private:
565 
566  void AddInfoItem( void* gi, const TString& name,
567  const TString& value) const;
568 
569  // ========== class members ==================================================
570 
571  protected:
572 
573  // direct accessors
574  Ranking* fRanking; // pointer to ranking object (created by derived classifiers)
575  std::vector<TString>* fInputVars; // vector of input variables used in MVA
576 
577  // histogram binning
578  Int_t fNbins; // number of bins in input variable histograms
579  Int_t fNbinsMVAoutput; // number of bins in MVA output histograms
580  Int_t fNbinsH; // number of bins in evaluation histograms
581 
582  Types::EAnalysisType fAnalysisType; // method-mode : true --> regression, false --> classification
583 
584  std::vector<Float_t>* fRegressionReturnVal; // holds the return-values for the regression
585  std::vector<Float_t>* fMulticlassReturnVal; // holds the return-values for the multiclass classification
586 
587  private:
588 
589  // MethodCuts redefines some of the evaluation variables and histograms -> must access private members
590  friend class MethodCuts;
591 
592 
593  // data sets
594  DataSetInfo& fDataSetInfo; //! the data set information (sometimes needed)
595 
596  Double_t fSignalReferenceCut; // minimum requirement on the MVA output to declare an event signal-like
597  Double_t fSignalReferenceCutOrientation; // minimum requirement on the MVA output to declare an event signal-like
598  Types::ESBType fVariableTransformType; // this is the event type (sig or bgd) assumed for variable transform
599 
600  // naming and versioning
601  TString fJobName; // name of job -> user defined, appears in weight files
602  TString fMethodName; // name of the method (set in derived class)
603  Types::EMVA fMethodType; // type of method (set in derived class)
604  TString fTestvar; // variable used in evaluation, etc (mostly the MVA)
605  UInt_t fTMVATrainingVersion; // TMVA version used for training
606  UInt_t fROOTTrainingVersion; // ROOT version used for training
607  Bool_t fConstructedFromWeightFile; // is it obtained from weight file?
608 
609  // Directory structure: dataloader/fMethodBaseDir/fBaseDir
610  // where the first directory name is defined by the method type
611  // and the second is user supplied (the title given in Factory::BookMethod())
612  TDirectory* fBaseDir; // base directory for the instance, needed to know where to jump back from localDir
613  mutable TDirectory* fMethodBaseDir; // base directory for the method
614  //this will be the next way to save results
616 
617  //SilentFile
619  //Model Persistence
621 
622  TString fParentDir; // method parent name, like booster name
623 
624  TString fFileDir; // unix sub-directory for weight files (default: DataLoader's Name + "weights")
625  TString fWeightFile; // weight file name
626 
627  private:
628 
629  TH1* fEffS; // efficiency histogram for rootfinder
630 
631  PDF* fDefaultPDF; // default PDF definitions
632  PDF* fMVAPdfS; // signal MVA PDF
633  PDF* fMVAPdfB; // background MVA PDF
634 
635  // TH1D* fmvaS; // PDFs of MVA distribution (signal)
636  // TH1D* fmvaB; // PDFs of MVA distribution (background)
637  PDF* fSplS; // PDFs of MVA distribution (signal)
638  PDF* fSplB; // PDFs of MVA distribution (background)
639  TSpline* fSpleffBvsS; // splines for signal eff. versus background eff.
640 
641  PDF* fSplTrainS; // PDFs of training MVA distribution (signal)
642  PDF* fSplTrainB; // PDFs of training MVA distribution (background)
643  TSpline* fSplTrainEffBvsS; // splines for training signal eff. versus background eff.
644 
645  private:
646 
647  // basic statistics quantities of MVA
648  Double_t fMeanS; // mean (signal)
649  Double_t fMeanB; // mean (background)
650  Double_t fRmsS; // RMS (signal)
651  Double_t fRmsB; // RMS (background)
652  Double_t fXmin; // minimum (signal and background)
653  Double_t fXmax; // maximum (signal and background)
654 
655  // variable preprocessing
656  TString fVarTransformString; // labels variable transform method
657 
658  TransformationHandler* fTransformationPointer; // pointer to the rest of transformations
659  TransformationHandler fTransformation; // the list of transformations
660 
661 
662  // help and verbosity
663  Bool_t fVerbose; // verbose flag
664  TString fVerbosityLevelString; // verbosity level (user input string)
665  EMsgType fVerbosityLevel; // verbosity level
666  Bool_t fHelp; // help flag
667  Bool_t fHasMVAPdfs; // MVA Pdfs are created for this classifier
668 
669  Bool_t fIgnoreNegWeightsInTraining;// If true, events with negative weights are not used in training
670 
671  protected:
672 
673  Bool_t IgnoreEventsWithNegWeightsInTraining() const { return fIgnoreNegWeightsInTraining; }
674 
675  // for signal/background
676  UInt_t fSignalClass; // index of the Signal-class
677  UInt_t fBackgroundClass; // index of the Background-class
678 
679  private:
680 
681  // timing variables
682  Double_t fTrainTime; // for timing measurements
683  Double_t fTestTime; // for timing measurements
684 
685  // orientation of cut: depends on signal and background mean values
686  ECutOrientation fCutOrientation; // +1 if Sig>Bkg, -1 otherwise
687 
688  // for root finder
689  TSpline1* fSplRefS; // helper splines for RootFinder (signal)
690  TSpline1* fSplRefB; // helper splines for RootFinder (background)
691 
692  TSpline1* fSplTrainRefS; // helper splines for RootFinder (signal)
693  TSpline1* fSplTrainRefB; // helper splines for RootFinder (background)
694 
695  mutable std::vector<const std::vector<TMVA::Event*>*> fEventCollections; // if the method needs the complete event-collection, the transformed event coll. ist stored here.
696 
697  public:
698  Bool_t fSetupCompleted; // is method setup
699 
700  private:
701 
702  // This is a workaround for OSx where static thread_local data members are
703  // not supported. The C++ solution would indeed be the following:
704 // static MethodBase*& GetThisBaseThreadLocal() {TTHREAD_TLS(MethodBase*) fgThisBase(nullptr); return fgThisBase; };
705 
706  // ===== deprecated options, kept for backward compatibility =====
707  private:
708 
709  Bool_t fNormalise; // normalise input variables
710  Bool_t fUseDecorr; // synonymous for decorrelation
711  TString fVariableTransformTypeString; // labels variable transform type
712  Bool_t fTxtWeightsOnly; // if TRUE, write weights only to text files
713  Int_t fNbinsMVAPdf; // number of bins used in histogram that creates PDF
714  Int_t fNsmoothMVAPdf; // number of times a histogram is smoothed before creating the PDF
715 
716  protected:
718  ClassDef(MethodBase,0); // Virtual base class for all TMVA method
719 
720  };
721 } // namespace TMVA
722 
723 
724 
725 
726 
727 
728 
729 // ========== INLINE FUNCTIONS =========================================================
730 
731 
732 //_______________________________________________________________________
733 inline const TMVA::Event* TMVA::MethodBase::GetEvent( const TMVA::Event* ev ) const {
734  // pass the caller-supplied event through the transformation handler in effect for this method
735  return this->GetTransformationHandler().Transform( ev );
736 }
737 
739 {
740  if(fTmpEvent)
741  return GetTransformationHandler().Transform(fTmpEvent);
742  else
743  return GetTransformationHandler().Transform(Data()->GetEvent());
744 }
745 
747 {
748  assert(fTmpEvent==0);
749  return GetTransformationHandler().Transform(Data()->GetEvent(ievt));
750 }
751 
753 {
754  assert(fTmpEvent==0);
755  return GetTransformationHandler().Transform(Data()->GetEvent(ievt, type));
756 }
757 
759 {
760  assert(fTmpEvent==0);
761  return GetEvent(ievt, Types::kTraining);
762 }
763 
765 {
766  assert(fTmpEvent==0);
767  return GetEvent(ievt, Types::kTesting);
768 }
769 
770 #endif
Bool_t HasMVAPdfs() const
Definition: MethodBase.h:424
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:582
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:371
TString fMethodName
Definition: MethodBase.h:602
virtual void ReadWeightsFromStream(TFile &)
Definition: MethodBase.h:255
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodBase.h:222
long long Long64_t
Definition: RtypesCore.h:69
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:136
Bool_t fIgnoreNegWeightsInTraining
Definition: MethodBase.h:669
Bool_t IsConstructedFromWeightFile() const
Definition: MethodBase.h:527
virtual void MakeClassSpecificHeader(std::ostream &, const TString &="") const
Definition: MethodBase.h:510
TSpline1 * fSplTrainRefS
Definition: MethodBase.h:692
const TString GetProbaName() const
Definition: MethodBase.h:325
std::vector< TGraph * > fGraphs
Definition: MethodBase.h:104
const TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const
Definition: MethodBase.h:387
UInt_t GetNvar() const
Definition: MethodBase.h:333
static Types & Instance()
Returns the single instance of "Types" if it exists already, otherwise creates it (Singleton)
Definition: Types.cxx:70
const TString & GetOriginalVarName(Int_t ivar) const
Definition: MethodBase.h:498
TString fWeightFile
Definition: MethodBase.h:625
TString fVariableTransformTypeString
Definition: MethodBase.h:711
void SetMethodBaseDir(TDirectory *methodDir)
Definition: MethodBase.h:363
Base class for spline implementation containing the Draw/Paint methods.
Definition: TSpline.h:20
TransformationHandler * fTransformationPointer
Definition: MethodBase.h:658
Types::ESBType fVariableTransformType
Definition: MethodBase.h:598
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
EAnalysisType
Definition: Types.h:125
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:35
void InitIPythonInteractive()
Definition: MethodBase.h:442
Virtual base class for all MVA methods.
Definition: MethodBase.h:109
const std::vector< Float_t > & GetRegressionValues(const TMVA::Event *const ev)
Definition: MethodBase.h:209
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:354
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodBase.h:216
Basic string class.
Definition: TString.h:125
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:567
void SetTrainTime(Double_t trainTime)
Definition: MethodBase.h:157
const TString & GetInternalVarName(Int_t ivar) const
Definition: MethodBase.h:497
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:383
Ranking for variables in method (implementation)
Definition: Ranking.h:48
int Int_t
Definition: RtypesCore.h:41
TMultiGraph * Get()
Definition: MethodBase.h:100
bool Bool_t
Definition: RtypesCore.h:59
Results * fResults
Definition: MethodBase.h:717
TString fJobName
Definition: MethodBase.h:601
TSpline1 * fSplRefB
Definition: MethodBase.h:690
UInt_t GetNTargets() const
Definition: MethodBase.h:335
TSpline1 * fSplRefS
Definition: MethodBase.h:689
std::vector< TString > * fInputVars
Definition: MethodBase.h:575
const char * GetInputTitle(Int_t i) const
Definition: MethodBase.h:340
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:367
Double_t fTrainTime
Definition: MethodBase.h:682
Double_t fTestTime
Definition: MethodBase.h:683
Double_t GetMean(Int_t ivar) const
Definition: MethodBase.h:343
Double_t GetTrainTime() const
Definition: MethodBase.h:158
const TString & GetInputLabel(Int_t i) const
Definition: MethodBase.h:339
void SetMethodDir(TDirectory *methodDir)
Definition: MethodBase.h:361
const TString & GetWeightFileDir() const
Definition: MethodBase.h:479
UInt_t fSignalClass
Definition: MethodBase.h:676
const TString & GetInputVar(Int_t i) const
Definition: MethodBase.h:338
Double_t x[n]
Definition: legend1.C:17
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:594
#define ClassDef(name, id)
Definition: Rtypes.h:320
ECutOrientation fCutOrientation
Definition: MethodBase.h:686
Bool_t TxtWeightsOnly() const
Definition: MethodBase.h:521
UInt_t GetTrainingTMVAVersionCode() const
Definition: MethodBase.h:378
const Event * GetEvent() const
Definition: MethodBase.h:738
DataSet * Data() const
Definition: MethodBase.h:398
void Init(TClassEdit::TInterpreterLookupHelper *helper)
Definition: TClassEdit.cxx:119
Virtual base class for combining several TMVA methods.
Double_t fMeanB
Definition: MethodBase.h:649
Double_t GetXmin(Int_t ivar) const
Definition: MethodBase.h:345
DataSetInfo & DataInfo() const
Definition: MethodBase.h:399
Bool_t DoRegression() const
Definition: MethodBase.h:427
TString fTestvar
Definition: MethodBase.h:604
Class that contains all the data information.
Definition: DataSetInfo.h:60
TFile * GetFile() const
Definition: MethodBase.h:359
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
TSpline * fSpleffBvsS
Definition: MethodBase.h:639
Bool_t fModelPersistence
Definition: MethodBase.h:620
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: MethodBase.h:758
Bool_t Verbose() const
Definition: MethodBase.h:490
UInt_t fTMVATrainingVersion
Definition: MethodBase.h:605
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:406
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
Double_t GetXmax(Int_t ivar) const
Definition: MethodBase.h:346
TransformationHandler fTransformation
Definition: MethodBase.h:659
Bool_t DoMulticlass() const
Definition: MethodBase.h:428
Class that contains all the data information.
Definition: DataSet.h:69
virtual void MakeClassSpecific(std::ostream &, const TString &="") const
Definition: MethodBase.h:507
const Event * GetTestingEvent(Long64_t ievt) const
Definition: MethodBase.h:764
Bool_t HasTrainingTree() const
Definition: MethodBase.h:500
Double_t fRmsB
Definition: MethodBase.h:651
Double_t fXmin
Definition: MethodBase.h:652
std::string GetMethodName(TCppMethod_t)
Definition: Cppyy.cxx:733
TSpline1 * fSplTrainRefB
Definition: MethodBase.h:693
TDirectory * fMethodBaseDir
Definition: MethodBase.h:613
SVector< double, 2 > v
Definition: Dict.h:5
UInt_t fROOTTrainingVersion
Definition: MethodBase.h:606
const char * GetName() const
Definition: MethodBase.h:323
UInt_t GetTrainingROOTVersionCode() const
Definition: MethodBase.h:379
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fMeanS
Definition: MethodBase.h:648
Bool_t Help() const
Definition: MethodBase.h:491
Int_t fNsmoothMVAPdf
Definition: MethodBase.h:714
Bool_t fTxtWeightsOnly
Definition: MethodBase.h:712
const TString & GetJobName() const
Definition: MethodBase.h:319
const TString & GetMethodName() const
Definition: MethodBase.h:320
TDirectory * fBaseDir
Definition: MethodBase.h:612
Bool_t fHasMVAPdfs
Definition: MethodBase.h:667
TSpline * fSplTrainEffBvsS
Definition: MethodBase.h:643
Class that contains all the data information.
This is the main MVA steering class.
Definition: Factory.h:81
1-D histogram with a double per channel (see TH1 documentation)
Definition: TH1.h:610
Bool_t IsSilentFile()
Definition: MethodBase.h:368
Linear interpolation of TGraph.
Definition: TSpline1.h:43
Double_t GetSignalReferenceCutOrientation() const
Definition: MethodBase.h:350
void SetNormalised(Bool_t norm)
Definition: MethodBase.h:484
Double_t GetTestTime() const
Definition: MethodBase.h:162
UInt_t GetNVariables() const
Definition: MethodBase.h:334
std::vector< const std::vector< TMVA::Event * > * > fEventCollections
Definition: MethodBase.h:695
const Bool_t kFALSE
Definition: RtypesCore.h:88
TString fVerbosityLevelString
Definition: MethodBase.h:664
Class for categorizing the phase space.
Double_t fRmsS
Definition: MethodBase.h:650
UInt_t fBackgroundClass
Definition: MethodBase.h:677
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:673
void RerouteTransformationHandler(TransformationHandler *fTargetTransformation)
Definition: MethodBase.h:392
void SetTestTime(Double_t testTime)
Definition: MethodBase.h:161
Multivariate optimisation of signal efficiency for given background efficiency, applying rectangular ...
Definition: MethodCuts.h:61
UInt_t GetMaxIter()
Definition: MethodBase.h:465
double Double_t
Definition: RtypesCore.h:55
EMsgType fVerbosityLevel
Definition: MethodBase.h:665
Describe directory structure in memory.
Definition: TDirectory.h:34
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:585
Bool_t IsNormalised() const
Definition: MethodBase.h:483
int type
Definition: TGX11.cxx:120
void SetFile(TFile *file)
Definition: MethodBase.h:364
virtual void Reset()
Definition: MethodBase.h:189
The TH1 histogram class.
Definition: TH1.h:56
void ExitFromTraining()
Definition: MethodBase.h:451
TString fParentDir
Definition: MethodBase.h:622
Bool_t fConstructedFromWeightFile
Definition: MethodBase.h:607
TString fVarTransformString
Definition: MethodBase.h:656
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
Types::EMVA fMethodType
Definition: MethodBase.h:603
char Char_t
Definition: RtypesCore.h:29
Double_t GetRMS(Int_t ivar) const
Definition: MethodBase.h:344
Root finding using Brent's algorithm (translated from CERNLIB function RZERO)
Definition: RootFinder.h:48
This class is needed by JsMVA, and it's a helper class for tracking errors during the training in Jup...
Definition: MethodBase.h:92
Abstract ClassifierFactory template that handles arbitrary types.
Ranking * fRanking
Definition: MethodBase.h:574
TString GetMethodTypeName() const
Definition: MethodBase.h:321
Definition: file.py:1
bool TrainingEnded()
Definition: MethodBase.h:456
Class that is the base-class for a vector of results.
Definition: Results.h:57
Double_t fSignalReferenceCut
the data set information (sometimes needed)
Definition: MethodBase.h:596
const Event * fTmpEvent
Definition: MethodBase.h:401
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:349
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Int_t fNbinsMVAoutput
Definition: MethodBase.h:579
Bool_t fSilentFile
Definition: MethodBase.h:618
UInt_t GetCurrentIter()
Definition: MethodBase.h:468
Double_t fXmax
Definition: MethodBase.h:653
void DisableWriting(Bool_t setter)
Definition: MethodBase.h:431
ECutOrientation GetCutOrientation() const
Definition: MethodBase.h:539
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:584
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:426
A TTree object has a header with a name and a title.
Definition: TTree.h:70
const TString & GetTestvarName() const
Definition: MethodBase.h:324
void SetTestvarName(const TString &v="")
Definition: MethodBase.h:330
TString fFileDir
Definition: MethodBase.h:624
TMultiGraph * GetInteractiveTrainingError()
Definition: MethodBase.h:448
const Bool_t kTRUE
Definition: RtypesCore.h:87
Types::EMVA GetMethodType() const
Definition: MethodBase.h:322
void SetBaseDir(TDirectory *methodDir)
Definition: MethodBase.h:362
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:425
char name[80]
Definition: TGX11.cxx:109
Bool_t fSetupCompleted
Definition: MethodBase.h:698
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:353
Double_t fSignalReferenceCutOrientation
Definition: MethodBase.h:597
Bool_t IsModelPersistence()
Definition: MethodBase.h:372