ROOT  6.06/09
Reference Guide
MethodBoost.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Jan Therhaag, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBoost *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Class for boosting a TMVA method *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * U. of Victoria, Canada *
24  * MPI-K Heidelberg, Germany *
25  * U. of Bonn, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  **********************************************************************************/
31 
32 #ifndef ROOT_TMVA_MethodBoost
33 #define ROOT_TMVA_MethodBoost
34 
35 //////////////////////////////////////////////////////////////////////////
36 // //
37 // MethodBoost //
38 // //
39 // Class for boosting a TMVA method //
40 // //
41 //////////////////////////////////////////////////////////////////////////
42 
43 #include <iosfwd>
44 #include <vector>
45 
46 #ifndef ROOT_TMVA_MethodBase
47 #include "TMVA/MethodBase.h"
48 #endif
49 
50 #ifndef ROOT_TMVA_MethodCompositeBase
52 #endif
53 
54 namespace TMVA {
55 
56  class Factory; // DSMTEST
57  class Reader; // DSMTEST
58  class DataSetManager; // DSMTEST
59 
61 
62  public :
63 
64  // constructors
65  MethodBoost( const TString& jobName,
66  const TString& methodTitle,
67  DataSetInfo& theData,
68  const TString& theOption = "",
69  TDirectory* theTargetDir = NULL );
70 
72  const TString& theWeightFile,
73  TDirectory* theTargetDir = NULL );
74 
75  virtual ~MethodBoost( void );
76 
77  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ );
78 
79  // training and boosting all the classifiers
80  void Train( void );
81 
82  // ranking of input variables
83  const Ranking* CreateRanking();
84 
85  // saves the name and options string of the boosted classifier
86  Bool_t BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption );
87  void SetBoostedMethodName ( TString methodName ) { fBoostedMethodName = methodName; } // stores methodName in fBoostedMethodName
88 
89  Int_t GetBoostNum() { return fBoostNum; } // number of times the classifier is boosted (fBoostNum is UInt_t, converted to Int_t on return)
90 
91  void CleanBoostOptions();
92 
93  Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper = 0 );
94 
95  private :
96  // clean up
97  void ClearAll();
98 
99  // print fit results
100  void PrintResults( const TString&, std::vector<Double_t>&, const Double_t ) const;
101 
102  // initializing mostly monitoring tools of the boost process
103  void Init();
104  void InitHistos();
105  void CheckSetup();
106 
107  void MonitorBoost( Types::EBoostStage stage, UInt_t methodIdx=0);
108 
109  // the option handling methods
110  void DeclareOptions();
112  void ProcessOptions();
113 
114 
117  // training a single classifier
118  void SingleTrain();
119 
120  // calculating a boosting weight from the classifier, storing it in the next one
122  Double_t AdaBoost(MethodBase* method, Bool_t useYesNoLeaf );
123  Double_t Bagging();
124 
125 
126  // calculate weight of single method
128 
129  // return ROC integral on training/testing sample
131 
132  // writing the monitoring histograms and tree to a file
133  void WriteMonitoringHistosToFile( void ) const;
134 
135  // write evaluation histograms into target file
136  virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
137 
138  // performs the MethodBase testing + testing of each boosted classifier
139  virtual void TestClassification();
140 
141  // finding the MVA to cut between sig and bgd according to fMVACutPerc,fMVACutType
142  void FindMVACut(MethodBase* method);
143 
144  // setting all the boost weights to 1
145  void ResetBoostWeights();
146 
147  // creating the vectors of histogram for monitoring MVA response of each classifier
148  void CreateMVAHistorgrams();
149 
150  // calculate MVA values of current trained method on training
151  // sample
152  void CalcMVAValues();
153 
154  UInt_t fBoostNum; // Number of times the classifier is boosted
155  TString fBoostType; // string specifying the boost type
156 
157  TString fTransformString; // min and max values for the classifier response
158  Bool_t fDetailedMonitoring; // produce detailed monitoring histograms (boost-wise)
159 
160  Double_t fAdaBoostBeta; // ADA boost parameter, default is 1
161  UInt_t fRandomSeed; // seed for random number generator used for bagging
162  Double_t fBaggedSampleFraction;// rel.Size of bagged sample
163 
164  TString fBoostedMethodName; // details of the boosted classifier
167 
168  Bool_t fMonitorBoostedMethod; // monitor the MVA response of every classifier
169 
170  // MVA output from each classifier over the training hist, using original event weights
171  std::vector< TH1* > fTrainSigMVAHist;
172  std::vector< TH1* > fTrainBgdMVAHist;
173  // MVA output from each classifier over the training hist, using boosted event weights
174  std::vector< TH1* > fBTrainSigMVAHist;
175  std::vector< TH1* > fBTrainBgdMVAHist;
176  // MVA output from each classifier over the testing hist
177  std::vector< TH1* > fTestSigMVAHist;
178  std::vector
180 
181  // monitoring tree/ntuple and its variables
182  TTree* fMonitorTree; // tree to monitor values during the boosting
183  Double_t fBoostWeight; // the weight used to boost the next classifier
184  Double_t fMethodError; // estimation of the level error of the classifier
185  // analysing the train dataset
186  Double_t fROC_training; // roc integral of last trained method (on training sample)
187 
188  // overlap integral of mva distributions for signal and
189  // background (training sample)
191 
192  std::vector<Float_t> *fMVAvalues; // mva values for the last trained method
193 
195  friend class Factory; // DSMTEST
196  friend class Reader; // DSMTEST
197 
198  TString fHistoricOption; //historic variable, only needed for "CompatibilityOptions"
199  Bool_t fHistoricBoolOption; //historic variable, only needed for "CompatibilityOptions"
200 
201  protected:
202 
203  // get help message text
204  void GetHelpMessage() const;
205 
207  };
208 }
209 
210 #endif
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:192
Double_t CalcMethodWeight()
Double_t fROC_training
Definition: MethodBoost.h:186
void SingleTrain()
initialization
std::vector< TH1 * > fTestSigMVAHist
Definition: MethodBoost.h:177
Double_t Bagging()
Bagging or bootstrap boosting: gives a new random Poisson weight to every event.
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
Int_t GetBoostNum()
Definition: MethodBoost.h:89
Bool_t fDetailedMonitoring
Definition: MethodBoost.h:158
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
EAnalysisType
Definition: Types.h:124
UInt_t CurrentMethodIdx()
Definition: MethodBoost.h:116
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
std::vector< TH1 * > fTrainBgdMVAHist
Definition: MethodBoost.h:172
const Ranking * CreateRanking()
void ResetBoostWeights()
reset the boost weights of all events back to 1
Double_t fOverlap_integral
Definition: MethodBoost.h:190
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="", TDirectory *theTargetDir=NULL)
#define ClassDef(name, id)
Definition: Rtypes.h:254
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
virtual ~MethodBoost(void)
destructor
void GetHelpMessage() const
Get help message text.
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
TString fHistoricOption
Definition: MethodBoost.h:198
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Double_t fBaggedSampleFraction
Definition: MethodBoost.h:162
TString fTransformString
Definition: MethodBoost.h:157
unsigned int UInt_t
Definition: RtypesCore.h:42
MethodBase * CurrentMethod()
Definition: MethodBoost.h:115
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:87
void CreateMVAHistorgrams()
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
TString fBoostedMethodOptions
Definition: MethodBoost.h:166
Bool_t fMonitorBoostedMethod
Definition: MethodBoost.h:168
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:41
std::vector< TH1 * > fTrainSigMVAHist
Definition: MethodBoost.h:171
TString fBoostedMethodTitle
Definition: MethodBoost.h:165
int type
Definition: TGX11.cxx:120
The TH1 histogram class.
Definition: TH1.h:80
Double_t fMethodError
Definition: MethodBoost.h:184
virtual void TestClassification()
initialization
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file; dummy implementation here ...
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
TString fBoostedMethodName
Definition: MethodBoost.h:164
Abstract ClassifierFactory template that handles arbitrary types.
Double_t fAdaBoostBeta
Definition: MethodBoost.h:160
DataSetManager * fDataSetManager
Definition: MethodBoost.h:194
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:199
void InitHistos()
initialisation routine
#define NULL
Definition: Rtypes.h:82
A TTree object has a header with a name and a title.
Definition: TTree.h:94
TTree * fMonitorTree
Definition: MethodBoost.h:182
std::vector< TH1 * > fBTrainSigMVAHist
Definition: MethodBoost.h:174
Double_t fBoostWeight
Definition: MethodBoost.h:183
std::vector< TH1 * > fBTrainBgdMVAHist
Definition: MethodBoost.h:175
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
std::vector< TH1 * > fTestBgdMVAHist
Definition: MethodBoost.h:179