Logo ROOT   6.10/09
Reference Guide
MethodBoost.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Jan Therhaag, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBoost *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Class for boosting a TMVA method *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * U. of Victoria, Canada *
24  * MPI-K Heidelberg, Germany *
25  * U. of Bonn, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  **********************************************************************************/
31 
32 #ifndef ROOT_TMVA_MethodBoost
33 #define ROOT_TMVA_MethodBoost
34 
35 //////////////////////////////////////////////////////////////////////////
36 // //
37 // MethodBoost //
38 // //
39 // Class for boosting a TMVA method //
40 // //
41 //////////////////////////////////////////////////////////////////////////
42 
43 #include <iosfwd>
44 #include <vector>
45 
46 #include "TMVA/MethodBase.h"
47 
49 
50 namespace TMVA {
51 
52  class Factory; // DSMTEST
53  class Reader; // DSMTEST
54  class DataSetManager; // DSMTEST
55 
57 
58  public :
59 
60  // constructors
61  MethodBoost( const TString& jobName,
62  const TString& methodTitle,
63  DataSetInfo& theData,
64  const TString& theOption = "" );
65 
67  const TString& theWeightFile );
68 
69  virtual ~MethodBoost( void );
70 
71  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ );
72 
73  // training and boosting all the classifiers
74  void Train( void );
75 
76  // ranking of input variables
77  const Ranking* CreateRanking();
78 
79  // saves the name and options string of the boosted classifier
80  Bool_t BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption );
81  void SetBoostedMethodName ( TString methodName ) { fBoostedMethodName = methodName; }
82 
83  Int_t GetBoostNum() { return fBoostNum; }
84 
85  void CleanBoostOptions();
86 
87  Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper = 0 );
88 
89  private :
90  // clean up
91  void ClearAll();
92 
93  // print fit results
94  void PrintResults( const TString&, std::vector<Double_t>&, const Double_t ) const;
95 
96  // initializing mostly monitoring tools of the boost process
97  void Init();
98  void InitHistos();
99  void CheckSetup();
100 
101  void MonitorBoost( Types::EBoostStage stage, UInt_t methodIdx=0);
102 
103  // the option handling methods
104  void DeclareOptions();
106  void ProcessOptions();
107 
108 
111  // training a single classifier
112  void SingleTrain();
113 
114  // calculating a boosting weight from the classifier, storing it in the next one
116  Double_t AdaBoost(MethodBase* method, Bool_t useYesNoLeaf );
117  Double_t Bagging();
118 
119 
120  // calculate weight of single method
122 
123  // return ROC integral on training/testing sample
125 
126  // writing the monitoring histograms and tree to a file
127  void WriteMonitoringHistosToFile( void ) const;
128 
129  // write evaluation histograms into target file
130  virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
131 
132  // performs the MethodBase testing + testing of each boosted classifier
133  virtual void TestClassification();
134 
135  // finding the MVA to cut between sig and bgd according to fMVACutPerc,fMVACutType
136  void FindMVACut(MethodBase* method);
137 
138  // setting all the boost weights to 1
139  void ResetBoostWeights();
140 
141  // creating the vectors of histogram for monitoring MVA response of each classifier
142  void CreateMVAHistorgrams();
143 
144  // calculate MVA values of current trained method on training
145  // sample
146  void CalcMVAValues();
147 
148  UInt_t fBoostNum; // Number of times the classifier is boosted
149  TString fBoostType; // string specifying the boost type
150 
151  TString fTransformString; // min and max values for the classifier response
152  Bool_t fDetailedMonitoring; // produce detailed monitoring histograms (boost-wise)
153 
154  Double_t fAdaBoostBeta; // ADA boost parameter, default is 1
155  UInt_t fRandomSeed; // seed for random number generator used for bagging
156  Double_t fBaggedSampleFraction;// relative size of bagged sample
157 
158  TString fBoostedMethodName; // details of the boosted classifier
161 
162  Bool_t fMonitorBoostedMethod; // monitor the MVA response of every classifier
163 
164  // MVA output from each classifier over the training hist, using original event weights
165  std::vector< TH1* > fTrainSigMVAHist;
166  std::vector< TH1* > fTrainBgdMVAHist;
167  // MVA output from each classifier over the training hist, using boosted events weights
168  std::vector< TH1* > fBTrainSigMVAHist;
169  std::vector< TH1* > fBTrainBgdMVAHist;
170  // MVA output from each classifier over the testing hist
171  std::vector< TH1* > fTestSigMVAHist;
172  std::vector
174 
175  // monitoring tree/ntuple and its variables
176  TTree* fMonitorTree; // tree to monitor values during the boosting
177  Double_t fBoostWeight; // the weight used to boost the next classifier
178  Double_t fMethodError; // estimation of the level error of the classifier
179  // analysing the train dataset
180  Double_t fROC_training; // roc integral of last trained method (on training sample)
181 
182  // overlap integral of mva distributions for signal and
183  // background (training sample)
185 
186  std::vector<Float_t> *fMVAvalues; // mva values for the last trained method
187 
189  friend class Factory; // DSMTEST
190  friend class Reader; // DSMTEST
191 
192  TString fHistoricOption; //historic variable, only needed for "CompatibilityOptions"
193  Bool_t fHistoricBoolOption; //historic variable, only needed for "CompatibilityOptions"
194 
195  protected:
196 
197  // get help message text
198  void GetHelpMessage() const;
199 
201  };
202 }
203 
204 #endif
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:186
Double_t CalcMethodWeight()
Double_t fROC_training
Definition: MethodBoost.h:180
void SingleTrain()
training a single classifier
std::vector< TH1 *> fTestSigMVAHist
Definition: MethodBoost.h:171
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file (dummy implementation here) ...
Int_t GetBoostNum()
Definition: MethodBoost.h:83
Bool_t fDetailedMonitoring
Definition: MethodBoost.h:152
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
EAnalysisType
Definition: Types.h:125
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Definition: MethodBoost.cxx:94
Virtual base class for all MVA methods.
Definition: MethodBase.h:106
UInt_t CurrentMethodIdx()
Definition: MethodBoost.h:110
Basic string class.
Definition: TString.h:129
Ranking for variables in method (implementation)
Definition: Ranking.h:48
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< TH1 *> fTrainBgdMVAHist
Definition: MethodBoost.h:166
const Ranking * CreateRanking()
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
Double_t fOverlap_integral
Definition: MethodBoost.h:184
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
#define ClassDef(name, id)
Definition: Rtypes.h:297
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
Virtual base class for combining several TMVA methods.
virtual ~MethodBoost(void)
destructor
Class that contains all the data information.
Definition: DataSetInfo.h:60
Class for boosting a TMVA method.
Definition: MethodBoost.h:56
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
TString fHistoricOption
Definition: MethodBoost.h:192
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Double_t fBaggedSampleFraction
Definition: MethodBoost.h:156
TString fTransformString
Definition: MethodBoost.h:151
unsigned int UInt_t
Definition: RtypesCore.h:42
MethodBase * CurrentMethod()
Definition: MethodBoost.h:109
This is the main MVA steering class.
Definition: Factory.h:81
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:81
void CreateMVAHistorgrams()
const Bool_t kFALSE
Definition: RtypesCore.h:92
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
TString fBoostedMethodOptions
Definition: MethodBoost.h:160
Bool_t fMonitorBoostedMethod
Definition: MethodBoost.h:162
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
double Double_t
Definition: RtypesCore.h:55
Class that contains all the data information.
std::vector< TH1 *> fTrainSigMVAHist
Definition: MethodBoost.h:165
TString fBoostedMethodTitle
Definition: MethodBoost.h:159
int type
Definition: TGX11.cxx:120
The TH1 histogram class.
Definition: TH1.h:56
Double_t fMethodError
Definition: MethodBoost.h:178
void GetHelpMessage() const
Get help message text.
virtual void TestClassification()
performs the MethodBase testing + testing of each boosted classifier
The Reader class serves to use the MVAs in a specific analysis context.
Definition: Reader.h:63
TString fBoostedMethodName
Definition: MethodBoost.h:158
Abstract ClassifierFactory template that handles arbitrary types.
Double_t fAdaBoostBeta
Definition: MethodBoost.h:154
DataSetManager * fDataSetManager
Definition: MethodBoost.h:188
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:193
void InitHistos()
initialisation routine
A TTree object has a header with a name and a title.
Definition: TTree.h:78
TTree * fMonitorTree
Definition: MethodBoost.h:176
std::vector< TH1 *> fBTrainSigMVAHist
Definition: MethodBoost.h:168
Double_t fBoostWeight
Definition: MethodBoost.h:177
std::vector< TH1 *> fBTrainBgdMVAHist
Definition: MethodBoost.h:169
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
std::vector< TH1 *> fTestBgdMVAHist
Definition: MethodBoost.h:173