Logo ROOT   6.18/05
Reference Guide
MethodBoost.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Or Cohen, Jan Therhaag, Eckhard von Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodBoost *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Class for boosting a TMVA method *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20 * *
21 * Copyright (c) 2005-2011: *
22 * CERN, Switzerland *
23 * U. of Victoria, Canada *
24 * MPI-K Heidelberg, Germany *
25 * U. of Bonn, Germany *
26 * *
27 * Redistribution and use in source and binary forms, with or without *
28 * modification, are permitted according to the terms listed in LICENSE *
29 * (http://tmva.sourceforge.net/LICENSE) *
30 **********************************************************************************/
31
32#ifndef ROOT_TMVA_MethodBoost
33#define ROOT_TMVA_MethodBoost
34
35//////////////////////////////////////////////////////////////////////////
36// //
37// MethodBoost //
38// //
39// Class for boosting a TMVA method //
40// //
41//////////////////////////////////////////////////////////////////////////
42
43#include <iosfwd>
44#include <vector>
45
46#include "TMVA/MethodBase.h"
47
49
50namespace TMVA {
51
52 class Factory; // DSMTEST
53 class Reader; // DSMTEST
54 class DataSetManager; // DSMTEST
55 namespace Experimental {
56 class Classification;
57 }
59 friend class Factory; // DSMTEST
60 friend class Reader; // DSMTEST
62
63 public :
64
65 // constructors
66 MethodBoost( const TString& jobName,
67 const TString& methodTitle,
68 DataSetInfo& theData,
69 const TString& theOption = "" );
70
72 const TString& theWeightFile );
73
74 virtual ~MethodBoost( void );
75
// Boost can handle classification with 2 classes and regression with one
// regression-target; the third argument (number of targets) is unnamed here.
76 virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ );
77
78 // training and boosting all the classifiers
79 void Train( void );
80
81 // ranking of input variables
82 const Ranking* CreateRanking();
83
84 // registers the method type, title and option string from which the
// boosted classifier will be created
85 Bool_t BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption );
// stores methodName in fBoostedMethodName
86 void SetBoostedMethodName ( TString methodName ) { fBoostedMethodName = methodName; }
87
89
90 void CleanBoostOptions();
91
// return boosted MVA response; both error pointers default to 0
92 Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper = 0 );
93
94 private :
95 // clean up
96 void ClearAll();
97
98 // print fit results
99 void PrintResults( const TString&, std::vector<Double_t>&, const Double_t ) const;
100
101 // initializing mostly monitoring tools of the boost process
102 void Init();
103 void InitHistos();
104 void CheckSetup();
105
106 void MonitorBoost( Types::EBoostStage stage, UInt_t methodIdx=0);
107
108 // the option handling methods
109 void DeclareOptions();
111 void ProcessOptions();
112
113
116 // training a single classifier
117 void SingleTrain();
118
119 // calculating a boosting weight from the classifier, storing it in the next one
121 Double_t AdaBoost(MethodBase* method, Bool_t useYesNoLeaf );
123
124
125 // calculate weight of single method
127
128 // return ROC integral on training/testing sample
130
131 // writing the monitoring histograms and tree to a file
132 void WriteMonitoringHistosToFile( void ) const;
133
134 // write evaluation histograms into target file
135 virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
136
137 // performs the MethodBase testing + testing of each boosted classifier
138 virtual void TestClassification();
139
140 // finding the MVA to cut between sig and bgd according to fMVACutPerc,fMVACutType
141 void FindMVACut(MethodBase* method);
142
143 // setting all the boost weights to 1
144 void ResetBoostWeights();
145
146 // creating the vectors of histograms for monitoring the MVA response of each classifier
148
149 // calculate MVA values of current trained method on training
150 // sample
151 void CalcMVAValues();
152
153 UInt_t fBoostNum; // Number of times the classifier is boosted
154 TString fBoostType; // string specifying the boost type
155
156 TString fTransformString; // min and max values for the classifier response
157 Bool_t fDetailedMonitoring; // produce detailed monitoring histograms (boost-wise)
158
159 Double_t fAdaBoostBeta; // ADA boost parameter, default is 1
160 UInt_t fRandomSeed; // seed for random number generator used for bagging
161 Double_t fBaggedSampleFraction;// relative size of bagged sample
162
163 TString fBoostedMethodName; // details of the boosted classifier
166
167 Bool_t fMonitorBoostedMethod; // monitor the MVA response of every classifier
168
169 // MVA output from each classifier over the training hist, using original event weights
170 std::vector< TH1* > fTrainSigMVAHist;
171 std::vector< TH1* > fTrainBgdMVAHist;
172 // MVA output from each classifier over the training hist, using boosted event weights
173 std::vector< TH1* > fBTrainSigMVAHist;
174 std::vector< TH1* > fBTrainBgdMVAHist;
175 // MVA output from each classifier over the testing hist
176 std::vector< TH1* > fTestSigMVAHist;
177 std::vector
179
180 //monitoring tree/ntuple and its variables
181 TTree* fMonitorTree; // tree to monitor values during the boosting
182 Double_t fBoostWeight; // the weight used to boost the next classifier
183 Double_t fMethodError; // estimation of the level error of the classifier
184 // analysing the train dataset
185 Double_t fROC_training; // roc integral of last trained method (on training sample)
186
187 // overlap integral of mva distributions for signal and
188 // background (training sample)
190
191 std::vector<Float_t> *fMVAvalues; // mva values for the last trained method
192
194 TString fHistoricOption; //historic variable, only needed for "CompatibilityOptions"
195 Bool_t fHistoricBoolOption; //historic variable, only needed for "CompatibilityOptions"
196
197 protected:
198
199 // get help message text
200 void GetHelpMessage() const;
201
203 };
204}
205
206#endif
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
#define ClassDef(name, id)
Definition: Rtypes.h:326
int type
Definition: TGX11.cxx:120
The TH1 histogram class.
Definition: TH1.h:56
Class to perform two class classification.
Class that contains all the data information.
Definition: DataSetInfo.h:60
Class that contains all the data information.
This is the main MVA steering class.
Definition: Factory.h:81
Virtual base class for all MVA methods.
Definition: MethodBase.h:109
friend class MethodBoost
Definition: MethodBase.h:114
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
void ResetBoostWeights()
Reset the boosted weights of the events back to 1.
MethodBase * CurrentMethod()
Definition: MethodBoost.h:114
Double_t fAdaBoostBeta
Definition: MethodBoost.h:159
TString fBoostedMethodOptions
Definition: MethodBoost.h:165
Double_t fBaggedSampleFraction
Definition: MethodBoost.h:161
Double_t fBoostWeight
Definition: MethodBoost.h:182
std::vector< TH1 * > fTestSigMVAHist
Definition: MethodBoost.h:176
void SingleTrain()
initialization
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:86
Bool_t fDetailedMonitoring
Definition: MethodBoost.h:157
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
DataSetManager * fDataSetManager
Definition: MethodBoost.h:193
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
std::vector< TH1 * > fTestBgdMVAHist
Definition: MethodBoost.h:178
void CreateMVAHistorgrams()
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:195
TTree * fMonitorTree
Definition: MethodBoost.h:181
void WriteMonitoringHistosToFile(void) const
Write special monitoring histograms to file (dummy implementation here).
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
TString fBoostedMethodTitle
Definition: MethodBoost.h:164
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
UInt_t CurrentMethodIdx()
Definition: MethodBoost.h:115
std::vector< TH1 * > fTrainSigMVAHist
Definition: MethodBoost.h:170
TString fHistoricOption
Definition: MethodBoost.h:194
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
virtual void TestClassification()
initialization
void InitHistos()
initialisation routine
void ProcessOptions()
process user options
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
Int_t GetBoostNum()
Definition: MethodBoost.h:88
Double_t fROC_training
Definition: MethodBoost.h:185
TString fBoostedMethodName
Definition: MethodBoost.h:163
std::vector< TH1 * > fBTrainBgdMVAHist
Definition: MethodBoost.h:174
std::vector< TH1 * > fTrainBgdMVAHist
Definition: MethodBoost.h:171
Double_t SingleBoost(MethodBase *method)
Double_t CalcMethodWeight()
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
std::vector< TH1 * > fBTrainSigMVAHist
Definition: MethodBoost.h:173
Double_t Bagging()
Bagging or Bootstrap boosting, gives a new random Poisson weight to every event.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
const Ranking * CreateRanking()
Double_t fMethodError
Definition: MethodBoost.h:183
TString fTransformString
Definition: MethodBoost.h:156
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:191
Bool_t fMonitorBoostedMethod
Definition: MethodBoost.h:167
virtual ~MethodBoost(void)
destructor
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
Double_t fOverlap_integral
Definition: MethodBoost.h:189
void GetHelpMessage() const
Get help message text.
Virtual base class for combining several TMVA methods.
Ranking for variables in method (implementation)
Definition: Ranking.h:48
The Reader class serves to use the MVAs in a specific analysis context.
Definition: Reader.h:63
EAnalysisType
Definition: Types.h:127
Basic string class.
Definition: TString.h:131
A TTree represents a columnar dataset.
Definition: TTree.h:71
create variable transformations