Logo ROOT  
Reference Guide
MethodBoost.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Jan Therhaag, Eckhard von Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodBoost *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Class for boosting a TMVA method *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20 * *
21 * Copyright (c) 2005-2011: *
22 * CERN, Switzerland *
23 * U. of Victoria, Canada *
24 * MPI-K Heidelberg, Germany *
25 * U. of Bonn, Germany *
26 * *
27 * Redistribution and use in source and binary forms, with or without *
28 * modification, are permitted according to the terms listed in LICENSE *
29 * (http://tmva.sourceforge.net/LICENSE) *
30 **********************************************************************************/
31
32#ifndef ROOT_TMVA_MethodBoost
33#define ROOT_TMVA_MethodBoost
34
35//////////////////////////////////////////////////////////////////////////
36// //
37// MethodBoost //
38// //
39// Class for boosting a TMVA method //
40// //
41//////////////////////////////////////////////////////////////////////////
42
43#include <iosfwd>
44#include <vector>
45
46#include "TMVA/MethodBase.h"
47
49
50namespace TMVA {
51
52 class Factory; // DSMTEST
53 class Reader; // DSMTEST
54 class DataSetManager; // DSMTEST
55 namespace Experimental {
56 class Classification;
57 }
59 friend class Factory; // DSMTEST
60 friend class Reader; // DSMTEST
62
63 public :
64
65 // constructors
66 MethodBoost( const TString& jobName,
67 const TString& methodTitle,
68 DataSetInfo& theData,
69 const TString& theOption = "" );
70
72 const TString& theWeightFile );
73
74 virtual ~MethodBoost( void );
75
76 virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ );
77
78 // training and boosting all the classifiers
79 void Train( void );
80
81 // ranking of input variables
82 const Ranking* CreateRanking();
83
84 // saves the name and options string of the boosted classifier
85 Bool_t BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption );
86 void SetBoostedMethodName ( TString methodName ) { fBoostedMethodName = methodName; }
87
89
90 void CleanBoostOptions();
91
92 Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper = 0 );
93
94 private :
95 // clean up
96 void ClearAll();
97
98 // print fit results
99 void PrintResults( const TString&, std::vector<Double_t>&, const Double_t ) const;
100
101 // initializing mostly monitoring tools of the boost process
102 void Init();
103 void InitHistos();
104 void CheckSetup();
105
106 void MonitorBoost( Types::EBoostStage stage, UInt_t methodIdx=0);
107
108 // the option handling methods
109 void DeclareOptions();
111 void ProcessOptions();
112
113
116 // training a single classifier
117 void SingleTrain();
118
119 // calculating a boosting weight from the classifier, storing it in the next one
121 Double_t AdaBoost(MethodBase* method, Bool_t useYesNoLeaf );
123
124
125 // calculate weight of single method
127
128 // return ROC integral on training/testing sample
130
131 // writing the monitoring histograms and tree to a file
132 void WriteMonitoringHistosToFile( void ) const;
133
134 // write evaluation histograms into target file
135 virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
136
137 // performs the MethodBase testing + testing of each boosted classifier
138 virtual void TestClassification();
139
140 // finding the MVA to cut between sig and bgd according to fMVACutPerc,fMVACutType
141 void FindMVACut(MethodBase* method);
142
143 // setting all the boost weights to 1
144 void ResetBoostWeights();
145
146 // creating the vectors of histogram for monitoring MVA response of each classifier
148
149 // calculate MVA values of current trained method on training
150 // sample
151 void CalcMVAValues();
152
153 UInt_t fBoostNum; ///< Number of times the classifier is boosted
154 TString fBoostType; ///< string specifying the boost type
155
156 TString fTransformString; ///< min and max values for the classifier response
157 Bool_t fDetailedMonitoring; ///< produce detailed monitoring histograms (boost-wise)
158
159 Double_t fAdaBoostBeta; ///< ADA boost parameter, default is 1
160 UInt_t fRandomSeed; ///< seed for random number generator used for bagging
161 Double_t fBaggedSampleFraction; ///< rel.Size of bagged sample
162
163 TString fBoostedMethodName; ///< details of the boosted classifier
166
167 Bool_t fMonitorBoostedMethod; ///< monitor the MVA response of every classifier
168
169 // MVA output from each classifier over the training hist, using original event weights
170 std::vector< TH1* > fTrainSigMVAHist;
171 std::vector< TH1* > fTrainBgdMVAHist;
172 // MVA output from each classifier over the training hist, using boosted event weights
173 std::vector< TH1* > fBTrainSigMVAHist;
174 std::vector< TH1* > fBTrainBgdMVAHist;
175 // MVA output from each classifier over the testing hist
176 std::vector< TH1* > fTestSigMVAHist;
177 std::vector
179
180 // monitoring tree/ntuple and its variables
181 TTree* fMonitorTree; ///< tree to monitor values during the boosting
182 Double_t fBoostWeight; ///< the weight used to boost the next classifier
183 Double_t fMethodError; ///< estimation of the level error of the classifier
184 // analysing the train dataset
185 Double_t fROC_training; ///< roc integral of last trained method (on training sample)
186
187 // overlap integral of mva distributions for signal and
188 // background (training sample)
190
191 std::vector<Float_t> *fMVAvalues; ///< mva values for the last trained method
192
194 TString fHistoricOption; ///< historic variable, only needed for "CompatibilityOptions"
195 Bool_t fHistoricBoolOption; ///< historic variable, only needed for "CompatibilityOptions"
196
197 protected:
198
199 // get help message text
200 void GetHelpMessage() const;
201
203 };
204}
205
206#endif
bool Bool_t
Definition: RtypesCore.h:63
const Bool_t kFALSE
Definition: RtypesCore.h:101
double Double_t
Definition: RtypesCore.h:59
#define ClassDef(name, id)
Definition: Rtypes.h:335
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
TH1 is the base class of all histogram classes in ROOT.
Definition: TH1.h:58
Class to perform two class classification.
Class that contains all the data information.
Definition: DataSetInfo.h:62
Class that contains all the data information.
This is the main MVA steering class.
Definition: Factory.h:80
Virtual base class for all MVA methods.
Definition: MethodBase.h:111
friend class MethodBoost
Definition: MethodBase.h:116
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
MethodBase * CurrentMethod()
Definition: MethodBoost.h:114
Double_t fAdaBoostBeta
ADA boost parameter, default is 1.
Definition: MethodBoost.h:159
TString fBoostedMethodOptions
options
Definition: MethodBoost.h:165
Double_t fBaggedSampleFraction
rel.Size of bagged sample
Definition: MethodBoost.h:161
Double_t fBoostWeight
the weight used to boost the next classifier
Definition: MethodBoost.h:182
std::vector< TH1 * > fTestSigMVAHist
Definition: MethodBoost.h:176
void SingleTrain()
initialization
UInt_t fBoostNum
Number of times the classifier is boosted.
Definition: MethodBoost.h:153
void SetBoostedMethodName(TString methodName)
Definition: MethodBoost.h:86
Bool_t fDetailedMonitoring
produce detailed monitoring histograms (boost-wise)
Definition: MethodBoost.h:157
UInt_t fRandomSeed
seed for random number generator used for bagging
Definition: MethodBoost.h:160
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
DataSetManager * fDataSetManager
DSMTEST.
Definition: MethodBoost.h:193
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
std::vector< TH1 * > fTestBgdMVAHist
Definition: MethodBoost.h:178
void CreateMVAHistorgrams()
Bool_t fHistoricBoolOption
historic variable, only needed for "CompatibilityOptions"
Definition: MethodBoost.h:195
TTree * fMonitorTree
tree to monitor values during the boosting
Definition: MethodBoost.h:181
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file (dummy implementation here)
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
TString fBoostedMethodTitle
title
Definition: MethodBoost.h:164
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
UInt_t CurrentMethodIdx()
Definition: MethodBoost.h:115
std::vector< TH1 * > fTrainSigMVAHist
Definition: MethodBoost.h:170
TString fHistoricOption
historic variable, only needed for "CompatibilityOptions"
Definition: MethodBoost.h:194
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
virtual void TestClassification()
initialization
void InitHistos()
initialisation routine
void ProcessOptions()
process user options
TString fBoostType
string specifying the boost type
Definition: MethodBoost.h:154
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
Int_t GetBoostNum()
Definition: MethodBoost.h:88
Double_t fROC_training
roc integral of last trained method (on training sample)
Definition: MethodBoost.h:185
TString fBoostedMethodName
details of the boosted classifier
Definition: MethodBoost.h:163
std::vector< TH1 * > fBTrainBgdMVAHist
Definition: MethodBoost.h:174
std::vector< TH1 * > fTrainBgdMVAHist
Definition: MethodBoost.h:171
Double_t SingleBoost(MethodBase *method)
Double_t CalcMethodWeight()
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
std::vector< TH1 * > fBTrainSigMVAHist
Definition: MethodBoost.h:173
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
const Ranking * CreateRanking()
Double_t fMethodError
estimation of the level error of the classifier
Definition: MethodBoost.h:183
TString fTransformString
min and max values for the classifier response
Definition: MethodBoost.h:156
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
std::vector< Float_t > * fMVAvalues
mva values for the last trained method
Definition: MethodBoost.h:191
Bool_t fMonitorBoostedMethod
monitor the MVA response of every classifier
Definition: MethodBoost.h:167
virtual ~MethodBoost(void)
destructor
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
Double_t fOverlap_integral
Definition: MethodBoost.h:189
void GetHelpMessage() const
Get help message text.
Virtual base class for combining several TMVA methods.
Ranking for variables in method (implementation)
Definition: Ranking.h:48
The Reader class serves to use the MVAs in a specific analysis context.
Definition: Reader.h:64
EAnalysisType
Definition: Types.h:126
Basic string class.
Definition: TString.h:136
A TTree represents a columnar dataset.
Definition: TTree.h:79
create variable transformations