Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodBoost.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Or Cohen, Jan Therhaag, Eckhard von Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodBoost *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Class for boosting a TMVA method *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20 * *
21 * Copyright (c) 2005-2011: *
22 * CERN, Switzerland *
23 * U. of Victoria, Canada *
24 * MPI-K Heidelberg, Germany *
25 * U. of Bonn, Germany *
26 * *
27 * Redistribution and use in source and binary forms, with or without *
28 * modification, are permitted according to the terms listed in LICENSE *
29 * (http://tmva.sourceforge.net/LICENSE) *
30 **********************************************************************************/
31
32#ifndef ROOT_TMVA_MethodBoost
33#define ROOT_TMVA_MethodBoost
34
35//////////////////////////////////////////////////////////////////////////
36// //
37// MethodBoost //
38// //
39// Class for boosting a TMVA method //
40// //
41//////////////////////////////////////////////////////////////////////////
42
43#include <iosfwd>
44#include <vector>
45
46#include "TMVA/MethodBase.h"
47
49
50namespace TMVA {
51
52 class Factory; // DSMTEST
53 class Reader; // DSMTEST
54 class DataSetManager; // DSMTEST
55 namespace Experimental {
56 class Classification;
57 }
59 friend class Factory; // DSMTEST
60 friend class Reader; // DSMTEST
62
63 public :
64
65 // constructors
66 MethodBoost( const TString& jobName,
67 const TString& methodTitle,
68 DataSetInfo& theData,
69 const TString& theOption = "" );
70
72 const TString& theWeightFile );
73
74 virtual ~MethodBoost( void );
75
76 virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ );
77
78 // training and boosting all the classifiers
79 void Train( void );
80
81 // ranking of input variables
82 const Ranking* CreateRanking();
83
84 // saves the name and options string of the boosted classifier
85 Bool_t BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption );
86 void SetBoostedMethodName ( TString methodName ) { fBoostedMethodName = methodName; }
87
89
90 void CleanBoostOptions();
91
92 Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper = 0 );
93
94 private :
95 // clean up
96 void ClearAll();
97
98 // print fit results
99 void PrintResults( const TString&, std::vector<Double_t>&, const Double_t ) const;
100
101 // initializing mostly monitoring tools of the boost process
102 void Init();
103 void InitHistos();
104 void CheckSetup();
105
106 void MonitorBoost( Types::EBoostStage stage, UInt_t methodIdx=0);
107
108 // the option handling methods
109 void DeclareOptions();
111 void ProcessOptions();
112
113
116 // training a single classifier
117 void SingleTrain();
118
119 // calculating a boosting weight from the classifier, storing it in the next one
121 Double_t AdaBoost(MethodBase* method, Bool_t useYesNoLeaf );
123
124
125 // calculate weight of single method
127
128 // return ROC integral on training/testing sample
130
131 // writing the monitoring histograms and tree to a file
132 void WriteMonitoringHistosToFile( void ) const;
133
134 // write evaluation histograms into target file
135 virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
136
137 // performs the MethodBase testing + testing of each boosted classifier
138 virtual void TestClassification();
139
140 // finding the MVA to cut between sig and bgd according to fMVACutPerc,fMVACutType
141 void FindMVACut(MethodBase* method);
142
143 // setting all the boost weights to 1
144 void ResetBoostWeights();
145
146 // creating the vectors of histogram for monitoring MVA response of each classifier
148
149 // calculate MVA values of current trained method on training
150 // sample
151 void CalcMVAValues();
152
153 UInt_t fBoostNum; // Number of times the classifier is boosted
154 TString fBoostType; // string specifying the boost type
155
156 TString fTransformString; // min and max values for the classifier response
157 Bool_t fDetailedMonitoring; // produce detailed monitoring histograms (boost-wise)
158
159 Double_t fAdaBoostBeta; // ADA boost parameter, default is 1
160 UInt_t fRandomSeed; // seed for random number generator used for bagging
161 Double_t fBaggedSampleFraction;// rel.Size of bagged sample
162
163 TString fBoostedMethodName; // details of the boosted classifier
166
167 Bool_t fMonitorBoostedMethod; // monitor the MVA response of every classifier
168
169 // MVA output from each classifier over the training hist, using original event weights
170 std::vector< TH1* > fTrainSigMVAHist;
171 std::vector< TH1* > fTrainBgdMVAHist;
172 // MVA output from each classifier over the training hist, using boosted event weights
173 std::vector< TH1* > fBTrainSigMVAHist;
174 std::vector< TH1* > fBTrainBgdMVAHist;
175 // MVA output from each classifier over the testing hist
176 std::vector< TH1* > fTestSigMVAHist;
177 std::vector
179
180 // monitoring tree/ntuple and its variables
181 TTree* fMonitorTree; // tree to monitor values during the boosting
182 Double_t fBoostWeight; // the weight used to boost the next classifier
183 Double_t fMethodError; // estimation of the level error of the classifier
184 // analysing the train dataset
185 Double_t fROC_training; // roc integral of last trained method (on training sample)
186
187 // overlap integral of mva distributions for signal and
188 // background (training sample)
190
191 std::vector<Float_t> *fMVAvalues; // mva values for the last trained method
192
194 TString fHistoricOption; //historic variable, only needed for "CompatibilityOptions"
195 Bool_t fHistoricBoolOption; //historic variable, only needed for "CompatibilityOptions"
196
197 protected:
198
199 // get help message text
200 void GetHelpMessage() const;
201
203 };
204}
205
206#endif
const Bool_t kFALSE
Definition RtypesCore.h:101
bool Bool_t
Definition RtypesCore.h:63
double Double_t
Definition RtypesCore.h:59
#define ClassDef(name, id)
Definition Rtypes.h:325
int type
Definition TGX11.cxx:121
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:58
Class to perform two class classification.
Class that contains all the data information.
Definition DataSetInfo.h:62
Class that contains all the data information.
This is the main MVA steering class.
Definition Factory.h:80
Virtual base Class for all MVA method.
Definition MethodBase.h:111
Class for boosting a TMVA method.
Definition MethodBoost.h:58
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
MethodBase * CurrentMethod()
Double_t fAdaBoostBeta
TString fBoostedMethodOptions
Double_t fBaggedSampleFraction
Double_t fBoostWeight
std::vector< TH1 * > fTestSigMVAHist
void SingleTrain()
initialization
void SetBoostedMethodName(TString methodName)
Definition MethodBoost.h:86
Bool_t fDetailedMonitoring
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
DataSetManager * fDataSetManager
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
std::vector< TH1 * > fTestBgdMVAHist
Bool_t fHistoricBoolOption
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file (dummy implementation here)
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
TString fBoostedMethodTitle
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
UInt_t CurrentMethodIdx()
std::vector< TH1 * > fTrainSigMVAHist
TString fHistoricOption
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
virtual void TestClassification()
initialization
void InitHistos()
initialisation routine
void ProcessOptions()
process user options
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
Double_t fROC_training
TString fBoostedMethodName
std::vector< TH1 * > fBTrainBgdMVAHist
std::vector< TH1 * > fTrainBgdMVAHist
Double_t SingleBoost(MethodBase *method)
Double_t CalcMethodWeight()
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
std::vector< TH1 * > fBTrainSigMVAHist
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
const Ranking * CreateRanking()
Double_t fMethodError
TString fTransformString
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
std::vector< Float_t > * fMVAvalues
Bool_t fMonitorBoostedMethod
virtual ~MethodBoost(void)
destructor
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
Double_t fOverlap_integral
void GetHelpMessage() const
Get help message text.
Virtual base class for combining several TMVA method.
Ranking for variables in method (implementation)
Definition Ranking.h:48
The Reader class serves to use the MVAs in a specific analysis context.
Definition Reader.h:64
Basic string class.
Definition TString.h:136
A TTree represents a columnar dataset.
Definition TTree.h:79
create variable transformations