MethodBDT.h
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Jan Therhaag

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodBDT (Boosted Decision Trees)                                    *
 *                                                                                *
 *                                                                                *
 * Description:                                                                   *
 *      Analysis of Boosted Decision Trees                                        *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Andreas Hoecker <Andreas.Hocker@cern.ch>  - CERN, Switzerland             *
 *      Helge Voss      <Helge.Voss@cern.ch>      - MPI-K Heidelberg, Germany     *
 *      Kai Voss        <Kai.Voss@cern.ch>        - U. of Victoria, Canada        *
 *      Doug Schouten   <dschoute@sfu.ca>         - Simon Fraser U., Canada       *
 *      Jan Therhaag    <jan.therhaag@cern.ch>    - U. of Bonn, Germany           *
 *                                                                                *
 * Copyright (c) 2005-2011:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (see tmva/doc/LICENSE)                                                         *
 **********************************************************************************/

#ifndef ROOT_TMVA_MethodBDT
#define ROOT_TMVA_MethodBDT

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// MethodBDT                                                            //
//                                                                      //
// Analysis of Boosted Decision Trees                                   //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

#include <vector>
#include <memory>
#include <map>

#include "TH2.h"
#include "TTree.h"
#include "TMVA/MethodBase.h"
#include "TMVA/DecisionTree.h"
#include "TMVA/Event.h"
#include "TMVA/LossFunction.h"

// Multithreading only if the compilation flag is turned on
#ifdef R__USE_IMT
#include "ROOT/TThreadExecutor.hxx"
#include "TSystem.h"
#endif

namespace TMVA {

   class SeparationBase;

   class MethodBDT : public MethodBase {

   public:

      // constructor for training and reading
      MethodBDT( const TString& jobName,
                 const TString& methodTitle,
                 DataSetInfo& theData,
                 const TString& theOption = "");

      // constructor for calculating BDT-MVA using previously generated decision trees
      MethodBDT( DataSetInfo& theData,
                 const TString& theWeightFile);

      virtual ~MethodBDT( void );

      virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );

      // write all Events from the Tree into a vector of Events that are
      // more easily manipulated
      void InitEventSample();

      // optimize tuning parameters
      virtual std::map<TString,Double_t> OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA");
      virtual void SetTuneParameters(std::map<TString,Double_t> tuneParameters);

      // training method
      void Train( void );

      // revoke training
      void Reset( void );

      using MethodBase::ReadWeightsFromStream;

      // write weights to file
      void AddWeightsXMLTo( void* parent ) const;

      // read weights from file
      void ReadWeightsFromStream( std::istream& istr );
      void ReadWeightsFromXML(void* parent);

      // write method specific histos to target file
      void WriteMonitoringHistosToFile( void ) const;

      // calculate the MVA value
      Double_t GetMvaValue( Double_t* err = nullptr, Double_t* errUpper = nullptr);

      // get the actual forest size (might be less than fNTrees, the requested one, if boosting is stopped early)
      UInt_t GetNTrees() const { return fForest.size(); }
   private:

      Double_t GetMvaValue( Double_t* err, Double_t* errUpper, UInt_t useNTrees );
      Double_t PrivateGetMvaValue( const TMVA::Event *ev, Double_t* err = nullptr, Double_t* errUpper = nullptr, UInt_t useNTrees = 0 );
      void BoostMonitor(Int_t iTree);

   public:
      const std::vector<Float_t>& GetMulticlassValues();

      // regression response
      const std::vector<Float_t>& GetRegressionValues();

      // apply the boost algorithm to a tree in the collection
      Double_t Boost( std::vector<const TMVA::Event*>&, DecisionTree *dt, UInt_t cls = 0);

      // ranking of input variables
      const Ranking* CreateRanking();

      // the option handling methods
      void DeclareOptions();
      void ProcessOptions();
      void SetMaxDepth(Int_t d){fMaxDepth = d;}
      void SetMinNodeSize(Double_t sizeInPercent);
      void SetMinNodeSize(TString sizeInPercent);

      void SetNTrees(Int_t d){fNTrees = d;}
      void SetAdaBoostBeta(Double_t b){fAdaBoostBeta = b;}
      void SetNodePurityLimit(Double_t l){fNodePurityLimit = l;}
      void SetShrinkage(Double_t s){fShrinkage = s;}
      void SetUseNvars(Int_t n){fUseNvars = n;}
      void SetBaggedSampleFraction(Double_t f){fBaggedSampleFraction = f;}

      // get the forest
      inline const std::vector<TMVA::DecisionTree*> & GetForest() const;

      // get the training events
      inline const std::vector<const TMVA::Event*> & GetTrainingEvents() const;

      inline const std::vector<double> & GetBoostWeights() const;

      // return the individual relative variable importance
      std::vector<Double_t> GetVariableImportance();
      Double_t GetVariableImportance(UInt_t ivar);

      Double_t TestTreeQuality( DecisionTree *dt );

      // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
      void MakeClassSpecific( std::ostream&, const TString& ) const;

      // header and auxiliary classes
      void MakeClassSpecificHeader( std::ostream&, const TString& ) const;

      void MakeClassInstantiateNode( DecisionTreeNode *n, std::ostream& fout,
                                     const TString& className ) const;

      void GetHelpMessage() const;

   protected:
      void DeclareCompatibilityOptions();

   private:
      // Init used in the various constructors
      void Init( void );

      void PreProcessNegativeEventWeights();

      // boosting algorithm (adaptive boosting)
      Double_t AdaBoost( std::vector<const TMVA::Event*>&, DecisionTree *dt );

      // boosting algorithm (adaptive boosting with cost matrix)
      Double_t AdaCost( std::vector<const TMVA::Event*>&, DecisionTree *dt );

      // boosting as a random re-weighting
      Double_t Bagging( );

      // boosting special for regression
      Double_t RegBoost( std::vector<const TMVA::Event*>&, DecisionTree *dt );

      // AdaBoost adapted to regression
      Double_t AdaBoostR2( std::vector<const TMVA::Event*>&, DecisionTree *dt );

      // binomial likelihood gradient boost for classification
      // (see Friedman: "Greedy Function Approximation: a Gradient Boosting Machine",
      // Technical report, Dept. of Statistics, Stanford University)
      Double_t GradBoost( std::vector<const TMVA::Event*>&, DecisionTree *dt, UInt_t cls = 0);
      Double_t GradBoostRegression(std::vector<const TMVA::Event*>&, DecisionTree *dt );
      void InitGradBoost( std::vector<const TMVA::Event*>&);
      void UpdateTargets( std::vector<const TMVA::Event*>&, UInt_t cls = 0);
      void UpdateTargetsRegression( std::vector<const TMVA::Event*>&, Bool_t first = kFALSE);
      Double_t GetGradBoostMVA(const TMVA::Event *e, UInt_t nTrees);
      void GetBaggedSubSample(std::vector<const TMVA::Event*>&);

      std::vector<const TMVA::Event*>  fEventSample;      ///< the training events
      std::vector<const TMVA::Event*>  fValidationSample; ///< the validation events
      std::vector<const TMVA::Event*>  fSubSample;        ///< subsample for bagged grad boost
      std::vector<const TMVA::Event*> *fTrainSample;      ///< pointer to the sample actually used in training (e.g. fEventSample or fSubSample)

      Int_t                       fNTrees;            ///< number of decision trees requested
      std::vector<DecisionTree*>  fForest;            ///< the collection of decision trees
      std::vector<double>         fBoostWeights;      ///< the weights applied in the individual boosts
      Double_t                    fSigToBkgFraction;  ///< signal-to-background fraction assumed during training
      TString                     fBoostType;         ///< string specifying the boost type
      Double_t                    fAdaBoostBeta;      ///< beta parameter for the AdaBoost algorithm
      TString                     fAdaBoostR2Loss;    ///< loss type used in AdaBoostR2 (Linear, Quadratic or Exponential)
      //Double_t                  fTransitionPoint;   ///< break-down point for gradient regression
      Double_t                    fShrinkage;         ///< learning rate for gradient boost
      Bool_t                      fBaggedBoost;       ///< turn bagging in combination with boost on/off
      Bool_t                      fBaggedGradBoost;   ///< turn bagging in combination with grad boost on/off
      //Double_t                  fSumOfWeights;      ///< sum of all event weights
      //std::map< const TMVA::Event*, std::pair<Double_t, Double_t> > fWeightedResiduals; ///< weighted regression residuals
      std::map< const TMVA::Event*, LossFunctionEventInfo> fLossFunctionEventInfo; ///< map event to true value, predicted value, and weight;
                                                                                   ///< used by the different loss functions for BDT regression
      std::map< const TMVA::Event*, std::vector<double> >  fResiduals;             ///< individual event residuals for gradient boost

      // options for the decision tree
      SeparationBase *fSepType;          ///< the separation criterion used in node splitting
      TString         fSepTypeS;         ///< the separation criterion (option string) used in node splitting
      Int_t           fMinNodeEvents;    ///< min number of events in node
      Float_t         fMinNodeSize;      ///< min percentage of training events in node
      TString         fMinNodeSizeS;     ///< string containing min percentage of training events in node

      Int_t           fNCuts;            ///< number of grid points in variable range used to find the optimal cut in node splitting
      Bool_t          fUseFisherCuts;    ///< use multivariate splits using the Fisher criterion
      Double_t        fMinLinCorrForFisher; ///< the minimum linear correlation between two variables demanded for use in the Fisher criterion in node splitting
      Bool_t          fUseExclusiveVars; ///< individual variables already used in the Fisher criterion are no longer analysed individually for node splitting
      Bool_t          fUseYesNoLeaf;     ///< use pure signal/background classification in leaf nodes rather than the signal/background purity
      Double_t        fNodePurityLimit;  ///< purity limit for sig/bkg nodes
      UInt_t          fNNodesMax;        ///< max number of nodes
      UInt_t          fMaxDepth;         ///< max depth

      DecisionTree::EPruneMethod fPruneMethod; ///< method used for pruning
      TString         fPruneMethodS;     ///< prune method option string
      Double_t        fPruneStrength;    ///< a parameter to set the "amount" of pruning; needs to be adjusted
      Double_t        fFValidationEvents;   ///< fraction of events to use for pruning
      Bool_t          fAutomatic;        ///< use the user-given prune strength or one determined automatically using a validation sample
      Bool_t          fRandomisedTrees;  ///< choose a random subset of possible cut variables at each node during training
      UInt_t          fUseNvars;         ///< the number of variables used in the randomised tree splitting
      Bool_t          fUsePoissonNvars;  ///< use "fUseNvars" not as a fixed number but as the mean of a Poisson distribution in each split
      UInt_t          fUseNTrainEvents;  ///< number of randomly picked training events used in randomised (and bagged) trees

      Double_t        fBaggedSampleFraction;   ///< relative size of the bagged event sample to the original sample size
      TString         fNegWeightTreatment;     ///< option determining how to treat negative event weights in training
      Bool_t          fNoNegWeightsInTraining; ///< ignore negative event weights in the training
      Bool_t          fInverseBoostNegWeights; ///< boost events with negative weights with 1/boostweight rather than boostweight
      Bool_t          fPairNegWeightsGlobal;   ///< pair events with negative and positive weights in the training sample and "annihilate" them
      Bool_t          fTrainWithNegWeights;    ///< negative event weights are present and are not ignored in training
      Bool_t          fDoBoostMonitor;         ///< create control plot with ROC integral vs tree number

      // some histograms for monitoring
      TTree   *fMonitorNtuple;   ///< monitoring ntuple
      Int_t    fITree;           ///< ntuple var: ith tree
      Double_t fBoostWeight;     ///< ntuple var: boost weight
      Double_t fErrorFraction;   ///< ntuple var: misclassification error fraction

      Double_t fCss;     ///< AdaCost factor: cost of true signal selected as signal
      Double_t fCts_sb;  ///< AdaCost factor: cost of true signal selected as background
      Double_t fCtb_ss;  ///< AdaCost factor: cost of true background selected as signal
      Double_t fCbb;     ///< AdaCost factor: cost of true background selected as background

      Bool_t fDoPreselection;    ///< whether to perform automatic pre-selection of 100% efficient cuts

      Bool_t fSkipNormalization; ///< true for skipping normalization at initialization of trees

      std::vector<Double_t> fVariableImportance; ///< the relative importance of the different variables

      void DeterminePreselectionCuts(const std::vector<const TMVA::Event*>& eventSample);
      Double_t ApplyPreselectionCuts(const Event* ev);

      std::vector<Double_t> fLowSigCut;
      std::vector<Double_t> fLowBkgCut;
      std::vector<Double_t> fHighSigCut;
      std::vector<Double_t> fHighBkgCut;

      std::vector<Bool_t> fIsLowSigCut;
      std::vector<Bool_t> fIsLowBkgCut;
      std::vector<Bool_t> fIsHighSigCut;
      std::vector<Bool_t> fIsHighBkgCut;

      Bool_t fHistoricBool; ///< historic variable, only needed for "CompatibilityOptions"

      TString          fRegressionLossFunctionBDTGS;  ///< the option string determining the loss function for BDT regression
      Double_t         fHuberQuantile;                ///< the quantile defining the Huber loss function in BDT regression
      LossFunctionBDT *fRegressionLossFunctionBDTG;

      // debugging flags
      static const Int_t fgDebugLevel;  ///< debug level determining some printout/control plots etc.

      // for backward compatibility
      ClassDef(MethodBDT,0);  // Analysis of Boosted Decision Trees
   };

} // namespace TMVA

const std::vector<TMVA::DecisionTree*>& TMVA::MethodBDT::GetForest()         const { return fForest; }
const std::vector<const TMVA::Event*>&  TMVA::MethodBDT::GetTrainingEvents() const { return fEventSample; }
const std::vector<double>&              TMVA::MethodBDT::GetBoostWeights()   const { return fBoostWeights; }

#endif
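
MethodBDT is normally not constructed directly: it is booked through the TMVA::Factory, whose option string is decoded by DeclareOptions()/ProcessOptions() into the data members above (fNTrees, fMaxDepth, fBoostType, fShrinkage, fBaggedSampleFraction, fNCuts, ...). A minimal training sketch; the input file, tree names and variables (input.root, TreeS/TreeB, var1/var2) are placeholders, not part of this header:

#include "TFile.h"
#include "TTree.h"
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Types.h"

void train_bdt() {
   TFile* input  = TFile::Open("input.root");               // hypothetical input file
   TFile* output = TFile::Open("TMVA_BDT.root", "RECREATE");

   TMVA::Factory    factory("TMVAClassification", output,
                            "!V:!Silent:AnalysisType=Classification");
   TMVA::DataLoader loader("dataset");
   loader.AddVariable("var1", 'F');                         // hypothetical variables
   loader.AddVariable("var2", 'F');
   loader.AddSignalTree    (static_cast<TTree*>(input->Get("TreeS")), 1.0);
   loader.AddBackgroundTree(static_cast<TTree*>(input->Get("TreeB")), 1.0);
   loader.PrepareTrainingAndTestTree("", "SplitMode=Random:NormMode=NumEvents");

   // The option string maps onto the members declared above
   // (fNTrees, fMaxDepth, fBoostType, fShrinkage, fBaggedSampleFraction, fNCuts).
   factory.BookMethod(&loader, TMVA::Types::kBDT, "BDTG",
                      "NTrees=500:MaxDepth=3:BoostType=Grad:Shrinkage=0.1:"
                      "UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20");

   factory.TrainAllMethods();   // drives MethodBDT::Train()
   factory.TestAllMethods();
   factory.EvaluateAllMethods();
   output->Close();
}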
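
At application time, the weight file written by AddWeightsXMLTo() is read back through ReadWeightsFromXML(), and GetMvaValue() is evaluated per event. This is usually driven by the TMVA::Reader; a sketch using the same placeholder variable names and the default weight-file path the job above would produce:

#include <iostream>
#include "TMVA/Reader.h"

void apply_bdt() {
   float var1 = 0.f, var2 = 0.f;        // must match the training variables
   TMVA::Reader reader("!Color:!Silent");
   reader.AddVariable("var1", &var1);
   reader.AddVariable("var2", &var2);
   reader.BookMVA("BDTG", "dataset/weights/TMVAClassification_BDTG.weights.xml");

   var1 = 0.5f; var2 = -1.2f;           // fill from the current event
   // per-event forest response, roughly in [-1, 1] for classification
   std::cout << "BDT response: " << reader.EvaluateMVA("BDTG") << std::endl;
}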
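
A note on the gradient boost declared in the private section, for orientation rather than taken verbatim from the implementation: following the cited Friedman paper, UpdateTargets() sets each event's regression target for the next tree to the negative gradient (pseudo-residual) of the binomial log-likelihood loss at the current forest response F(x). A self-contained sketch of that pseudo-residual, under the convention y = +1 for signal and y = -1 for background:

#include <cmath>

// Loss L(y, F) = ln(1 + exp(-2*y*F)); the pseudo-residual is -dL/dF.
double pseudoResidual(double y, double F) {
   return 2.0 * y / (1.0 + std::exp(2.0 * y * F));
}

After a tree t is fitted to these residuals, the boosted response is updated as F(x) <- F(x) + shrinkage * t(x); that scaling is the role of fShrinkage ("learning rate for gradient boost").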
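
Similarly, a hedged sketch of the reweighting idea behind AdaBoost() and fAdaBoostBeta (a simplified illustration, not the exact TMVA code path): if the current tree has weighted misclassification fraction err, misclassified events have their weights multiplied by a boost factor before the next tree is grown:

#include <cmath>

// err  = weighted misclassification fraction of the current tree (0 < err < 0.5)
// beta = fAdaBoostBeta; beta = 1 reproduces the standard AdaBoost factor
double adaBoostWeight(double err, double beta) {
   return std::pow((1.0 - err) / err, beta);
}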
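
Finally, the inline getters defined after the class body (GetForest(), GetTrainingEvents(), GetBoostWeights()) allow a trained method to be inspected directly. A sketch, assuming a reader set up as above; whether the dynamic_cast succeeds depends on the booked method actually being a BDT:

#include <iostream>
#include "TMVA/MethodBDT.h"
#include "TMVA/Reader.h"

void inspect_forest(TMVA::Reader& reader) {
   // FindMVA returns the IMethod booked under the given tag (nullptr if unknown)
   auto* bdt = dynamic_cast<TMVA::MethodBDT*>(reader.FindMVA("BDTG"));
   if (!bdt) return;
   std::cout << "trees in forest: " << bdt->GetNTrees() << "\n"
             << "boost weights  : " << bdt->GetBoostWeights().size() << std::endl;
}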