Logo ROOT  
Reference Guide
MethodDT.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodDT (Single Decision Tree) *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Analysis of Single Decision Tree *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
16 * Or Cohen <orcohenor@gmail.com> - Weizmann Inst., Israel *
17 * *
18 * Copyright (c) 2005: *
19 * CERN, Switzerland *
20 * MPI-K Heidelberg, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27#ifndef ROOT_TMVA_MethodDT
28#define ROOT_TMVA_MethodDT
29
30//////////////////////////////////////////////////////////////////////////
31// //
32// MethodDT //
33// //
34// Analysis of Single Decision Tree //
35// //
36//////////////////////////////////////////////////////////////////////////
37
38#include <vector>
39#include "TH1.h"
40#include "TH2.h"
41#include "TTree.h"
42#include "TMVA/MethodBase.h"
43#include "TMVA/DecisionTree.h"
44#include "TMVA/Event.h"
45
46namespace TMVA {
47 class MethodBoost;
48
49 class MethodDT : public MethodBase {
50 public:
51 MethodDT( const TString& jobName,
52 const TString& methodTitle,
53 DataSetInfo& theData,
54 const TString& theOption = "");
55
57 const TString& theWeightFile);
58
59 virtual ~MethodDT( void );
60
61 virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
62
63 void Train( void );
64
66
67 // write weights to file
68 void AddWeightsXMLTo( void* parent ) const;
69
70 // read weights from file
71 void ReadWeightsFromStream( std::istream& istr );
72 void ReadWeightsFromXML ( void* wghtnode );
73
74 // calculate the MVA value
75 Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
76
77 // the option handling methods
78 void DeclareOptions();
79 void ProcessOptions();
81
82 void GetHelpMessage() const;
83
84 // ranking of input variables
85 const Ranking* CreateRanking();
86
88
90
92
93 void SetMinNodeSize(Double_t sizeInPercent);
94 void SetMinNodeSize(TString sizeInPercent);
95
98
99 private:
100 // Init used in the various constructors
101 void Init( void );
102
103 private:
104
105
106 std::vector<Event*> fEventSample; ///< the training events
107
108 DecisionTree* fTree; ///< the decision tree
109 //options for the decision Tree
110 SeparationBase *fSepType; ///< the separation used in node splitting
111 TString fSepTypeS; ///< the separation (option string) used in node splitting
112 Int_t fMinNodeEvents; ///< min number of events in node
113 Float_t fMinNodeSize; ///< min percentage of training events in node
114 TString fMinNodeSizeS; ///< string containing min percentage of training events in node
115
116 Int_t fNCuts; ///< grid used in cut applied in node splitting
117 Bool_t fUseYesNoLeaf; ///< use sig or bkg classification in leaf nodes or sig/bkg
118 Double_t fNodePurityLimit; ///< purity limit for sig/bkg nodes
119 UInt_t fMaxDepth; ///< max depth
120
121
122 Double_t fErrorFraction; ///< ntuple var: misclassification error fraction
123 Double_t fPruneStrength; ///< a parameter to set the "amount" of pruning..needs to be adjusted
124 DecisionTree::EPruneMethod fPruneMethod; ///< method used for pruning
125 TString fPruneMethodS; ///< prune method option String
126 Bool_t fAutomatic; ///< use user given prune strength or automatically determined one using a validation sample
127 Bool_t fRandomisedTrees; ///< choose a random subset of possible cut variables at each node during training
128 Int_t fUseNvars; ///< the number of variables used in the randomised tree splitting
129 Bool_t fUsePoissonNvars; ///< fUseNvars is used as a Poisson mean, and the actual value of useNvars is at each step drawn from that distribution
130 std::vector<Double_t> fVariableImportance; ///< the relative importance of the different variables
131
132 Double_t fDeltaPruneStrength; ///< step size in pruning, is adjusted according to experience of previous trees
133 // debugging flags
134 static const Int_t fgDebugLevel = 0; ///< debug level determining some printout/control plots etc.
135
136
137 Bool_t fPruneBeforeBoost; ///< ancient variable, only needed for "CompatibilityOptions"
138
139 ClassDef(MethodDT,0); // Analysis of Decision Trees
140
141 };
142}
143
144#endif
float Float_t
Definition: RtypesCore.h:57
double Double_t
Definition: RtypesCore.h:59
#define ClassDef(name, id)
Definition: Rtypes.h:335
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
UInt_t GetNNodes() const
Definition: BinaryTree.h:86
Class that contains all the data information.
Definition: DataSetInfo.h:62
Implementation of a Decision Tree.
Definition: DecisionTree.h:65
Int_t GetNNodesBeforePruning()
Definition: DecisionTree.h:180
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
virtual void ReadWeightsFromStream(std::istream &)=0
Analysis of a Single Decision Tree.
Definition: MethodDT.h:49
UInt_t fMaxDepth
max depth
Definition: MethodDT.h:119
Bool_t fAutomatic
use user given prune strength or automatically determined one using a validation sample
Definition: MethodDT.h:126
Int_t GetNNodes()
Definition: MethodDT.h:97
Float_t fMinNodeSize
min percentage of training events in node
Definition: MethodDT.h:113
virtual ~MethodDT(void)
destructor
Definition: MethodDT.cxx:368
MethodDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
the standard constructor for just an ordinary "decision tree"
Definition: MethodDT.cxx:127
Bool_t fUsePoissonNvars
fUseNvars is used as a Poisson mean, and the actual value of useNvars is at each step drawn from that...
Definition: MethodDT.h:129
Int_t fUseNvars
the number of variables used in the randomised tree splitting
Definition: MethodDT.h:128
Double_t TestTreeQuality(DecisionTree *dt)
Definition: MethodDT.cxx:506
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
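Reports whether this method supports the requested analysis type. NOTE(review): the upstream text reads "FDA can handle classification with 2 classes and regression with one regression-target", which appears to be copied from another method; verify the supported types against MethodDT.cxx:180.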
Definition: MethodDT.cxx:180
SeparationBase * fSepType
the separation used in node splitting
Definition: MethodDT.h:110
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value
Definition: MethodDT.cxx:551
DecisionTree::EPruneMethod fPruneMethod
method used for pruning
Definition: MethodDT.h:124
Double_t fErrorFraction
ntuple var: misclassification error fraction
Definition: MethodDT.h:122
static const Int_t fgDebugLevel
debug level determining some printout/control plots etc.
Definition: MethodDT.h:134
void Train(void)
Definition: MethodDT.cxx:375
Double_t fDeltaPruneStrength
step size in pruning, is adjusted according to experience of previous trees
Definition: MethodDT.h:132
Bool_t fUseYesNoLeaf
use sig or bkg classification in leaf nodes or sig/bkg
Definition: MethodDT.h:117
const Ranking * CreateRanking()
Definition: MethodDT.cxx:566
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDT.cxx:531
Double_t fNodePurityLimit
purity limit for sig/bkg nodes
Definition: MethodDT.h:118
TString fSepTypeS
the separation (option string) used in node splitting
Definition: MethodDT.h:111
TString fMinNodeSizeS
string containing min percentage of training events in node
Definition: MethodDT.h:114
void GetHelpMessage() const
Definition: MethodDT.cxx:561
std::vector< Double_t > fVariableImportance
the relative importance of the different variables
Definition: MethodDT.h:130
void AddWeightsXMLTo(void *parent) const
Definition: MethodDT.cxx:523
Double_t PruneTree()
prune the decision tree if requested (good for individual trees that are best grown out,...
Definition: MethodDT.cxx:406
void ReadWeightsFromStream(std::istream &istr)
Definition: MethodDT.cxx:541
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:96
Bool_t fRandomisedTrees
choose a random subset of possible cut variables at each node during training
Definition: MethodDT.h:127
DecisionTree * fTree
the decision tree
Definition: MethodDT.h:108
std::vector< Event * > fEventSample
the training events
Definition: MethodDT.h:106
Double_t GetPruneStrength()
Definition: MethodDT.h:91
void DeclareOptions()
Define the options (their key words) that can be set in the option string.
Definition: MethodDT.cxx:212
Bool_t fPruneBeforeBoost
ancient variable, only needed for "CompatibilityOptions"
Definition: MethodDT.h:137
void Init(void)
common initialisation with defaults for the DT-Method
Definition: MethodDT.cxx:343
void SetMinNodeSize(Double_t sizeInPercent)
Definition: MethodDT.cxx:320
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
Definition: MethodDT.cxx:245
Int_t fNCuts
grid used in cut applied in node splitting
Definition: MethodDT.h:116
Double_t fPruneStrength
a parameter to set the "amount" of pruning..needs to be adjusted
Definition: MethodDT.h:123
TString fPruneMethodS
prune method option String
Definition: MethodDT.h:125
Int_t fMinNodeEvents
min number of events in node
Definition: MethodDT.h:112
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodDT.cxx:256
Ranking for variables in method (implementation)
Definition: Ranking.h:48
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
EAnalysisType
Definition: Types.h:126
Basic string class.
Definition: TString.h:136
create variable transformations