ROOT  6.06/09
Reference Guide
MethodDT.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodDT (Single Decision Tree) *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Analysis of a Single Decision Tree *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
16  * Or Cohen <orcohenor@gmail.com> - Weizmann Inst., Israel *
17  * *
18  * Copyright (c) 2005: *
19  * CERN, Switzerland *
20  * MPI-K Heidelberg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #ifndef ROOT_TMVA_MethodDT
28 #define ROOT_TMVA_MethodDT
29 
30 //////////////////////////////////////////////////////////////////////////
31 // //
32 // MethodDT //
33 // //
34 // Analysis of Single Decision Tree //
35 // //
36 //////////////////////////////////////////////////////////////////////////
37 
38 #include <vector>
39 #ifndef ROOT_TH1
40 #include "TH1.h"
41 #endif
42 #ifndef ROOT_TH2
43 #include "TH2.h"
44 #endif
45 #ifndef ROOT_TTree
46 #include "TTree.h"
47 #endif
48 #ifndef ROOT_TMVA_MethodBase
49 #include "TMVA/MethodBase.h"
50 #endif
51 #ifndef ROOT_TMVA_DecisionTree
52 #include "TMVA/DecisionTree.h"
53 #endif
54 #ifndef ROOT_TMVA_Event
55 #include "TMVA/Event.h"
56 #endif
57 
58 namespace TMVA {
59  class MethodBoost;
60 
61  class MethodDT : public MethodBase { // TMVA method built around a single DecisionTree (see fTree)
62  public:
63  MethodDT( const TString& jobName, // constructor taking job name, method title, data set info and option string
64  const TString& methodTitle,
65  DataSetInfo& theData,
66  const TString& theOption = "",
67  TDirectory* theTargetDir = 0 );
68  // constructor taking a weight file name (presumably used to restore a trained method -- confirm in MethodDT.cxx)
69  MethodDT( DataSetInfo& dsi,
70  const TString& theWeightFile,
71  TDirectory* theTargetDir = NULL );
72  // destructor
73  virtual ~MethodDT( void );
74  // tells the caller whether this method can handle the given analysis type / number of classes / number of targets
75  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
76  // training entry point (implemented in MethodDT.cxx)
77  void Train( void );
78 
80 
81  // write weights to file
82  void AddWeightsXMLTo( void* parent ) const;
83 
84  // read weights from file
85  void ReadWeightsFromStream( std::istream& istr );
86  void ReadWeightsFromXML ( void* wghtnode );
87 
88  // calculate the MVA value; err and errUpper are optional (default 0) output pointers
89  Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
90 
91  // the option handling methods: DeclareOptions defines the option key words, ProcessOptions decodes the option string
92  void DeclareOptions();
93  void ProcessOptions();
95 
96  void GetHelpMessage() const;
97 
98  // ranking of input variables
99  const Ranking* CreateRanking();
100  // prune the decision tree if requested (meaning of the returned Double_t is not visible in this header)
101  Double_t PruneTree( );
102 
104 
106  // set the minimum node size, given in percent, either as a number or as a string
107  void SetMinNodeSize(Double_t sizeInPercent);
108  void SetMinNodeSize(TString sizeInPercent);
109 
112 
113  private:
114  // Init used in the various constructors (common initialisation with defaults)
115  void Init( void );
116 
117  private:
118 
119 
120  std::vector<Event*> fEventSample; // the training events
121 
122  DecisionTree* fTree; // the decision tree
123  // options for the decision tree
124  SeparationBase *fSepType; // the separation used in node splitting
125  TString fSepTypeS; // the separation (option string) used in node splitting
126  Int_t fMinNodeEvents; // min number of events in node
127  Float_t fMinNodeSize; // min percentage of training events in node
128  TString fMinNodeSizeS; // string containing min percentage of training events in node
129 
130  Int_t fNCuts; // grid used in cut applied in node splitting
131  Bool_t fUseYesNoLeaf; // use sig or bkg classification in leaf nodes or sig/bkg
132  Double_t fNodePurityLimit; // purity limit for sig/bkg nodes
133  UInt_t fMaxDepth; // max depth
134 
135 
136  Double_t fErrorFraction; // ntuple var: misclassification error fraction
137  Double_t fPruneStrength; // a parameter to set the "amount" of pruning; needs to be adjusted
138  DecisionTree::EPruneMethod fPruneMethod; // method used for pruning
139  TString fPruneMethodS; // prune method option string
140  Bool_t fAutomatic; // use user given prune strength or automatically determined one using a validation sample
141  Bool_t fRandomisedTrees; // choose a random subset of possible cut variables at each node during training
142  Int_t fUseNvars; // the number of variables used in the randomised tree splitting
143  Bool_t fUsePoissonNvars; // fUseNvars is used as a Poisson mean, and the actual value of useNvars is drawn from that distribution at each step
144  std::vector<Double_t> fVariableImportance; // the relative importance of the different variables
145 
146  Double_t fDeltaPruneStrength; // step size in pruning, is adjusted according to experience of previous trees
147  // debugging flags
148  static const Int_t fgDebugLevel = 0; // debug level determining some printout/control plots etc.
149 
150 
151  Bool_t fPruneBeforeBoost; // ancient variable, only needed for "CompatibilityOptions"
152 
153  ClassDef(MethodDT,0) // Analysis of Decision Trees
154 
155  };
156 }
157 
158 #endif
Bool_t fUsePoissonNvars
Definition: MethodDT.h:143
void GetHelpMessage() const
Definition: MethodDT.cxx:550
float Float_t
Definition: RtypesCore.h:53
std::vector< Event * > fEventSample
Definition: MethodDT.h:120
void Init(void)
common initialisation with defaults for the DT-Method
Definition: MethodDT.cxx:333
TString fPruneMethodS
Definition: MethodDT.h:139
DecisionTree::EPruneMethod fPruneMethod
Definition: MethodDT.h:138
EAnalysisType
Definition: Types.h:124
Basic string class.
Definition: TString.h:137
static const Int_t fgDebugLevel
Definition: MethodDT.h:148
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
Definition: MethodDT.cxx:173
Double_t fNodePurityLimit
Definition: MethodDT.h:132
Double_t PruneTree()
prune the decision tree if requested (good for individual trees that are best grown out...
Definition: MethodDT.cxx:395
Bool_t fPruneBeforeBoost
Definition: MethodDT.h:151
void SetMinNodeSize(Double_t sizeInPercent)
Definition: MethodDT.cxx:308
Double_t fPruneStrength
Definition: MethodDT.h:137
Int_t fUseNvars
Definition: MethodDT.h:142
#define ClassDef(name, id)
Definition: Rtypes.h:254
Bool_t fAutomatic
Definition: MethodDT.h:140
UInt_t fMaxDepth
Definition: MethodDT.h:133
void ReadWeightsFromStream(std::istream &istr)
Definition: MethodDT.cxx:530
Int_t fMinNodeEvents
Definition: MethodDT.h:126
Double_t GetPruneStrength()
Definition: MethodDT.h:105
void DeclareOptions()
define the options (their key words) that can be set in the option string: UseRandomisedTrees choose a...
Definition: MethodDT.cxx:202
Int_t GetNNodes()
Definition: MethodDT.h:111
MethodDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="", TDirectory *theTargetDir=0)
Double_t fDeltaPruneStrength
Definition: MethodDT.h:146
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodDT.cxx:244
Double_t fErrorFraction
Definition: MethodDT.h:136
TString fSepTypeS
Definition: MethodDT.h:125
Int_t GetNNodesBeforePruning()
Definition: DecisionTree.h:188
unsigned int UInt_t
Definition: RtypesCore.h:42
DecisionTree * fTree
Definition: MethodDT.h:122
Float_t fMinNodeSize
Definition: MethodDT.h:127
Double_t TestTreeQuality(DecisionTree *dt)
Definition: MethodDT.cxx:495
Int_t fNCuts
Definition: MethodDT.h:130
double Double_t
Definition: RtypesCore.h:55
void AddWeightsXMLTo(void *parent) const
Definition: MethodDT.cxx:512
Bool_t fRandomisedTrees
Definition: MethodDT.h:141
Describe directory structure in memory.
Definition: TDirectory.h:41
int type
Definition: TGX11.cxx:120
UInt_t GetNNodes() const
Definition: BinaryTree.h:92
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value
Definition: MethodDT.cxx:540
TString fMinNodeSizeS
Definition: MethodDT.h:128
void Train(void)
Definition: MethodDT.cxx:365
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:110
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
Definition: MethodDT.cxx:232
Abstract ClassifierFactory template that handles arbitrary types.
virtual ~MethodDT(void)
destructor
Definition: MethodDT.cxx:358
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDT.cxx:520
#define NULL
Definition: Rtypes.h:82
Bool_t fUseYesNoLeaf
Definition: MethodDT.h:131
virtual void ReadWeightsFromStream(std::istream &)=0
std::vector< Double_t > fVariableImportance
Definition: MethodDT.h:144
const Ranking * CreateRanking()
Definition: MethodDT.cxx:555
SeparationBase * fSepType
Definition: MethodDT.h:124