Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
DecisionTreeNode.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : DecisionTreeNode *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Node for the Decision Tree *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
16 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
17 * Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany *
18 * *
19 * Copyright (c) 2009: *
20 * CERN, Switzerland *
21 * U. of Victoria, Canada *
22 * MPI-K Heidelberg, Germany *
23 * U. of Bonn, Germany *
24 * *
25 * Redistribution and use in source and binary forms, with or without *
26 * modification, are permitted according to the terms listed in LICENSE *
27 * (http://tmva.sourceforge.net/LICENSE) *
28 **********************************************************************************/
29
30#ifndef ROOT_TMVA_DecisionTreeNode
31#define ROOT_TMVA_DecisionTreeNode
32
33//////////////////////////////////////////////////////////////////////////
34// //
35// DecisionTreeNode //
36// //
37// Node for the Decision Tree //
38// //
39//////////////////////////////////////////////////////////////////////////
40
41#include "TMVA/Node.h"
42
43#include "TMVA/Version.h"
44
45#include <sstream>
46#include <vector>
47#include <string>
48
49namespace TMVA {
50
52 {
53 public:
55 fSampleMax(),
56 fNodeR(0),fSubTreeR(0),fAlpha(0),fG(0),fNTerminal(0),
57 fNB(0),fNS(0),fSumTarget(0),fSumTarget2(0),fCC(0),
58 fNSigEvents ( 0 ), fNBkgEvents ( 0 ),
59 fNEvents ( -1 ),
66 fSeparationIndex (-1 ),
67 fSeparationGain ( -1 )
68 {
69 }
70 std::vector< Float_t > fSampleMin; // the minima for each ivar of the sample on the node during training
71 std::vector< Float_t > fSampleMax; // the maxima for each ivar of the sample on the node during training
72 Double_t fNodeR; // node resubstitution estimate, R(t)
73 Double_t fSubTreeR; // R(T) = Sum(R(t) : t in ~T)
74 Double_t fAlpha; // critical alpha for this node
75 Double_t fG; // minimum alpha in subtree rooted at this node
76 Int_t fNTerminal; // number of terminal nodes in subtree rooted at this node
77 Double_t fNB; // sum of weights of background events from the pruning sample in this node
78 Double_t fNS; // sum of weights of signal events from the pruning sample in this node
79 Float_t fSumTarget; // sum of weight*target used for the calculation of the variance (regression)
80 Float_t fSumTarget2; // sum of weight*target^2 used for the calculation of the variance (regression)
81 Double_t fCC; // debug variable for cost complexity pruning ..
82
83 Float_t fNSigEvents; // sum of weights of signal events in the node
84 Float_t fNBkgEvents; // sum of weights of background events in the node
85 Float_t fNEvents; // number of events that entered the node (during training)
86 Float_t fNSigEvents_unweighted; // sum of unweighted signal events in the node
87 Float_t fNBkgEvents_unweighted; // sum of unweighted background events in the node
88 Float_t fNEvents_unweighted; // number of unweighted events that entered the node (during training)
89 Float_t fNSigEvents_unboosted; // sum of unboosted signal events in the node
90 Float_t fNBkgEvents_unboosted; // sum of unboosted background events in the node
91 Float_t fNEvents_unboosted; // number of unboosted events that entered the node (during training)
92 Float_t fSeparationIndex; // measure of "purity" (separation between S and B) AT this node
93 Float_t fSeparationGain; // measure of "purity", separation, or information gained BY this node's selection
94
95 // copy constructor
97 fSampleMin(),fSampleMax(), // Samplemin and max are reset in copy constructor
99 fAlpha(n.fAlpha), fG(n.fG),
101 fNB(n.fNB), fNS(n.fNS),
102 fSumTarget(0),fSumTarget2(0), // SumTarget reset in copy constructor
103 fCC(0),
105 fNEvents ( n.fNEvents ),
111 { }
112 };
113
114 class Event;
115 class MsgLogger;
116
117 class DecisionTreeNode: public Node {
118
119 public:
120
121 // constructor of an essentially "empty" node floating in space
123 // constructor of a daughter node as a daughter of 'p'
124 DecisionTreeNode (Node* p, char pos);
125
126 // copy constructor
127 DecisionTreeNode (const DecisionTreeNode &n, DecisionTreeNode* parent = NULL);
128
129 // destructor
130 virtual ~DecisionTreeNode();
131
132 virtual Node* CreateNode() const { return new DecisionTreeNode(); }
133
134 inline void SetNFisherCoeff(Int_t nvars){fFisherCoeff.resize(nvars);}
135 inline UInt_t GetNFisherCoeff() const { return fFisherCoeff.size();}
136 // set fisher coefficients
137 void SetFisherCoeff(Int_t ivar, Double_t coeff);
138 // get fisher coefficients
139 Double_t GetFisherCoeff(Int_t ivar) const {return fFisherCoeff.at(ivar);}
140
141 // test event if it descends the tree at this node to the right
142 virtual Bool_t GoesRight( const Event & ) const;
143
144 // test event if it descends the tree at this node to the left
145 virtual Bool_t GoesLeft ( const Event & ) const;
146
147 // set index of variable used for discrimination at this node
148 void SetSelector( Short_t i) { fSelector = i; }
149 // return index of variable used for discrimination at this node
150 Short_t GetSelector() const { return fSelector; }
151
152 // set the cut value applied at this node
154 // return the cut value applied at this node
155 Float_t GetCutValue ( void ) const { return fCutValue; }
156
157 // set true: if event variable > cutValue ==> signal , false otherwise
158 void SetCutType( Bool_t t ) { fCutType = t; }
159 // return kTRUE: Cuts select signal, kFALSE: Cuts select bkg
160 Bool_t GetCutType( void ) const { return fCutType; }
161
162 // set node type: 1 signal node, -1 bkg leaf, 0 intermediate Node
163 void SetNodeType( Int_t t ) { fNodeType = t;}
164 // return node type: 1 signal node, -1 bkg leaf, 0 intermediate Node
165 Int_t GetNodeType( void ) const { return fNodeType; }
166
167 //return S/(S+B) (purity) at this node (from training)
168 Float_t GetPurity( void ) const { return fPurity;}
169 //calculate S/(S+B) (purity) at this node (from training)
170 void SetPurity( void );
171
172 //set the response of the node (for regression)
174
175 //return the response of the node (for regression)
176 Float_t GetResponse( void ) const { return fResponse;}
177
178 //set the RMS of the response of the node (for regression)
179 void SetRMS( Float_t r ) { fRMS = r;}
180
181 //return the RMS of the response of the node (for regression)
182 Float_t GetRMS( void ) const { return fRMS;}
183
184 // set the sum of the signal weights in the node
186
187 // set the sum of the backgr weights in the node
189
190 // set the number of events that entered the node (during training)
191 void SetNEvents( Float_t nev ){ fTrainInfo->fNEvents =nev ; }
192
193 // set the sum of the unweighted signal events in the node
195
196 // set the sum of the unweighted backgr events in the node
198
199 // set the number of unweighted events that entered the node (during training)
201
202 // set the sum of the unboosted signal events in the node
204
205 // set the sum of the unboosted backgr events in the node
207
208 // set the number of unboosted events that entered the node (during training)
210
211 // increment the sum of the signal weights in the node
213
214 // increment the sum of the backgr weights in the node
216
217 // increment the number of events that entered the node (during training)
219
220 // increment the sum of the signal weights in the node
222
223 // increment the sum of the backgr weights in the node
225
226 // increment the number of events that entered the node (during training)
228
229 // return the sum of the signal weights in the node
230 Float_t GetNSigEvents( void ) const { return fTrainInfo->fNSigEvents; }
231
232 // return the sum of the backgr weights in the node
233 Float_t GetNBkgEvents( void ) const { return fTrainInfo->fNBkgEvents; }
234
235 // return the number of events that entered the node (during training)
236 Float_t GetNEvents( void ) const { return fTrainInfo->fNEvents; }
237
238 // return the sum of unweighted signal weights in the node
240
241 // return the sum of unweighted backgr weights in the node
243
244 // return the number of unweighted events that entered the node (during training)
246
247 // return the sum of unboosted signal weights in the node
249
250 // return the sum of unboosted backgr weights in the node
252
253 // return the number of unboosted events that entered the node (during training)
255
256
257 // set the chosen index, measure of "purity" (separation between S and B) AT this node
259 // return the separation index AT this node
261
262 // set the separation, or information gained BY this node's selection
264 // return the gain in separation obtained by this node's selection
266
267 // printout of the node
268 virtual void Print( std::ostream& os ) const;
269
270 // recursively print the node and its daughters (--> print the 'tree')
271 virtual void PrintRec( std::ostream& os ) const;
272
273 virtual void AddAttributesToNode(void* node) const;
274 virtual void AddContentToNode(std::stringstream& s) const;
275
276 // recursively clear the nodes content (S/N etc, but not the cut criteria)
278
279 // get pointers to children, mother in the tree
280
281 // return pointer to the left/right daughter or parent node
282 inline virtual DecisionTreeNode* GetLeft( ) const { return static_cast<DecisionTreeNode*>(fLeft); }
283 inline virtual DecisionTreeNode* GetRight( ) const { return static_cast<DecisionTreeNode*>(fRight); }
284 inline virtual DecisionTreeNode* GetParent( ) const { return static_cast<DecisionTreeNode*>(fParent); }
285
286 // set pointer to the left/right daughter and parent node
287 inline virtual void SetLeft (Node* l) { fLeft = l;}
288 inline virtual void SetRight (Node* r) { fRight = r;}
289 inline virtual void SetParent(Node* p) { fParent = p;}
290
291
292
293
294 // the node resubstitution estimate, R(t), for Cost Complexity pruning
295 inline void SetNodeR( Double_t r ) { fTrainInfo->fNodeR = r; }
296 inline Double_t GetNodeR( ) const { return fTrainInfo->fNodeR; }
297
298 // the resubstitution estimate, R(T_t), of the tree rooted at this node
300 inline Double_t GetSubTreeR( ) const { return fTrainInfo->fSubTreeR; }
301
302 // R(t) - R(T_t)
303 // the critical point alpha = -------------
304 // |~T_t| - 1
305 inline void SetAlpha( Double_t alpha ) { fTrainInfo->fAlpha = alpha; }
306 inline Double_t GetAlpha( ) const { return fTrainInfo->fAlpha; }
307
308 // the minimum alpha in the tree rooted at this node
310 inline Double_t GetAlphaMinSubtree( ) const { return fTrainInfo->fG; }
311
312 // number of terminal nodes in the subtree rooted here
313 inline void SetNTerminal( Int_t n ) { fTrainInfo->fNTerminal = n; }
314 inline Int_t GetNTerminal( ) const { return fTrainInfo->fNTerminal; }
315
316 // number of background/signal events from the pruning validation sample
317 inline void SetNBValidation( Double_t b ) { fTrainInfo->fNB = b; }
318 inline void SetNSValidation( Double_t s ) { fTrainInfo->fNS = s; }
319 inline Double_t GetNBValidation( ) const { return fTrainInfo->fNB; }
320 inline Double_t GetNSValidation( ) const { return fTrainInfo->fNS; }
321
322
325
328
329 inline Float_t GetSumTarget() const {return fTrainInfo? fTrainInfo->fSumTarget : -9999;}
330 inline Float_t GetSumTarget2() const {return fTrainInfo? fTrainInfo->fSumTarget2: -9999;}
331
332
333 // reset the pruning validation data
334 void ResetValidationData( );
335
336 // flag indicates whether this node is terminal
337 inline Bool_t IsTerminal() const { return fIsTerminalNode; }
338 inline void SetTerminal( Bool_t s = kTRUE ) { fIsTerminalNode = s; }
339 void PrintPrune( std::ostream& os ) const ;
340 void PrintRecPrune( std::ostream& os ) const;
341
342 void SetCC(Double_t cc);
343 Double_t GetCC() const {return (fTrainInfo? fTrainInfo->fCC : -1.);}
344
345 Float_t GetSampleMin(UInt_t ivar) const;
346 Float_t GetSampleMax(UInt_t ivar) const;
347 void SetSampleMin(UInt_t ivar, Float_t xmin);
348 void SetSampleMax(UInt_t ivar, Float_t xmax);
349
350 static void SetIsTraining(bool on);
351 static void SetTmvaVersionCode(UInt_t code);
352
353 static bool IsTraining();
354 static UInt_t GetTmvaVersionCode();
355
356 virtual Bool_t ReadDataRecord( std::istream& is, UInt_t tmva_Version_Code = TMVA_VERSION_CODE );
357 virtual void ReadAttributes(void* node, UInt_t tmva_Version_Code = TMVA_VERSION_CODE );
358 virtual void ReadContent(std::stringstream& s);
359
360 protected:
361
362 static MsgLogger& Log();
363
364 static bool fgIsTraining; // static variable to flag training phase in which we need fTrainInfo
365 static UInt_t fgTmva_Version_Code; // set only when read from weightfile
366
367 std::vector<Double_t> fFisherCoeff; // the fisher coeff (offset at the last element)
368
369 Float_t fCutValue; // cut value applied on this node to discriminate bkg against sig
370 Bool_t fCutType; // true: if event variable > cutValue ==> signal , false otherwise
371 Short_t fSelector; // index of variable used in node selection (decision tree)
372
373 Float_t fResponse; // response value in case of regression
374 Float_t fRMS; // response RMS of the regression node
375 Int_t fNodeType; // Type of node: -1 == Bkg-leaf, 1 == Signal-leaf, 0 = internal
376 Float_t fPurity; // the node purity
377
378 Bool_t fIsTerminalNode; //! flag to set node as terminal (i.e., without deleting its descendants)
379
381
382 private:
383
384 ClassDef(DecisionTreeNode,0); // Node for the Decision Tree
385 };
386} // namespace TMVA
387
388#endif
ROOT::R::TRInterface & r
Definition Object.C:4
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define g(i)
Definition RSha256.hxx:105
unsigned int UInt_t
Definition RtypesCore.h:46
bool Bool_t
Definition RtypesCore.h:63
short Short_t
Definition RtypesCore.h:39
double Double_t
Definition RtypesCore.h:59
float Float_t
Definition RtypesCore.h:57
const Bool_t kTRUE
Definition RtypesCore.h:91
#define ClassDef(name, id)
Definition Rtypes.h:325
float xmin
float xmax
#define TMVA_VERSION_CODE
Definition Version.h:47
std::vector< Float_t > fSampleMax
DTNodeTrainingInfo(const DTNodeTrainingInfo &n)
std::vector< Float_t > fSampleMin
virtual void AddContentToNode(std::stringstream &s) const
adding attributes to tree node (well, was used in BinarySearchTree, and somehow I guess someone progr...
void SetNEvents_unweighted(Float_t nev)
Float_t GetNBkgEvents_unboosted(void) const
DTNodeTrainingInfo * fTrainInfo
flag to set node as terminal (i.e., without deleting its descendants)
virtual ~DecisionTreeNode()
destructor
Float_t GetNSigEvents_unweighted(void) const
Float_t GetNBkgEvents_unweighted(void) const
Double_t GetSubTreeR() const
Float_t GetSeparationIndex(void) const
void SetSeparationGain(Float_t sep)
void SetNBkgEvents(Float_t b)
Float_t GetNSigEvents_unboosted(void) const
Double_t GetNSValidation() const
static void SetIsTraining(bool on)
void PrintPrune(std::ostream &os) const
printout of the node (can be read in with ReadDataRecord)
Float_t GetSumTarget() const
void PrintRecPrune(std::ostream &os) const
recursive printout of the node and its daughters
void SetFisherCoeff(Int_t ivar, Double_t coeff)
set fisher coefficients
void SetNSigEvents_unboosted(Float_t s)
void SetSumTarget2(Float_t t2)
void SetAlphaMinSubtree(Double_t g)
static UInt_t fgTmva_Version_Code
void IncrementNBkgEvents(Float_t b)
void SetNEvents_unboosted(Float_t nev)
Float_t GetNSigEvents(void) const
virtual void SetLeft(Node *l)
Double_t GetAlphaMinSubtree() const
void SetTerminal(Bool_t s=kTRUE)
Float_t GetNEvents_unweighted(void) const
void SetResponse(Float_t r)
UInt_t GetNFisherCoeff() const
void SetSampleMax(UInt_t ivar, Float_t xmax)
set the maximum of variable ivar from the training sample that pass/end up in this node
void ClearNodeAndAllDaughters()
clear the nodes (their S/N, Nevents etc), just keep the structure of the tree
virtual Bool_t GoesLeft(const Event &) const
test event if it descends the tree at this node to the left
static void SetTmvaVersionCode(UInt_t code)
virtual void ReadContent(std::stringstream &s)
reading attributes from tree node (well, was used in BinarySearchTree, and somehow I guess someone pr...
void SetNBValidation(Double_t b)
Float_t GetRMS(void) const
void IncrementNEvents(Float_t nev)
void SetPurity(void)
return the S/(S+B) (purity) for the node REM: even if nodes with purity 0.01 are very PURE background...
void SetSubTreeR(Double_t r)
void AddToSumTarget2(Float_t t2)
virtual void Print(std::ostream &os) const
print the node
virtual DecisionTreeNode * GetLeft() const
Double_t GetNodeR() const
Float_t GetSumTarget2() const
virtual Bool_t GoesRight(const Event &) const
test event if it descends the tree at this node to the right
DecisionTreeNode()
constructor of an essentially "empty" node floating in space
void SetNFisherCoeff(Int_t nvars)
virtual void AddAttributesToNode(void *node) const
add attribute to xml
Short_t GetSelector() const
static UInt_t GetTmvaVersionCode()
virtual void ReadAttributes(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
void SetNSigEvents(Float_t s)
Float_t GetResponse(void) const
Float_t GetCutValue(void) const
Int_t GetNodeType(void) const
Double_t GetAlpha() const
Bool_t GetCutType(void) const
static MsgLogger & Log()
void ResetValidationData()
temporary stored node values (number of events, etc.) that originate not from the training but from t...
virtual void PrintRec(std::ostream &os) const
recursively print the node and its daughters (--> print the 'tree')
void SetNSigEvents_unweighted(Float_t s)
Float_t GetNEvents(void) const
virtual Node * CreateNode() const
Double_t GetNBValidation() const
void SetAlpha(Double_t alpha)
void SetSeparationIndex(Float_t sep)
virtual void SetRight(Node *r)
virtual Bool_t ReadDataRecord(std::istream &is, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
Read the data block.
void SetSumTarget(Float_t t)
virtual void SetParent(Node *p)
void SetNodeR(Double_t r)
void SetNBkgEvents_unboosted(Float_t b)
Float_t GetPurity(void) const
Float_t GetNEvents_unboosted(void) const
void IncrementNSigEvents(Float_t s)
Float_t GetSeparationGain(void) const
Float_t GetSampleMax(UInt_t ivar) const
return the maximum of variable ivar from the training sample that pass/end up in this node
void SetCutValue(Float_t c)
Float_t GetNBkgEvents(void) const
Float_t GetSampleMin(UInt_t ivar) const
return the minimum of variable ivar from the training sample that pass/end up in this node
void SetSampleMin(UInt_t ivar, Float_t xmin)
set the minimum of variable ivar from the training sample that pass/end up in this node
void SetSelector(Short_t i)
std::vector< Double_t > fFisherCoeff
virtual DecisionTreeNode * GetParent() const
Double_t GetFisherCoeff(Int_t ivar) const
void SetNBkgEvents_unweighted(Float_t b)
void SetNSValidation(Double_t s)
void AddToSumTarget(Float_t t)
void SetNEvents(Float_t nev)
virtual DecisionTreeNode * GetRight() const
ostringstream derivative to redirect and format output
Definition MsgLogger.h:59
Node for the BinarySearch or Decision Trees.
Definition Node.h:58
Node * fLeft
Definition Node.h:139
Node * fParent
Definition Node.h:138
Node * fRight
Definition Node.h:140
const Int_t n
Definition legend1.C:16
create variable transformations
auto * l
Definition textangle.C:4