MethodDT.cxx
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodDT (DT = Decision Trees) *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Analysis of Boosted Decision Trees *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
16  * Or Cohen <orcohenor@gmail.com> - Weizmann Inst., Israel *
17  * *
18  * Copyright (c) 2005: *
19  * CERN, Switzerland *
20  * MPI-K Heidelberg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 //_______________________________________________________________________
28 //
29 // Analysis of Boosted Decision Trees
30 //
31 // Boosted decision trees have been successfully used in High Energy
32 // Physics analysis for example by the MiniBooNE experiment
33 // (Yang-Roe-Zhu, physics/0508045). In Boosted Decision Trees, the
34 // selection is based on a majority vote over the results of several decision
35 // trees, which are all derived from the same training sample by
36 // supplying different event weights during the training.
37 //
38 // Decision trees:
39 //
40 // Successive decision nodes are used to categorize the
41 // events out of the sample as either signal or background. Each node
42 // uses only a single discriminating variable to decide if the event is
43 // signal-like ("goes right") or background-like ("goes left"). This
44 // forms a tree-like structure with "baskets" at the end (leaf nodes),
45 // and an event is classified as either signal or background according to
46 // whether the basket where it ends up has been classified as signal or
47 // background during the training. Training of a decision tree is the
48 // process of defining the "cut criteria" for each node. The training
49 // starts with the root node. Here one takes the full training event
50 // sample and selects the variable and corresponding cut value that give
51 // the best separation between signal and background at this stage. Using
52 // this cut criterion, the sample is then divided into two subsamples, a
53 // signal-like (right) and a background-like (left) sample. Two new nodes
54 // are then created, one for each of the two sub-samples, and they are
55 // constructed using the same mechanism as described for the root
56 // node. The division is stopped once a node has reached either a
57 // minimum number of events, or a minimum or maximum signal purity. These
58 // leaf nodes are then called "signal" or "background", depending on
59 // whether they contain more signal or more background events from the training sample.
60 //
61 // Boosting:
62 //
63 // The idea behind boosting is that signal events from the training
64 // sample that end up in a background node (and vice versa) are given a
65 // larger weight than events that are in the correct leaf node. This
66 // results in a re-weighted training event sample, from which a new
67 // decision tree can be developed. The boosting can be applied several
68 // times (typically 100-500 times) and one ends up with a set of decision
69 // trees (a forest).
70 //
71 // Bagging:
72 //
73 // In this particular variant of the Boosted Decision Trees the boosting
74 // is not done on the basis of previous training results, but by a simple
75 // stochastic re-sampling of the initial training event sample.
76 //
77 // Analysis:
78 //
79 // Applying an individual decision tree to a test event results in a
80 // classification of the event as either signal or background. For the
81 // boosted decision tree selection, an event is successively subjected to
82 // the whole set of decision trees and depending on how often it is
83 // classified as signal, a "likelihood" estimator is constructed for the
84 // event being signal or background. The value of this estimator is the
85 // one which is then used to select the events from an event sample, and
86 // the cut value on this estimator defines the efficiency and purity of
87 // the selection.
88 //
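// A minimal booking sketch (assuming the usual TMVA Factory/DataLoader
// workflow; the "factory" and "dataloader" objects are not part of this
// file and serve only as an illustration). The option keywords are
// documented in DeclareOptions() below:
//
//    factory->BookMethod( dataloader, TMVA::Types::kDT, "DT",
//                         "SeparationType=GiniIndex:nCuts=20:MinNodeSize=5%:"
//                         "MaxDepth=3:PruneMethod=NoPruning" );
//
// In practice a single decision tree is often booked as the base classifier
// of MethodBoost, which supplies the boosting/bagging described above.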
89 //_______________________________________________________________________
90 
91 #include "TMVA/MethodDT.h"
92 
93 #include "TMVA/BinarySearchTree.h"
94 #include "TMVA/CCPruner.h"
95 #include "TMVA/ClassifierFactory.h"
96 #include "TMVA/Configurable.h"
97 #include "TMVA/CrossEntropy.h"
98 #include "TMVA/DataSet.h"
99 #include "TMVA/DecisionTree.h"
100 #include "TMVA/GiniIndex.h"
101 #include "TMVA/IMethod.h"
102 #include "TMVA/MethodBase.h"
103 #include "TMVA/MethodBoost.h"
104 #include "TMVA/MisClassificationError.h"
105 #include "TMVA/MsgLogger.h"
106 #include "TMVA/Ranking.h"
107 #include "TMVA/SdivSqrtSplusB.h"
108 #include "TMVA/SeparationBase.h"
109 #include "TMVA/Timer.h"
110 #include "TMVA/Tools.h"
111 #include "TMVA/Types.h"
112 
113 #include "Riostream.h"
114 #include "TRandom3.h"
115 #include "TMath.h"
116 #include "TObjString.h"
117 
118 #include <algorithm>
119 
120 using std::vector;
121 
122 REGISTER_METHOD(DT)
123 
124 ClassImp(TMVA::MethodDT)
125 
126 ////////////////////////////////////////////////////////////////////////////////
127 /// the standard constructor for just an ordinary "decision tree"
128 
129  TMVA::MethodDT::MethodDT( const TString& jobName,
130  const TString& methodTitle,
131  DataSetInfo& theData,
132  const TString& theOption) :
133  TMVA::MethodBase( jobName, Types::kDT, methodTitle, theData, theOption)
134  , fTree(0)
135  , fSepType(0)
136  , fMinNodeEvents(0)
137  , fMinNodeSize(0)
138  , fNCuts(0)
139  , fUseYesNoLeaf(kFALSE)
140  , fNodePurityLimit(0)
141  , fMaxDepth(0)
142  , fErrorFraction(0)
143  , fPruneStrength(0)
144  , fPruneMethod(DecisionTree::kNoPruning)
145  , fAutomatic(kFALSE)
146  , fRandomisedTrees(kFALSE)
147  , fUseNvars(0)
148  , fUsePoissonNvars(0) // don't use this initialisation, only here to make Coverity happy. Is set in Init()
149  , fDeltaPruneStrength(0)
150 {
151 }
152 
153 ////////////////////////////////////////////////////////////////////////////////
154 /// constructor from a weight file (used by the Reader)
155 
156 TMVA::MethodDT::MethodDT( DataSetInfo& dsi,
157  const TString& theWeightFile) :
158  TMVA::MethodBase( Types::kDT, dsi, theWeightFile)
159  , fTree(0)
160  , fSepType(0)
161  , fMinNodeEvents(0)
162  , fMinNodeSize(0)
163  , fNCuts(0)
164  , fUseYesNoLeaf(kFALSE)
165  , fNodePurityLimit(0)
166  , fMaxDepth(0)
167  , fErrorFraction(0)
168  , fPruneStrength(0)
169  , fPruneMethod(DecisionTree::kNoPruning)
170  , fAutomatic(kFALSE)
171  , fRandomisedTrees(kFALSE)
172  , fUseNvars(0)
173  , fDeltaPruneStrength(0)
174 {
175 }
176 
177 ////////////////////////////////////////////////////////////////////////////////
178 /// DT can handle classification with 2 classes
179 
180 Bool_t TMVA::MethodDT::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
181 {
181 {
182  if( type == Types::kClassification && numberClasses == 2 ) return kTRUE;
183  return kFALSE;
184 }
185 
186 
187 ////////////////////////////////////////////////////////////////////////////////
188 /// define the options (their key words) that can be set in the option string
189 /// UseRandomisedTrees choose at each node splitting a random set of variables
190 /// UseNvars use UseNvars variables in randomised trees
191 /// SeparationType the separation criterion applied in the node splitting
192 /// known: GiniIndex
193 /// MisClassificationError
194 /// CrossEntropy
195 /// SDivSqrtSPlusB
196 /// nEventsMin: the minimum number of events in a node (leaf criteria, stop splitting)
197 /// nCuts: the number of steps in the optimisation of the cut for a node (if < 0, then
198 /// step size is determined by the events)
199 /// UseYesNoLeaf decide if the classification is done simply by the node type, or the S/B
200 /// (from the training) in the leaf node
201 /// NodePurityLimit the minimum purity to classify a node as a signal node (used in pruning and boosting to determine
202 /// misclassification error rate)
203 /// PruneMethod The Pruning method:
204 /// known: NoPruning // switch off pruning completely
205 /// ExpectedError
206 /// CostComplexity
207 /// PruneStrength a parameter to adjust the amount of pruning. Should be large enough so that overtraining is avoided
208 
209 void TMVA::MethodDT::DeclareOptions()
210 {
211  DeclareOptionRef(fRandomisedTrees,"UseRandomisedTrees","Choose at each node splitting a random set of variables and *bagging*");
212  DeclareOptionRef(fUseNvars,"UseNvars","Number of variables used if randomised Tree option is chosen");
213  DeclareOptionRef(fUsePoissonNvars,"UsePoissonNvars", "Interpret \"UseNvars\" not as a fixed number but as the mean of a Poisson distribution in each split with the RandomisedTree option");
214  DeclareOptionRef(fUseYesNoLeaf=kTRUE, "UseYesNoLeaf",
215  "Use Sig or Bkg node type or the ratio S/B as classification in the leaf node");
216  DeclareOptionRef(fNodePurityLimit=0.5, "NodePurityLimit", "In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");
217  DeclareOptionRef(fSepTypeS="GiniIndex", "SeparationType", "Separation criterion for node splitting");
218  AddPreDefVal(TString("MisClassificationError"));
219  AddPreDefVal(TString("GiniIndex"));
220  AddPreDefVal(TString("CrossEntropy"));
221  AddPreDefVal(TString("SDivSqrtSPlusB"));
222  DeclareOptionRef(fMinNodeEvents=-1, "nEventsMin", "deprecated !!! Minimum number of events required in a leaf node");
223  DeclareOptionRef(fMinNodeSizeS, "MinNodeSize", "Minimum percentage of training events required in a leaf node (default: Classification: 10%, Regression: 1%)");
224  DeclareOptionRef(fNCuts, "nCuts", "Number of steps during node cut optimisation");
225  DeclareOptionRef(fPruneStrength, "PruneStrength", "Pruning strength (negative value == automatic adjustment)");
226  DeclareOptionRef(fPruneMethodS="NoPruning", "PruneMethod", "Pruning method: NoPruning (switched off), ExpectedError or CostComplexity");
227 
228  AddPreDefVal(TString("NoPruning"));
229  AddPreDefVal(TString("ExpectedError"));
230  AddPreDefVal(TString("CostComplexity"));
231 
232  if (DoRegression()) {
233  DeclareOptionRef(fMaxDepth=50,"MaxDepth","Max depth of the decision tree allowed");
234  }else{
235  DeclareOptionRef(fMaxDepth=3,"MaxDepth","Max depth of the decision tree allowed");
236  }
237 }
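// As an illustration of the keywords declared above, an option string for
// this method might look like the following (a sketch only; which options
// to use depends on the analysis):
//
//    "SeparationType=GiniIndex:nCuts=20:MinNodeSize=5%:MaxDepth=3:"
//    "NodePurityLimit=0.5:PruneMethod=CostComplexity:PruneStrength=-1"
//
// MinNodeSize accepts the value with or without a trailing "%" sign (see
// SetMinNodeSize below), and a negative PruneStrength requests automatic
// determination of the pruning strength (see ProcessOptions).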
238 
239 void TMVA::MethodDT::DeclareCompatibilityOptions() {
240  // options that are used ONLY for the READER to ensure backward compatibility
241  // they are hence without any effect (the reader only uses the training options stored in the weight file)
242  MethodBase::DeclareCompatibilityOptions();
243 
244  DeclareOptionRef(fPruneBeforeBoost=kFALSE, "PruneBeforeBoost",
245  "--> removed option .. only kept for reader backward compatibility");
246 }
247 
248 ////////////////////////////////////////////////////////////////////////////////
249 /// the option string is decoded, for available options see "DeclareOptions"
250 
251 void TMVA::MethodDT::ProcessOptions()
252 {
253  fSepTypeS.ToLower();
254  if (fSepTypeS == "misclassificationerror") fSepType = new MisClassificationError();
255  else if (fSepTypeS == "giniindex") fSepType = new GiniIndex();
256  else if (fSepTypeS == "crossentropy") fSepType = new CrossEntropy();
257  else if (fSepTypeS == "sdivsqrtsplusb") fSepType = new SdivSqrtSplusB();
258  else {
259  Log() << kINFO << GetOptions() << Endl;
260  Log() << kFATAL << "<ProcessOptions> unknown Separation Index option called" << Endl;
261  }
262 
263  // std::cout << "fSeptypes " << fSepTypeS << " fseptype " << fSepType << std::endl;
264 
265  fPruneMethodS.ToLower();
266  if (fPruneMethodS == "expectederror" ) fPruneMethod = DecisionTree::kExpectedErrorPruning;
267  else if (fPruneMethodS == "costcomplexity" ) fPruneMethod = DecisionTree::kCostComplexityPruning;
268  else if (fPruneMethodS == "nopruning" ) fPruneMethod = DecisionTree::kNoPruning;
269  else {
270  Log() << kINFO << GetOptions() << Endl;
271  Log() << kFATAL << "<ProcessOptions> unknown PruneMethod option:" << fPruneMethodS <<" called" << Endl;
272  }
273 
274  if (fPruneStrength < 0) fAutomatic = kTRUE;
275  else fAutomatic = kFALSE;
276  if (fAutomatic && fPruneMethod == DecisionTree::kExpectedErrorPruning){
277  Log() << kFATAL
278  << "Sorry, automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << Endl;
279  }
280 
281 
282  if (this->Data()->HasNegativeEventWeights()){
283  Log() << kINFO << " You are using a Monte Carlo sample that also has negative weights. "
284  << "That should in principle be fine as long as on average you end up with "
285  << "something positive. For this you have to make sure that the minimal number "
286  << "of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize="
287  <<fMinNodeSizeS
288  <<", (or the deprecated equivalent nEventsMin) you can set this via the "
289  <<"MethodDT option string when booking the "
290  << "classifier) is large enough to allow for reasonable averaging!!! "
291  << " If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining "
292  << "which ignores events with negative weight in the training. " << Endl
293  << Endl << "Note: You'll get a WARNING message during the training if that should ever happen" << Endl;
294  }
295 
296  if (fRandomisedTrees){
297  Log() << kINFO << " Randomised trees should use *bagging* as *boost* method. Did you set this in *MethodBoost*? Here I can only enforce *no pruning*." << Endl;
298  fPruneMethod = DecisionTree::kNoPruning;
299  // fBoostType = "Bagging";
300  }
301 
302  if (fMinNodeEvents > 0){
303  fMinNodeSize = Double_t(fMinNodeEvents*100.) / Data()->GetNTrainingEvents();
304  Log() << kWARNING << "You have explicitly set *nEventsMin*, the minimum absolute number \n"
305  << "of events in a leaf node. This is DEPRECATED, please use the option \n"
306  << "*MinNodeSize* giving the relative number as percentage of training \n"
307  << "events instead. \n"
308  << "nEventsMin="<<fMinNodeEvents<< "--> MinNodeSize="<<fMinNodeSize<<"%"
309  << Endl;
310  }else{
311  SetMinNodeSize(fMinNodeSizeS);
312  }
313 }
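// For illustration (numbers assumed, not taken from the code): the deprecated
// absolute option maps onto the relative one via the simple percentage
// relation MinNodeSize = nEventsMin * 100 / N_train, so nEventsMin=200 with
// 10000 training events corresponds to MinNodeSize=2%.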
314 
315 void TMVA::MethodDT::SetMinNodeSize(Double_t sizeInPercent){
316  if (sizeInPercent > 0 && sizeInPercent < 50){
317  fMinNodeSize=sizeInPercent;
318 
319  } else {
320  Log() << kERROR << "you have demanded a minimal node size of "
321  << sizeInPercent << "% of the training events.. \n"
322  << " that somehow does not make sense "<<Endl;
323  }
324 
325 }
326 void TMVA::MethodDT::SetMinNodeSize( TString sizeInPercent ){
327  sizeInPercent.ReplaceAll("%","");
328  if (sizeInPercent.IsAlnum()) SetMinNodeSize(sizeInPercent.Atof());
329  else {
330  Log() << kERROR << "I had problems reading the option MinNodeSize, which\n"
331  << "after removing a possible % sign now reads " << sizeInPercent << Endl;
332  }
333 }
334 
335 
336 
337 ////////////////////////////////////////////////////////////////////////////////
338 /// common initialisation with defaults for the DT-Method
339 
340 void TMVA::MethodDT::Init( void )
341 {
342  fMinNodeEvents = -1;
343  fMinNodeSize = 5;
344  fMinNodeSizeS = "5%";
345  fNCuts = 20;
346  fPruneMethod = DecisionTree::kNoPruning;
347  fPruneStrength = 5; // -1 means automatic determination of the prune strength using a validation sample
348  fDeltaPruneStrength = 0.1;
349  fRandomisedTrees = kFALSE;
350  fUseNvars = GetNvar();
351  fUsePoissonNvars = kTRUE;
352 
353  // reference cut value to distinguish signal-like from background-like events
354  SetSignalReferenceCut( 0 );
355  if (fAnalysisType == Types::kClassification || fAnalysisType == Types::kMulticlass ) {
356  fMaxDepth = 3;
357  }else {
358  fMaxDepth = 50;
359  }
360 }
361 
362 ////////////////////////////////////////////////////////////////////////////////
363 ///destructor
364 
366 {
367  delete fTree;
368 }
369 
370 ////////////////////////////////////////////////////////////////////////////////
371 
372 void TMVA::MethodDT::Train(void)
373 {
374  TMVA::DecisionTreeNode::fgIsTraining = true;
375  fTree = new DecisionTree( fSepType, fMinNodeSize, fNCuts, 0,
376  fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth, 0 );
377  fTree->SetNVars(GetNvar());
378  if (fRandomisedTrees) Log()<<kWARNING<<" randomised Trees do not work yet in this framework,"
379  << " as I do not know how to give each tree a new random seed, now they"
380  << " will be all the same and that is not good " << Endl;
381  fTree->SetAnalysisType( GetAnalysisType() );
382 
383  //fTree->BuildTree(GetEventCollection(Types::kTraining));
384  Data()->SetCurrentType(Types::kTraining);
385  UInt_t nevents = Data()->GetNTrainingEvents();
386  std::vector<const TMVA::Event*> tmp;
387  for (Long64_t ievt=0; ievt<nevents; ievt++) {
388  const Event *event = GetEvent(ievt);
389  tmp.push_back(event);
390  }
391  fTree->BuildTree(tmp);
393 
394  TMVA::DecisionTreeNode::fgIsTraining = false;
395  ExitFromTraining();
396 }
397 
398 ////////////////////////////////////////////////////////////////////////////////
399 /// prune the decision tree if requested (good for individual trees that are best grown out and then
400 /// pruned back, while boosted decision trees are best started as 'small' trees; at least the
401 /// standard "optimal pruning algorithms" don't result in 'weak enough' classifiers !!)
402 
403 Double_t TMVA::MethodDT::PruneTree( /*const Int_t methodIndex*/ )
404 {
405  // remember the number of nodes beforehand (for monitoring purposes)
406 
407 
408  if (fAutomatic && fPruneMethod == DecisionTree::kCostComplexityPruning) { // automatic cost complexity pruning
409  CCPruner* pruneTool = new CCPruner(fTree, this->Data() , fSepType);
410  pruneTool->Optimize();
411  std::vector<DecisionTreeNode*> nodes = pruneTool->GetOptimalPruneSequence();
412  fPruneStrength = pruneTool->GetOptimalPruneStrength();
413  for(UInt_t i = 0; i < nodes.size(); i++)
414  fTree->PruneNode(nodes[i]);
415  delete pruneTool;
416  }
417  else if (fAutomatic && fPruneMethod != DecisionTree::kCostComplexityPruning){
418  /*
419 
420  Double_t alpha = 0;
421  Double_t delta = fDeltaPruneStrength;
422 
423  DecisionTree* dcopy;
424  std::vector<Double_t> q;
425  multimap<Double_t,Double_t> quality;
426  Int_t nnodes=fTree->GetNNodes();
427 
428  // find the maximum prune strength that still leaves some nodes
429  Bool_t forceStop = kFALSE;
430  Int_t troubleCount=0, previousNnodes=nnodes;
431 
432 
433  nnodes=fTree->GetNNodes();
434  while (nnodes > 3 && !forceStop) {
435  dcopy = new DecisionTree(*fTree);
436  dcopy->SetPruneStrength(alpha+=delta);
437  dcopy->PruneTree();
438  q.push_back(TestTreeQuality(dcopy));
439  quality.insert(std::pair<const Double_t,Double_t>(q.back(),alpha));
440  nnodes=dcopy->GetNNodes();
441  if (previousNnodes == nnodes) troubleCount++;
442  else {
443  troubleCount=0; // reset counter
444  if (nnodes < previousNnodes / 2 ) fDeltaPruneStrength /= 2.;
445  }
446  previousNnodes = nnodes;
447  if (troubleCount > 20) {
448  if (methodIndex == 0 && fPruneStrength <=0) {//maybe you need larger stepsize ??
449  fDeltaPruneStrength *= 5;
450  Log() << kINFO << "<PruneTree> trouble determining optimal prune strength"
451  << " for Tree " << methodIndex
452  << " --> first try to increase the step size"
453  << " currently PruneStrength= " << alpha
454  << " stepsize " << fDeltaPruneStrength << " " << Endl;
455  troubleCount = 0; // try again
456  fPruneStrength = 1; // if it was for the first time..
457  } else if (methodIndex == 0 && fPruneStrength <=2) {//maybe you need much larger stepsize ??
458  fDeltaPruneStrength *= 5;
459  Log() << kINFO << "<PruneTree> trouble determining optimal prune strength"
460  << " for Tree " << methodIndex
461  << " --> try to increase the step size even more.. "
462  << " if that still didn't work, TRY IT BY HAND"
463  << " currently PruneStrength= " << alpha
464  << " stepsize " << fDeltaPruneStrength << " " << Endl;
465  troubleCount = 0; // try again
466  fPruneStrength = 3; // if it was for the first time..
467  } else {
468  forceStop=kTRUE;
469  Log() << kINFO << "<PruneTree> trouble determining optimal prune strength"
470  << " for Tree " << methodIndex << " at tested prune strength: " << alpha << " --> abort forced, use same strength as for previous tree:"
471  << fPruneStrength << Endl;
472  }
473  }
474  if (fgDebugLevel==1) Log() << kINFO << "Pruned with ("<<alpha
475  << ") give quality: " << q.back()
476  << " and #nodes: " << nnodes
477  << Endl;
478  delete dcopy;
479  }
480  if (!forceStop) {
481  multimap<Double_t,Double_t>::reverse_iterator it=quality.rend();
482  it++;
483  fPruneStrength = it->second;
484  // adjust the step size for the next tree.. think that 20 steps are sort of
485  // fine enough.. could become a tunable option later..
486  fDeltaPruneStrength *= Double_t(q.size())/20.;
487  }
488 
489  fTree->SetPruneStrength(fPruneStrength);
490  fTree->PruneTree();
491  */
492  }
493  else {
494  fTree->SetPruneStrength(fPruneStrength);
495  fTree->PruneTree();
496  }
497 
498  return fPruneStrength;
499 }
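// Usage note (a sketch, not a prescription): the automatic cost-complexity
// branch above is reached when the method is booked with something like
//
//    "PruneMethod=CostComplexity:PruneStrength=-1"
//
// since a negative PruneStrength sets fAutomatic in ProcessOptions(), while
// the default "PruneMethod=NoPruning" leaves the tree unpruned.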
500 
501 ////////////////////////////////////////////////////////////////////////////////
502 
503 Double_t TMVA::MethodDT::TestTreeQuality( DecisionTree *dt )
504 {
505  Data()->SetCurrentType(Types::kTesting);
506  // test the tree quality.. in terms of misclassification
507  Double_t SumCorrect=0,SumWrong=0;
508  for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++)
509  {
510  const Event * ev = Data()->GetEvent(ievt);
511  if ((dt->CheckEvent(ev) > dt->GetNodePurityLimit() ) == DataInfo().IsSignal(ev)) SumCorrect+=ev->GetWeight();
512  else SumWrong+=ev->GetWeight();
513  }
514  Data()->SetCurrentType(Types::kTraining);
515  return SumCorrect / (SumCorrect + SumWrong);
516 }
517 
518 ////////////////////////////////////////////////////////////////////////////////
519 
520 void TMVA::MethodDT::AddWeightsXMLTo( void* parent ) const
521 {
522  fTree->AddXMLTo(parent);
523  //Log() << kFATAL << "Please implement writing of weights as XML" << Endl;
524 }
525 
526 ////////////////////////////////////////////////////////////////////////////////
527 
528 void TMVA::MethodDT::ReadWeightsFromXML( void* wghtnode)
529 {
530  if(fTree)
531  delete fTree;
532  fTree = new DecisionTree();
533  fTree->ReadXML(wghtnode,GetTrainingTMVAVersionCode());
534 }
535 
536 ////////////////////////////////////////////////////////////////////////////////
537 
538 void TMVA::MethodDT::ReadWeightsFromStream( std::istream& istr )
539 {
540  delete fTree;
541  fTree = new DecisionTree();
542  fTree->Read(istr);
543 }
544 
545 ////////////////////////////////////////////////////////////////////////////////
546 /// returns MVA value
547 
548 Double_t TMVA::MethodDT::GetMvaValue( Double_t* err, Double_t* errUpper )
549 {
550  // cannot determine error
551  NoErrorCalc(err, errUpper);
552 
553  return fTree->CheckEvent(GetEvent(),fUseYesNoLeaf);
554 }
555 
556 ////////////////////////////////////////////////////////////////////////////////
557 
558 void TMVA::MethodDT::GetHelpMessage() const
559 {
560 }
561 ////////////////////////////////////////////////////////////////////////////////
562 
563 const TMVA::Ranking* TMVA::MethodDT::CreateRanking()
564 {
565  return 0;
566 }