// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Jan Therhaag, Eckhard von Toerne 

/**********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Class  : DecisionTree                                                          *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation of a Decision Tree                                         *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
 *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
 *      Kai Voss        <Kai.Voss@cern.ch>       - U. of Victoria, Canada         *
 *      Jan Therhaag       <Jan.Therhaag@cern.ch>     - U of Bonn, Germany        *
 *      Eckhard v. Toerne  <evt@uni-bonn.de>          - U of Bonn, Germany        *
 *                                                                                *
 * Copyright (c) 2005-2011:                                                       *
 *      CERN, Switzerland                                                         * 
 *      U. of Victoria, Canada                                                    * 
 *      MPI-K Heidelberg, Germany                                                 * 
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/license.txt)                                      *
 *                                                                                *
 **********************************************************************************/

#ifndef ROOT_TMVA_DecisionTree
#define ROOT_TMVA_DecisionTree

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// DecisionTree                                                         //
//                                                                      //
// Implementation of a Decision Tree                                    //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

#ifndef ROOT_TH2
#include "TH2.h"
#endif

#ifndef ROOT_TMVA_Types
#include "TMVA/Types.h"
#endif
#ifndef ROOT_TMVA_DecisionTreeNode
#include "TMVA/DecisionTreeNode.h"
#endif
#ifndef ROOT_TMVA_BinaryTree
#include "TMVA/BinaryTree.h"
#endif
#ifndef ROOT_TMVA_BinarySearchTree
#include "TMVA/BinarySearchTree.h"
#endif
#ifndef ROOT_TMVA_SeparationBase
#include "TMVA/SeparationBase.h"
#endif
#ifndef ROOT_TMVA_RegressionVariance
#include "TMVA/RegressionVariance.h"
#endif
#include "TMVA/DataSetInfo.h"

class TRandom3;

namespace TMVA {

   class Event;

   class DecisionTree : public BinaryTree {

   private:

      static const Int_t fgRandomSeed; // set nonzero for debugging and zero for random seeds

   public:

      typedef std::vector<TMVA::Event*> EventList;
      typedef std::vector<const TMVA::Event*> EventConstList;

      // the constructor needed for "reading" the decision tree from weight files
      DecisionTree( void );

      // the constructor needed for building the decision tree via training on events
      DecisionTree( SeparationBase *sepType, Float_t minSize,
                    Int_t nCuts, DataSetInfo* = NULL,
                    UInt_t cls =0,
                    Bool_t randomisedTree=kFALSE, Int_t useNvars=0, Bool_t usePoissonNvars=kFALSE, 
                    UInt_t nMaxDepth=9999999, 
                    Int_t iSeed=fgRandomSeed, Float_t purityLimit=0.5,
                    Int_t treeID = 0);

      // copy constructor
      DecisionTree (const DecisionTree &d);

      virtual ~DecisionTree( void );

      // Retrieves the address of the root node
      virtual DecisionTreeNode* GetRoot() const { return dynamic_cast<TMVA::DecisionTreeNode*>(fRoot); }
      virtual DecisionTreeNode * CreateNode(UInt_t) const { return new DecisionTreeNode(); }
      virtual BinaryTree* CreateTree() const { return new DecisionTree(); }
      static  DecisionTree* CreateFromXML(void* node, UInt_t tmva_Version_Code = TMVA_VERSION_CODE);
      virtual const char* ClassName() const { return "DecisionTree"; }

      // build a tree by recursively splitting the nodes
      UInt_t BuildTree( const EventConstList & eventSample,
                        DecisionTreeNode *node = NULL);
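
      // Usage sketch (illustrative only, not from the original source; assumes
      // the TMVA::GiniIndex separation class and a caller-prepared DataSetInfo
      // and event list):
      //
      //    #include "TMVA/GiniIndex.h"
      //    TMVA::SeparationBase* sepType = new TMVA::GiniIndex();
      //    TMVA::DecisionTree tree( sepType, 50, 20, &dataSetInfo ); // minSize, nCuts: example values
      //    tree.BuildTree( eventSample ); // eventSample: EventConstList of training events
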
      // determine how a node is split (which variable, which cut value)

      Double_t TrainNode( const EventConstList & eventSample,  DecisionTreeNode *node ) { return TrainNodeFast( eventSample, node ); }
      Double_t TrainNodeFast( const EventConstList & eventSample,  DecisionTreeNode *node );
      Double_t TrainNodeFull( const EventConstList & eventSample,  DecisionTreeNode *node );
      void    GetRandomisedVariables(Bool_t *useVariable, UInt_t *variableMap, UInt_t & nVars);
      std::vector<Double_t>  GetFisherCoefficients(const EventConstList &eventSample, UInt_t nFisherVars, UInt_t *mapVarInFisher);
    
      // fill a tree that already has a given structure (just to see how many
      // signal/background events end up in each node)

      void FillTree( const EventList & eventSample);

      // fill the existing decision tree structure by passing events in from the
      // top node and seeing where they happen to end up
      void FillEvent( const TMVA::Event & event,
                      TMVA::DecisionTreeNode *node  );
    
      // returns: 1 = Signal (right),  -1 = Bkg (left)

      Double_t CheckEvent( const TMVA::Event * , Bool_t UseYesNoLeaf = kFALSE ) const;     
      TMVA::DecisionTreeNode* GetEventNode(const TMVA::Event & e) const;
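
      // Example (a minimal sketch; "ev" stands for a caller-owned TMVA::Event*):
      //
      //    Double_t r = tree.CheckEvent( ev, kTRUE ); // with UseYesNoLeaf: +1 = signal, -1 = background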

      // return the individual relative variable importance 
      std::vector< Double_t > GetVariableImportance();

      Double_t GetVariableImportance(UInt_t ivar);
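
      // Example (a sketch; prints the relative importance of each input variable):
      //
      //    std::vector<Double_t> imp = tree.GetVariableImportance();
      //    for (UInt_t i = 0; i < imp.size(); ++i)
      //       std::cout << "var " << i << ": " << imp[i] << std::endl;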
    
      // clear the tree nodes (their S/N, Nevents etc.), keeping just the structure of the tree

      void ClearTree();
    
      // set pruning method
      enum EPruneMethod { kExpectedErrorPruning=0, kCostComplexityPruning, kNoPruning };
      void SetPruneMethod( EPruneMethod m = kCostComplexityPruning ) { fPruneMethod = m; }
    
      // recursive pruning of the tree, validation sample required for automatic pruning
      Double_t PruneTree( const EventConstList* validationSample = NULL );
    
      // manage the pruning strength parameter (if < 0, the pruning strength is determined automatically)
      void SetPruneStrength( Double_t p ) { fPruneStrength = p; }
      Double_t GetPruneStrength( ) const { return fPruneStrength; }
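
      // Example pruning workflow (illustrative only; "validationSample" is a
      // caller-prepared EventConstList, required for automatic pruning):
      //
      //    tree.SetPruneMethod( TMVA::DecisionTree::kCostComplexityPruning );
      //    tree.SetPruneStrength( -1. ); // < 0: strength determined automatically
      //    tree.PruneTree( validationSample );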

      // apply the pruning validation sample to the decision tree
      void ApplyValidationSample( const EventConstList* validationSample ) const;
    
      // return the misclassification rate of a pruned tree
      Double_t TestPrunedTreeQuality( const DecisionTreeNode* dt = NULL, Int_t mode=0 ) const;
    
      // pass a single validation event through a pruned decision tree
      void CheckEventWithPrunedTree( const TMVA::Event* ) const;

      // calculate the normalization factor for a pruning validation sample
      Double_t GetSumWeights( const EventConstList* validationSample ) const;
    
      void SetNodePurityLimit( Double_t p ) { fNodePurityLimit = p; }
      Double_t GetNodePurityLimit( ) const { return fNodePurityLimit; }

      void DescendTree( Node *n = NULL );
      void SetParentTreeInNodes( Node *n = NULL );
        
      // retrieve a node from the tree. Its position (up to a maximal tree depth
      // of 64) is coded as a sequence of left-right moves starting from the
      // root, stored as 0-1 bit patterns in the long integer together with the depth
      Node* GetNode( ULong_t sequence, UInt_t depth );
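
      // Example (a sketch; the bit order shown -- lowest bit = first move from
      // the root, 1 = right, 0 = left -- is an assumption about the encoding,
      // not guaranteed by this header):
      //
      //    TMVA::Node* n = tree.GetNode( 0x5 /* binary 101 */, 3 ); // right, left, right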
    
      UInt_t CleanTree(DecisionTreeNode *node=NULL);
     
      void PruneNode(TMVA::DecisionTreeNode *node);    
    
      // prune a node from the tree without deleting its descendants; allows one to
      // effectively prune a tree many times without making deep copies
      void PruneNodeInPlace( TMVA::DecisionTreeNode* node );

      Int_t GetNNodesBeforePruning(){return (fNNodesBeforePruning)?fNNodesBeforePruning:fNNodesBeforePruning=GetNNodes();}
      

      UInt_t CountLeafNodes(TMVA::Node *n = NULL);

      void  SetTreeID(Int_t treeID){ fTreeID = treeID; }
      Int_t GetTreeID(){ return fTreeID; }

      Bool_t DoRegression() const { return fAnalysisType == Types::kRegression; }
      void SetAnalysisType (Types::EAnalysisType t) { fAnalysisType = t;}
      Types::EAnalysisType GetAnalysisType ( void ) { return fAnalysisType;}
      inline void SetUseFisherCuts(Bool_t t=kTRUE)  { fUseFisherCuts = t;}
      inline void SetMinLinCorrForFisher(Double_t min){fMinLinCorrForFisher = min;}
      inline void SetUseExclusiveVars(Bool_t t=kTRUE){fUseExclusiveVars = t;}
      inline void SetNVars(Int_t n){fNvars = n;}
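
      // Example Fisher-cut configuration (illustrative values):
      //
      //    tree.SetUseFisherCuts();            // enable multivariate (Fisher) splits
      //    tree.SetMinLinCorrForFisher( 0.8 ); // demand linear correlation >= 0.8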


   private:
      // utility functions
     
      // calculates the purity S/(S+B) of a given event sample from the numbers
      // of signal and background events collected from the individual samples
      Double_t SamplePurity(EventList eventSample);

      UInt_t    fNvars;          // number of variables used to separate S and B
      Int_t     fNCuts;          // number of grid point in variable cut scans
      Bool_t    fUseFisherCuts;  // use multivariate splits using the Fisher criterion
      Double_t  fMinLinCorrForFisher; // the minimum linear correlation between two variables demanded for their use in the Fisher criterion for node splitting
      Bool_t    fUseExclusiveVars; // variables already used in the Fisher criterion are no longer analysed individually for node splitting

      SeparationBase *fSepType;  // the separation criterion
      RegressionVariance *fRegType;  // the separation criterion used in regression
    
      Double_t  fMinSize;        // min number of events in node
      Double_t  fMinNodeSize;    // min fraction of training events in node
      Double_t  fMinSepGain;     // minimum separation gain required to perform node splitting
    
      Bool_t    fUseSearchTree;  // cut scan done with binary trees or simple event loop.
      Double_t  fPruneStrength;  // a parameter that sets the "amount" of pruning; needs to be adjusted
    
      EPruneMethod fPruneMethod; // method used for pruning
      Int_t    fNNodesBeforePruning; // number of nodes before pruning (kept to allow monitoring the before/after effect of pruning)

      Double_t  fNodePurityLimit;// purity limit to decide whether a node is signal
    
      Bool_t    fRandomisedTree; // choose at each node splitting a random set of variables 
      Int_t     fUseNvars;       // the number of variables used in randomised trees
      Bool_t    fUsePoissonNvars; // use "fUseNvars" not as a fixed number but as the mean of a Poisson distribution in each split
    
      TRandom3  *fMyTrandom;     // random number generator for randomised trees
    
      std::vector< Double_t > fVariableImportance; // the relative importance of the different variables 

      UInt_t     fMaxDepth;      // max depth
      UInt_t     fSigClass;      // class which is treated as signal when building the tree
      static const Int_t  fgDebugLevel = 0;     // debug level determining some printout/control plots etc.
      Int_t     fTreeID;        // just an ID number given to the tree; makes debugging easier as the tree knows its own identity

      Types::EAnalysisType  fAnalysisType;   // kClassification(=0=false) or kRegression(=1=true)

      DataSetInfo*  fDataSetInfo;


      ClassDef(DecisionTree,0)               // implementation of a Decision Tree
   };
  
} // namespace TMVA

#endif // ROOT_TMVA_DecisionTree