Logo ROOT   6.10/09
Reference Guide
RuleFit.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : RuleFit *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * A class implementing various fits of rule ensembles *
12  * *
13  * Authors (alphabetical): *
14  * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
15  * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
16  * *
17  * Copyright (c) 2005: *
18  * CERN, Switzerland *
19  * Iowa State U. *
20  * MPI-K Heidelberg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #ifndef ROOT_TMVA_RuleFit
28 #define ROOT_TMVA_RuleFit
29 
30 #include <algorithm>
31 
32 #include "TMVA/DecisionTree.h"
33 #include "TMVA/RuleEnsemble.h"
34 #include "TMVA/RuleFitParams.h"
35 #include "TMVA/Event.h"
36 
37 namespace TMVA {
38 
39 
40  class MethodBase;
41  class MethodRuleFit;
42  class MsgLogger;
43 
44  class RuleFit {
45 
46  public:
47 
48  // main constructor
49  RuleFit( const TMVA::MethodBase *rfbase );
50 
51  // empty constructor
52  RuleFit( void );
53 
54  virtual ~RuleFit( void );
55 
56  void InitNEveEff();
57  void InitPtrs( const TMVA::MethodBase *rfbase );
58  void Initialize( const TMVA::MethodBase *rfbase );
59 
60  void SetMsgType( EMsgType t );
61 
62  void SetTrainingEvents( const std::vector<const TMVA::Event *> & el );
63 
64  void ReshuffleEvents() { std::random_shuffle(fTrainingEventsRndm.begin(),fTrainingEventsRndm.end()); }
65 
66  void SetMethodBase( const MethodBase *rfbase );
67 
68  // make the forest of trees for rule generation
69  void MakeForest();
70 
71  // build a tree
72  void BuildTree( TMVA::DecisionTree *dt );
73 
74  // save event weights
75  void SaveEventWeights();
76 
77  // restore saved event weights
78  void RestoreEventWeights();
79 
80  // boost events based on the given tree
81  void Boost( TMVA::DecisionTree *dt );
82 
83  // calculate and print some statistics on the given forest
84  void ForestStatistics();
85 
86  // calculate the discriminating variable for the given event
87  Double_t EvalEvent( const Event& e );
88 
89  // calculate the sum of weights
90  Double_t CalcWeightSum( const std::vector<const TMVA::Event *> *events, UInt_t neve=0 );
91 
92  // do the fitting of the coefficients
93  void FitCoefficients();
94 
95  // calculate variable and rule importance from a set of events
96  void CalcImportance();
97 
98  // set usage of linear term
100  // set usage of rules
102  // set usage of the full model (rules and linear terms)
104  // set minimum importance allowed
106  // set minimum rule distance - see RuleEnsemble
108  // set path related parameters
109  void SetGDTau( Double_t t=0.0 ) { fRuleFitParams.SetGDTau(t); }
112  // make visualization histograms
116  void MakeVisHists();
117  void FillVisHistCut(const Rule * rule, std::vector<TH2F *> & hlist);
118  void FillVisHistCorr(const Rule * rule, std::vector<TH2F *> & hlist);
119  void FillCut(TH2F* h2,const TMVA::Rule *rule,Int_t vind);
120  void FillLin(TH2F* h2,Int_t vind);
121  void FillCorr(TH2F* h2,const TMVA::Rule *rule,Int_t v1, Int_t v2);
122  void NormVisHists(std::vector<TH2F *> & hlist);
123  void MakeDebugHists();
124  Bool_t GetCorrVars(TString & title, TString & var1, TString & var2);
125  // accessors
126  UInt_t GetNTreeSample() const { return fNTreeSample; }
127  Double_t GetNEveEff() const { return fNEveEffTrain; } // reweighted number of events = sum(wi)
128  const Event* GetTrainingEvent(UInt_t i) const { return static_cast< const Event *>(fTrainingEvents[i]); }
129  Double_t GetTrainingEventWeight(UInt_t i) const { return fTrainingEvents[i]->GetWeight(); }
130 
131  // const Event* GetTrainingEvent(UInt_t i, UInt_t isub) const { return &(fTrainingEvents[fSubsampleEvents[isub]])[i]; }
132 
133  const std::vector< const TMVA::Event * > & GetTrainingEvents() const { return fTrainingEvents; }
134  // const std::vector< Int_t > & GetSubsampleEvents() const { return fSubsampleEvents; }
135 
136  // void GetSubsampleEvents(Int_t sub, UInt_t & ibeg, UInt_t & iend) const;
137  void GetRndmSampleEvents(std::vector< const TMVA::Event * > & evevec, UInt_t nevents);
138  //
139  const std::vector< const TMVA::DecisionTree *> & GetForest() const { return fForest; }
140  const RuleEnsemble & GetRuleEnsemble() const { return fRuleEnsemble; }
142  const RuleFitParams & GetRuleFitParams() const { return fRuleFitParams; }
144  const MethodRuleFit * GetMethodRuleFit() const { return fMethodRuleFit; }
145  const MethodBase * GetMethodBase() const { return fMethodBase; }
146 
147  private:
148 
149  // copy constructor
150  RuleFit( const RuleFit & other );
151 
152  // copy method
153  void Copy( const RuleFit & other );
154 
155  std::vector<const TMVA::Event *> fTrainingEvents; // all training events
156  std::vector<const TMVA::Event *> fTrainingEventsRndm; // idem, but randomly shuffled
157  std::vector<Double_t> fEventWeights; // original weights of the events - follows fTrainingEvents
158  UInt_t fNTreeSample; // number of events in sub sample = frac*neve
159 
160  Double_t fNEveEffTrain; // reweighted number of events = sum(wi)
161  std::vector< const TMVA::DecisionTree *> fForest; // the input forest of decision trees
162  RuleEnsemble fRuleEnsemble; // the ensemble of rules
163  RuleFitParams fRuleFitParams; // fit rule parameters
164  const MethodRuleFit *fMethodRuleFit; // pointer to the method which initialized this RuleFit instance
165  const MethodBase *fMethodBase; // pointer to the method base which initialized this RuleFit instance
166  Bool_t fVisHistsUseImp; // if true, use importance as weight; else coef in vis hists
167 
168  mutable MsgLogger* fLogger; // message logger
169  MsgLogger& Log() const { return *fLogger; }
170 
171  static const Int_t randSEED = 0; // set to 1 for debugging purposes or to zero for random seeds
172 
173  ClassDef(RuleFit,0); // Calculations for Friedman's RuleFit method
174  };
175 }
176 
177 #endif
std::vector< const TMVA::Event * > fTrainingEventsRndm
Definition: RuleFit.h:156
void ForestStatistics()
summary of statistics of all trees
Definition: RuleFit.cxx:385
A class doing the actual fitting of a linear model using rules as base functions. ...
Definition: RuleFitParams.h:53
void MakeForest()
make a forest of decision trees
Definition: RuleFit.cxx:221
J Friedman's RuleFit method.
Definition: MethodRuleFit.h:47
const RuleEnsemble & GetRuleEnsemble() const
Definition: RuleFit.h:140
void SetVisHistsUseImp(Bool_t f)
Definition: RuleFit.h:113
void SetGDTau(Double_t t=0.0)
Definition: RuleFit.h:109
void CalcImportance()
calculates the importance of each rule
Definition: RuleFit.cxx:417
const std::vector< const TMVA::Event *> & GetTrainingEvents() const
Definition: RuleFit.h:133
A class implementing various fits of rule ensembles.
Definition: RuleFit.h:44
MsgLogger & Log() const
Definition: RuleFit.h:169
void NormVisHists(std::vector< TH2F *> &hlist)
normalize rule importance hists
Definition: RuleFit.cxx:485
void SetGDTau(Double_t t)
Definition: RuleFitParams.h:86
void SetMsgType(EMsgType t)
set the current message type to that of mlog for this class and all other subtools ...
Definition: RuleFit.cxx:190
const MethodBase * fMethodBase
Definition: RuleFit.h:165
Virtual base Class for all MVA method.
Definition: MethodBase.h:106
Bool_t GetCorrVars(TString &title, TString &var1, TString &var2)
get first and second variables from title
Definition: RuleFit.cxx:753
void InitNEveEff()
init effective number of events (using event weights)
Definition: RuleFit.cxx:97
void FitCoefficients()
Fit the coefficients for the rule ensemble.
Definition: RuleFit.cxx:408
Basic string class.
Definition: TString.h:129
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
void SetModelFull()
Definition: RuleFit.h:103
std::vector< Double_t > fEventWeights
Definition: RuleFit.h:157
Double_t fNEveEffTrain
Definition: RuleFit.h:160
const Event * GetTrainingEvent(UInt_t i) const
Definition: RuleFit.h:128
void SetTrainingEvents(const std::vector< const TMVA::Event *> &el)
set the training events randomly
Definition: RuleFit.cxx:439
void SetModelLinear()
Definition: RuleFit.h:99
void SetModelRules()
Definition: RuleFit.h:101
Implementation of a rule.
Definition: Rule.h:48
const std::vector< const TMVA::DecisionTree * > & GetForest() const
Definition: RuleFit.h:139
RuleFit(void)
default constructor
Definition: RuleFit.cxx:75
#define ClassDef(name, id)
Definition: Rtypes.h:297
void BuildTree(TMVA::DecisionTree *dt)
build the decision tree using fNTreeSample events from fTrainingEventsRndm
Definition: RuleFit.cxx:200
void UseImportanceVisHists()
Definition: RuleFit.h:114
void ReshuffleEvents()
Definition: RuleFit.h:64
void GetRndmSampleEvents(std::vector< const TMVA::Event * > &evevec, UInt_t nevents)
draw a random subsample of the training events without replacement
Definition: RuleFit.cxx:466
const RuleFitParams & GetRuleFitParams() const
Definition: RuleFit.h:142
virtual ~RuleFit(void)
destructor
Definition: RuleFit.cxx:89
RuleEnsemble * GetRuleEnsemblePtr()
Definition: RuleFit.h:141
void SetGDNPathSteps(Int_t n=100)
Definition: RuleFit.h:111
void SetMethodBase(const MethodBase *rfbase)
set MethodBase
Definition: RuleFit.cxx:150
void SetGDNPathSteps(Int_t np)
Definition: RuleFitParams.h:69
UInt_t fNTreeSample
Definition: RuleFit.h:158
void RestoreEventWeights()
restore saved event weights
Definition: RuleFit.cxx:320
const MethodBase * GetMethodBase() const
Definition: RuleFit.h:145
RuleFitParams * GetRuleFitParamsPtr()
Definition: RuleFit.h:143
void Copy(const RuleFit &other)
copy method
Definition: RuleFit.cxx:159
void FillVisHistCut(const Rule *rule, std::vector< TH2F *> &hlist)
help routine to MakeVisHists() - fills for all variables
Definition: RuleFit.cxx:683
void FillCorr(TH2F *h2, const TMVA::Rule *rule, Int_t v1, Int_t v2)
fill rule correlation between vx and vy, weighted with either the importance or the coefficient ...
Definition: RuleFit.cxx:607
void MakeDebugHists()
this will create histograms intended rather for debugging or for the curious user ...
Definition: RuleFit.cxx:936
static const Int_t randSEED
Definition: RuleFit.h:171
2-D histogram with a float per channel (see TH1 documentation)
Definition: TH2.h:249
Implementation of a Decision Tree.
Definition: DecisionTree.h:59
unsigned int UInt_t
Definition: RtypesCore.h:42
void SetImportanceCut(Double_t minimp=0)
Definition: RuleEnsemble.h:131
void FillLin(TH2F *h2, Int_t vind)
fill lin
Definition: RuleFit.cxx:583
RuleEnsemble fRuleEnsemble
Definition: RuleFit.h:162
void Boost(TMVA::DecisionTree *dt)
Boost the events.
Definition: RuleFit.cxx:338
const Bool_t kFALSE
Definition: RtypesCore.h:92
void SetGDPathStep(Double_t s=0.01)
Definition: RuleFit.h:110
void SaveEventWeights()
save event weights - must be done before making the forest
Definition: RuleFit.cxx:308
Double_t GetNEveEff() const
Definition: RuleFit.h:127
double f(double x)
Double_t GetTrainingEventWeight(UInt_t i) const
Definition: RuleFit.h:129
double Double_t
Definition: RtypesCore.h:55
const MethodRuleFit * fMethodRuleFit
Definition: RuleFit.h:164
void MakeVisHists()
this will create histograms visualizing the rule ensemble
Definition: RuleFit.cxx:776
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
void FillVisHistCorr(const Rule *rule, std::vector< TH2F *> &hlist)
help routine to MakeVisHists() - fills for all correlation plots
Definition: RuleFit.cxx:714
void InitPtrs(const TMVA::MethodBase *rfbase)
initialize pointers
Definition: RuleFit.cxx:109
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
void SetRuleMinDist(Double_t d)
Definition: RuleFit.h:107
Abstract ClassifierFactory template that handles arbitrary types.
void FillCut(TH2F *h2, const TMVA::Rule *rule, Int_t vind)
Fill cut.
Definition: RuleFit.cxx:532
Bool_t fVisHistsUseImp
Definition: RuleFit.h:166
RuleFitParams fRuleFitParams
Definition: RuleFit.h:163
Double_t EvalEvent(const Event &e)
evaluate single event
Definition: RuleFit.cxx:431
std::vector< const TMVA::DecisionTree * > fForest
Definition: RuleFit.h:161
void UseCoefficientsVisHists()
Definition: RuleFit.h:115
void Initialize(const TMVA::MethodBase *rfbase)
initialize the parameters of the RuleFit method and make rules
Definition: RuleFit.cxx:119
Double_t CalcWeightSum(const std::vector< const TMVA::Event *> *events, UInt_t neve=0)
calculate the sum of weights
Definition: RuleFit.cxx:175
const MethodRuleFit * GetMethodRuleFit() const
Definition: RuleFit.h:144
std::vector< const TMVA::Event * > fTrainingEvents
Definition: RuleFit.h:155
UInt_t GetNTreeSample() const
Definition: RuleFit.h:126
const Bool_t kTRUE
Definition: RtypesCore.h:91
const Int_t n
Definition: legend1.C:16
void SetGDPathStep(Double_t s)
Definition: RuleFitParams.h:72
void SetRuleMinDist(Double_t d)
Definition: RuleEnsemble.h:128
void SetImportanceCut(Double_t minimp=0)
Definition: RuleFit.h:105
MsgLogger * fLogger
Definition: RuleFit.h:168