Logo ROOT   6.14/05
Reference Guide
RuleEnsemble.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : RuleEnsemble *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * A class generating an ensemble of rules *
12  * Input: a forest of decision trees *
13  * Output: an ensemble of rules *
14  * *
15  * Authors (alphabetical): *
16  * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * Iowa State U. *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_RuleEnsemble
30 #define ROOT_TMVA_RuleEnsemble
31 
32 // #if ROOT_VERSION_CODE >= 364802
33 // #ifndef ROOT_TMathBase
34 // #include "TMathBase.h"
35 // #endif
36 // #else
37 #include "TMath.h"
38 // #endif
39 
40 #include "TMVA/DecisionTree.h"
41 #include "TMVA/Event.h"
42 #include "TMVA/Rule.h"
43 #include "TMVA/Types.h"
44 
45 class TH1F;
46 
47 namespace TMVA {
48 
49  class MethodBase;
50  class RuleFit;
51  class MethodRuleFit;
52  class RuleEnsemble;
53  class MsgLogger;
54 
55  std::ostream& operator<<( std::ostream& os, const RuleEnsemble& event );
56 
57  class RuleEnsemble {
58 
59  // output operator for a RuleEnsemble
60  friend std::ostream& operator<< ( std::ostream& os, const RuleEnsemble& rules );
61 
62  public:
63 
64  enum ELearningModel { kFull=0, kRules=1, kLinear=2 };
65 
66  // main constructor
67  RuleEnsemble( RuleFit* rf );
68 
69  // copy constructor
70  RuleEnsemble( const RuleEnsemble& other );
71 
72  // empty constructor
73  RuleEnsemble();
74 
75  // destructor
76  virtual ~RuleEnsemble();
77 
78  // initialize
79  void Initialize( const RuleFit* rf );
80 
81  // set message type
82  void SetMsgType( EMsgType t );
83 
84  // makes the model - calls MakeRules() and MakeLinearTerms()
85  void MakeModel();
86 
87  // generates the rules from a given forest of decision trees
88  void MakeRules( const std::vector< const TMVA::DecisionTree *>& forest );
89 
90  // make the linear terms
91  void MakeLinearTerms();
92 
93  // select linear model
95 
96  // select rule model
98 
99  // select full (linear+rules) model
101 
102  // set rule collection (if not created by MakeRules())
103  void SetRules( const std::vector< TMVA::Rule *> & rules );
104 
105  // set RuleFit ptr
106  void SetRuleFit( const RuleFit *rf ) { fRuleFit = rf; }
107 
108  // set coefficients
109  void SetCoefficients( const std::vector< Double_t >& v );
110  void SetCoefficient( UInt_t i, Double_t v ) { if (i<fRules.size()) fRules[i]->SetCoefficient(v); }
111  //
112  void SetOffset(Double_t v=0.0) { fOffset=v; }
113  void AddOffset(Double_t v) { fOffset+=v; }
114  void SetLinCoefficients( const std::vector< Double_t >& v ) { fLinCoefficients = v; }
116  void SetLinDM( const std::vector<Double_t> & xmin ) { fLinDM = xmin; }
117  void SetLinDP( const std::vector<Double_t> & xmax ) { fLinDP = xmax; }
118  void SetLinNorm( const std::vector<Double_t> & norm ) { fLinNorm = norm; }
119 
120  Double_t CalcLinNorm( Double_t stdev ) { return ( stdev>0 ? fAverageRuleSigma/stdev : 1.0 ); }
121 
122  // clear coefficients
123  void ClearCoefficients( Double_t val=0 ) { for (UInt_t i=0; i<fRules.size(); i++) fRules[i]->SetCoefficient(val); }
124  void ClearLinCoefficients( Double_t val=0 ) { for (UInt_t i=0; i<fLinCoefficients.size(); i++) fLinCoefficients[i]=val; }
125  void ClearLinNorm( Double_t val=1.0 ) { for (UInt_t i=0; i<fLinNorm.size(); i++) fLinNorm[i]=val; }
126 
127  // set maximum allowed distance between equal rules
129 
130  // set minimum rule importance - used by CleanupRules()
131  void SetImportanceCut(Double_t minimp=0) { fImportanceCut=minimp; }
132 
133  // set the quantile for linear terms
135 
136  // set average sigma for rules
137  void SetAverageRuleSigma(Double_t v) { if (v>0.5) v=0.5; fAverageRuleSigma = v; fAverageSupport = 0.5*(1.0+TMath::Sqrt(1.0-4.0*v*v)); }
138 
139  // Calculate the number of possible rules from a given tree
140  Int_t CalcNRules( const TMVA::DecisionTree* dtree );
141  // Recursively search for end-nodes; used by CalcNRules()
142  void FindNEndNodes( const TMVA::Node* node, Int_t& nendnodes );
143 
144  // set current event to be used
145  void SetEvent( const Event & e ) { fEvent = &e; fEventCacheOK = kFALSE; }
146 
147  // fill cached values of rule/linear response
148  void UpdateEventVal();
149 
150  // fill binary rule responses for all events (or selected subset)
151  void MakeRuleMap(const std::vector<const TMVA::Event *> *events=0, UInt_t ifirst=0, UInt_t ilast=0);
152 
153  // clear rule map
154  void ClearRuleMap() { fRuleMap.clear(); fRuleMapEvents=0; }
155 
156  // evaluates the event using the ensemble of rules
157  // the following uses fEventCache, that is per event saved in cache
158  Double_t EvalEvent() const;
159  Double_t EvalEvent( const Event & e );
160 
161  // same as previous but using other model coefficients
163  const std::vector<Double_t> & coefs,
164  const std::vector<Double_t> & lincoefs) const;
165  Double_t EvalEvent( const Event & e,
166  Double_t ofs,
167  const std::vector<Double_t> & coefs,
168  const std::vector<Double_t> & lincoefs);
169 
170  // same as above but using the event index
171  // these will use fRuleMap - MUST call MakeRuleMap() before - no check...
172  Double_t EvalEvent( UInt_t evtidx ) const;
173  Double_t EvalEvent( UInt_t evtidx,
174  Double_t ofs,
175  const std::vector<Double_t> & coefs,
176  const std::vector<Double_t> & lincoefs) const;
177 
178  // evaluate the linear term using event by reference
179  // Double_t EvalLinEvent( UInt_t vind ) const;
180  Double_t EvalLinEvent() const;
181  Double_t EvalLinEvent( const std::vector<Double_t> & coefs ) const;
182  Double_t EvalLinEvent( const Event &e );
183  Double_t EvalLinEvent( const Event &e, UInt_t vind );
184  Double_t EvalLinEvent( const Event &e, const std::vector<Double_t> & coefs );
185 
186  // idem but using evtidx - must call MakeRuleMap() first
187  Double_t EvalLinEvent( UInt_t evtidx ) const;
188  Double_t EvalLinEvent( UInt_t evtidx, const std::vector<Double_t> & coefs ) const;
189  Double_t EvalLinEvent( UInt_t evtidx, UInt_t vind ) const;
190  Double_t EvalLinEvent( UInt_t evtidx, UInt_t vind, Double_t coefs ) const;
191 
192  // evaluate linear terms used to fill fEventLinearVal
193  Double_t EvalLinEventRaw( UInt_t vind, const Event &e, Bool_t norm ) const;
194  Double_t EvalLinEventRaw( UInt_t vind, UInt_t evtidx, Bool_t norm ) const;
195 
196  // calculate p(y=1|x) for a given event using the linear terms
197  Double_t PdfLinear( Double_t & nsig, Double_t & ntot ) const;
198 
199  // calculate p(y=1|x) for a given event using the rules
200  Double_t PdfRule( Double_t & nsig, Double_t & ntot ) const;
201 
202  // calculate F* = 2*p(y=1|x) - 1
203  Double_t FStar() const;
204  Double_t FStar(const TMVA::Event & e );
205 
206  // set reference importance for all model objects
207  void SetImportanceRef(Double_t impref);
208 
209  // calculates the support for all rules given the set of events
210  void CalcRuleSupport();
211 
212  // calculates rule importance
213  void CalcImportance();
214 
215  // calculates rule importance
217 
218  // calculates linear importance
220 
221  // calculates variable importance
222  void CalcVarImportance();
223 
224  // remove rules of low importance
225  void CleanupRules();
226 
227  // remove linear terms of low importance
228  void CleanupLinear();
229 
230  // remove similar rules
231  void RemoveSimilarRules();
232 
233  // get rule statistics
234  void RuleStatistics();
235 
236  // get rule response stats
237  void RuleResponseStats();
238 
239  // copy operator
240  void operator=( const RuleEnsemble& other ) { Copy( other ); }
241 
242  // calculate sum of the squared coefficients
244 
245  // fill the vector with the coefficients
246  void GetCoefficients( std::vector< Double_t >& v );
247 
248  // accessors
249  const MethodRuleFit* GetMethodRuleFit() const;
250  const MethodBase* GetMethodBase() const;
251  const RuleFit* GetRuleFit() const { return fRuleFit; }
252  //
253  const std::vector<const TMVA::Event *>* GetTrainingEvents() const;
254  const Event* GetTrainingEvent(UInt_t i) const;
255  const Event* GetEvent() const { return fEvent; }
256  //
259  Bool_t DoOnlyRules() const { return (fLearningModel==kRules); }
260  Bool_t DoOnlyLinear() const { return (fLearningModel==kLinear); }
261  Bool_t DoFull() const { return (fLearningModel==kFull); }
265  Double_t GetOffset() const { return fOffset; }
266  UInt_t GetNRules() const { return (DoRules() ? fRules.size():0); }
267  const std::vector<TMVA::Rule*>& GetRulesConst() const { return fRules; }
268  std::vector<TMVA::Rule*>& GetRules() { return fRules; }
269  const std::vector< Double_t >& GetLinCoefficients() const { return fLinCoefficients; }
270  const std::vector< Double_t >& GetLinNorm() const { return fLinNorm; }
271  const std::vector< Double_t >& GetLinImportance() const { return fLinImportance; }
272  const std::vector< Double_t >& GetVarImportance() const { return fVarImportance; }
273  UInt_t GetNLinear() const { return (DoLinear() ? fLinNorm.size():0); }
275 
276  const Rule *GetRulesConst(int i) const { return fRules[i]; }
277  Rule *GetRules(int i) { return fRules[i]; }
278 
279  UInt_t GetRulesNCuts(int i) const { return fRules[i]->GetRuleCut()->GetNcuts(); }
281  Double_t GetLinCoefficients(int i) const { return fLinCoefficients[i]; }
282  Double_t GetLinNorm(int i) const { return fLinNorm[i]; }
283  Double_t GetLinDM(int i) const { return fLinDM[i]; }
284  Double_t GetLinDP(int i) const { return fLinDP[i]; }
285  Double_t GetLinImportance(int i) const { return fLinImportance[i]; }
286  Double_t GetVarImportance(int i) const { return fVarImportance[i]; }
287  Double_t GetRulePTag(int i) const { return fRulePTag[i]; }
288  Double_t GetRulePSS(int i) const { return fRulePSS[i]; }
289  Double_t GetRulePSB(int i) const { return fRulePSB[i]; }
290  Double_t GetRulePBS(int i) const { return fRulePBS[i]; }
291  Double_t GetRulePBB(int i) const { return fRulePBB[i]; }
292 
293  Bool_t IsLinTermOK(int i) const { return fLinTermOK[i]; }
294  //
297  Double_t GetEventRuleVal(UInt_t i) const { return (fEventRuleVal[i] ? 1.0:0.0); }
300  //
301  const std::vector<UInt_t> & GetEventRuleMap(UInt_t evtidx) const { return fRuleMap[evtidx]; }
302  const TMVA::Event *GetRuleMapEvent(UInt_t evtidx) const { return (*fRuleMapEvents)[evtidx]; }
303  Bool_t IsRuleMapOK() const { return fRuleMapOK; }
304 
305  // print rule generation info
306  void PrintRuleGen() const;
307 
308  // print the ensemble
309  void Print() const;
310 
311  // print the model in a cryptic way
312  void PrintRaw ( std::ostream& os ) const; // obsolete
313  void* AddXMLTo ( void* parent ) const;
314 
315  // read the model from input stream
316  void ReadRaw ( std::istream& istr ); // obsolete
317  void ReadFromXML( void* wghtnode );
318 
319 
320  private:
321 
322  // delete all rules
323  void DeleteRules() { for (UInt_t i=0; i<fRules.size(); i++) delete fRules[i]; fRules.clear(); }
324 
325  // copy method
326  void Copy( RuleEnsemble const& other );
327 
328  // set all coeffs to default values
329  void ResetCoefficients();
330 
331  // make rules from one decision tree
332  void MakeRulesFromTree( const DecisionTree *dtree );
333 
334  // add a rule with the given end-node
335  void AddRule( const Node *node );
336 
337  // make a rule
338  Rule *MakeTheRule( const Node *node );
339 
340 
341  ELearningModel fLearningModel; // can be full (rules+linear), rules, linear
342  Double_t fImportanceCut; // minimum importance accepted
343  Double_t fLinQuantile; // quantile cut to remove outliers
344  Double_t fOffset; // offset in discriminator function
345  std::vector< TMVA::Rule* > fRules; // vector of rules
346  std::vector< Char_t > fLinTermOK; // flags linear terms with sufficient strong importance <-- stores boolean
347  std::vector< Double_t > fLinDP; // delta+ in eq 24, ref 2
348  std::vector< Double_t > fLinDM; // delta-
349  std::vector< Double_t > fLinCoefficients; // linear coefficients, one per variable
350  std::vector< Double_t > fLinNorm; // norm of ditto, see after eq 26 in ref 2
351  std::vector< TH1F* > fLinPDFB; // pdfs for each variable, background
352  std::vector< TH1F* > fLinPDFS; // pdfs for each variable, signal
353  std::vector< Double_t > fLinImportance; // linear term importance
354  std::vector< Double_t > fVarImportance; // one importance per input variable
355  Double_t fImportanceRef; // reference importance (max)
356  Double_t fAverageSupport; // average support (over all rules)
357  Double_t fAverageRuleSigma; // average rule sigma
358  //
359  std::vector< Double_t > fRuleVarFrac; // fraction of rules using a given variable - size of vector = n(variables)
360  std::vector< Double_t > fRulePSS; // p(tag as S|S) - tagged as S if rule is SIG and the event is accepted
361  std::vector< Double_t > fRulePSB; // p(tag as S|B)
362  std::vector< Double_t > fRulePBS; // p(tag as B|S)
363  std::vector< Double_t > fRulePBB; // p(tag as B|B)
364  std::vector< Double_t > fRulePTag; // p(tag)
365  Double_t fRuleFSig; // N(sig)/N(sig)+N(bkg)
366  Double_t fRuleNCave; // N(cuts) average
367  Double_t fRuleNCsig; // idem sigma
368  //
369  Double_t fRuleMinDist; // minimum rule distance
370  UInt_t fNRulesGenerated; // number of rules generated, before cleanup
371  //
372  const Event* fEvent; // current event.
373  Bool_t fEventCacheOK; // true if rule/linear respons are updated
374  std::vector<Char_t> fEventRuleVal; // the rule respons of current event <----- stores boolean
375  std::vector<Double_t> fEventLinearVal; // linear respons
376  //
377  Bool_t fRuleMapOK; // true if MakeRuleMap() has been called
378  std::vector< std::vector<UInt_t> > fRuleMap; // map of rule responses
379  UInt_t fRuleMapInd0; // start index
380  UInt_t fRuleMapInd1; // last index
381  const std::vector<const TMVA::Event *> *fRuleMapEvents; // pointer to vector of events used
382  //
383  const RuleFit* fRuleFit; // pointer to rule fit object
384 
385  mutable MsgLogger* fLogger; //! message logger
386  MsgLogger& Log() const { return *fLogger; }
387  };
388 }
389 
390 //_______________________________________________________________________
392 {
393  //
394  // Update rule and linear responses using the current event
395  //
396  if (fEventCacheOK) return;
397  //
398  if (DoRules()) {
399  UInt_t nrules = fRules.size();
400  fEventRuleVal.resize(nrules,kFALSE);
401  for (UInt_t r=0; r<nrules; r++) {
402  fEventRuleVal[r] = fRules[r]->EvalEvent(*fEvent);
403  }
404  }
405  if (DoLinear()) {
406  UInt_t nlin = fLinTermOK.size();
407  fEventLinearVal.resize(nlin,0);
408  for (UInt_t r=0; r<nlin; r++) {
409  fEventLinearVal[r] = EvalLinEventRaw(r,*fEvent,kFALSE); // not normalised!
410  }
411  }
413 }
414 
415 //_____________________________________________________________________
417 {
418  // evaluate current event
419 
420  Int_t nrules = fRules.size();
421  Double_t rval=fOffset;
422  Double_t linear=0;
423  //
424  // evaluate all rules
425  // normally it should NOT use the normalized rules - the flag should be kFALSE
426  //
427  if (DoRules()) {
428  for ( Int_t i=0; i<nrules; i++ ) {
429  if (fEventRuleVal[i])
430  rval += fRules[i]->GetCoefficient();
431  }
432  }
433  //
434  // Include linear part - the call below incorporates both coefficient and normalisation (fLinNorm)
435  //
436  if (DoLinear()) linear = EvalLinEvent();
437  rval +=linear;
438 
439  return rval;
440 }
441 
442 //_____________________________________________________________________
444  const std::vector<Double_t> & coefs,
445  const std::vector<Double_t> & lincoefs ) const
446 {
447  // evaluate current event with given offset and coefs
448 
449  Int_t nrules = fRules.size();
450  Double_t rval = ofs;
451  Double_t linear = 0;
452  //
453  // evaluate all rules
454  //
455  if (DoRules()) {
456  for ( Int_t i=0; i<nrules; i++ ) {
457  if (fEventRuleVal[i])
458  rval += coefs[i];
459  }
460  }
461  //
462  // Include linear part - the call below incorporates both coefficient and normalisation (fLinNorm)
463  //
464  if (DoLinear()) linear = EvalLinEvent(lincoefs);
465  rval +=linear;
466 
467  return rval;
468 }
469 
470 //_____________________________________________________________________
472 {
473  // evaluate event e
474  SetEvent(e);
475  UpdateEventVal();
476  return EvalEvent();
477 }
478 
479 //_____________________________________________________________________
481  Double_t ofs,
482  const std::vector<Double_t> & coefs,
483  const std::vector<Double_t> & lincoefs )
484 {
485  // evaluate event e
486  SetEvent(e);
487  UpdateEventVal();
488  return EvalEvent(ofs,coefs,lincoefs);
489 }
490 
491 //_____________________________________________________________________
493 {
494  // evaluate event with index evtidx
495  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
496  //
497  Double_t rval=fOffset;
498  if (DoRules()) {
499  UInt_t nrules = fRuleMap[evtidx].size();
500  UInt_t rind;
501  for (UInt_t ir = 0; ir<nrules; ir++) {
502  rind = fRuleMap[evtidx][ir];
503  rval += fRules[rind]->GetCoefficient();
504  }
505  }
506  if (DoLinear()) {
507  UInt_t nlin = fLinTermOK.size();
508  for (UInt_t r=0; r<nlin; r++) {
509  if (fLinTermOK[r]) {
510  rval += fLinCoefficients[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
511  }
512  }
513  }
514  return rval;
515 }
516 
517 //_____________________________________________________________________
519  Double_t ofs,
520  const std::vector<Double_t> & coefs,
521  const std::vector<Double_t> & lincoefs ) const
522 {
523  // evaluate event with index evtidx and user given model coefficients
524  //
525  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
526  Double_t rval=ofs;
527  if (DoRules()) {
528  UInt_t nrules = fRuleMap[evtidx].size();
529  UInt_t rind;
530  for (UInt_t ir = 0; ir<nrules; ir++) {
531  rind = fRuleMap[evtidx][ir];
532  rval += coefs[rind];
533  }
534  }
535  if (DoLinear()) {
536  rval += EvalLinEvent( evtidx, lincoefs );
537  }
538  return rval;
539 }
540 
541 //_______________________________________________________________________
543 {
544  // evaluate linear term vind for event e: the value is clamped to [fLinDM, fLinDP] and, if norm is set, scaled by fLinNorm
545 
546  Double_t val = e.GetValue(vind);
547  Double_t rval = TMath::Min( fLinDP[vind], TMath::Max( fLinDM[vind], val ) );
548  if (norm) rval *= fLinNorm[vind];
549  return rval;
550 }
551 
552 //_______________________________________________________________________
554 {
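555  // evaluate linear term vind for the rule-map event with index evtidx: the value is clamped to [fLinDM, fLinDP] and, if norm is set, scaled by fLinNorm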
556 
557  Double_t val = (*fRuleMapEvents)[evtidx]->GetValue(vind);
558  Double_t rval = TMath::Min( fLinDP[vind], TMath::Max( fLinDM[vind], val ) );
559  if (norm) rval *= fLinNorm[vind];
560  return rval;
561 }
562 
563 //_______________________________________________________________________
565 {
566  // evaluate event linearly
567 
568  Double_t rval=0;
569  for (UInt_t v=0; v<fLinTermOK.size(); v++) {
570  if (fLinTermOK[v])
572  }
573  return rval;
574 }
575 
576 //_______________________________________________________________________
577 inline Double_t TMVA::RuleEnsemble::EvalLinEvent(const std::vector<Double_t> & coefs) const
578 {
579  // evaluate event linearly using the given coefficients
580 
581  Double_t rval=0;
582  for (UInt_t v=0; v<fLinTermOK.size(); v++) {
583  if (fLinTermOK[v])
584  rval += coefs[v]*fEventLinearVal[v]*fLinNorm[v];
585  }
586  return rval;
587 }
588 
589 //_______________________________________________________________________
591 {
592  // evaluate event linearly
593 
594  SetEvent(e);
595  UpdateEventVal();
596  return EvalLinEvent();
597 }
598 
599 //_______________________________________________________________________
601 {
602  // evaluate linear term vind
603 
604  SetEvent(e);
605  UpdateEventVal();
606  return GetEventLinearValNorm(vind);
607 }
608 
609 //_______________________________________________________________________
610 inline Double_t TMVA::RuleEnsemble::EvalLinEvent( const TMVA::Event& e, const std::vector<Double_t> & coefs )
611 {
612  // evaluate event linearly using the given coefficients
613 
614  SetEvent(e);
615  UpdateEventVal();
616  return EvalLinEvent(coefs);
617 }
618 
619 //_______________________________________________________________________
620 inline Double_t TMVA::RuleEnsemble::EvalLinEvent( UInt_t evtidx, const std::vector<Double_t> & coefs ) const
621 {
622  // evaluate event linearly using the given coefficients
623  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
624  Double_t rval=0;
625  UInt_t nlin = fLinTermOK.size();
626  for (UInt_t r=0; r<nlin; r++) {
627  if (fLinTermOK[r]) {
628  rval += coefs[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
629  }
630  }
631  return rval;
632 }
633 
634 //_______________________________________________________________________
636 {
637  // evaluate the event with index evtidx linearly using the model's linear coefficients (fLinCoefficients)
638  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
639  Double_t rval=0;
640  UInt_t nlin = fLinTermOK.size();
641  for (UInt_t r=0; r<nlin; r++) {
642  if (fLinTermOK[r]) {
643  rval += fLinCoefficients[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
644  }
645  }
646  return rval;
647 }
648 
649 //_______________________________________________________________________
651 {
652  // evaluate linear term vind for the event with index evtidx using the model's coefficient fLinCoefficients[vind]
653  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
654  Double_t rval;
655  rval = fLinCoefficients[vind] * EvalLinEventRaw(vind,*(*fRuleMapEvents)[evtidx],kTRUE);
656  return rval;
657 }
658 
659 //_______________________________________________________________________
661 {
662  // evaluate linear term vind for the event with index evtidx using the given coefficient
663  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
664  Double_t rval;
665  rval = coefs * EvalLinEventRaw(vind,*(*fRuleMapEvents)[evtidx],kTRUE);
666  return rval;
667 }
668 
669 #endif
Double_t GetLinCoefficients(int i) const
Definition: RuleEnsemble.h:281
Double_t fAverageSupport
Definition: RuleEnsemble.h:356
MsgLogger * fLogger
Definition: RuleEnsemble.h:385
Double_t GetImportanceRef() const
Definition: RuleEnsemble.h:264
void MakeRuleMap(const std::vector< const TMVA::Event *> *events=0, UInt_t ifirst=0, UInt_t ilast=0)
Makes rule map for all events.
J Friedman's RuleFit method.
Definition: MethodRuleFit.h:47
float xmin
Definition: THbookFile.cxx:93
void SetLinDP(const std::vector< Double_t > &xmax)
Definition: RuleEnsemble.h:117
Double_t GetRulePBS(int i) const
Definition: RuleEnsemble.h:290
void SetEvent(const Event &e)
Definition: RuleEnsemble.h:145
Double_t GetRulePTag(int i) const
Definition: RuleEnsemble.h:287
void SetLinDM(const std::vector< Double_t > &xmin)
Definition: RuleEnsemble.h:116
void ClearLinNorm(Double_t val=1.0)
Definition: RuleEnsemble.h:125
std::vector< TH1F *> fLinPDFS
Definition: RuleEnsemble.h:352
const std::vector< Double_t > & GetLinNorm() const
Definition: RuleEnsemble.h:270
A class implementing various fits of rule ensembles.
Definition: RuleFit.h:45
Double_t EvalLinEventRaw(UInt_t vind, const Event &e, Bool_t norm) const
Definition: RuleEnsemble.h:542
RuleEnsemble()
constructor
Rule * MakeTheRule(const Node *node)
Make a Rule from a given Node.
UInt_t GetNLinear() const
Definition: RuleEnsemble.h:273
Int_t CalcNRules(const TMVA::DecisionTree *dtree)
calculate the number of rules
Double_t CalcLinNorm(Double_t stdev)
Definition: RuleEnsemble.h:120
const std::vector< UInt_t > & GetEventRuleMap(UInt_t evtidx) const
Definition: RuleEnsemble.h:301
virtual ~RuleEnsemble()
destructor
const std::vector< TMVA::Rule * > & GetRulesConst() const
Definition: RuleEnsemble.h:267
Virtual base Class for all MVA method.
Definition: MethodBase.h:109
Double_t GetAverageRuleSigma() const
Definition: RuleEnsemble.h:296
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:567
std::vector< Double_t > fLinDP
Definition: RuleEnsemble.h:347
const std::vector< Double_t > & GetVarImportance() const
Definition: RuleEnsemble.h:272
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< Double_t > fRulePBB
Definition: RuleEnsemble.h:363
const Event * GetEvent() const
Definition: RuleEnsemble.h:255
Double_t GetVarImportance(int i) const
Definition: RuleEnsemble.h:286
void CleanupLinear()
cleanup linear model
const std::vector< Double_t > & GetLinCoefficients() const
Definition: RuleEnsemble.h:269
Double_t GetEventLinearValNorm(UInt_t i) const
Definition: RuleEnsemble.h:299
std::vector< TMVA::Rule *> fRules
Definition: RuleEnsemble.h:345
std::vector< Char_t > fLinTermOK
Definition: RuleEnsemble.h:346
Implementation of a rule.
Definition: Rule.h:48
void SetAverageRuleSigma(Double_t v)
Definition: RuleEnsemble.h:137
Bool_t DoOnlyLinear() const
Definition: RuleEnsemble.h:260
void SetLinCoefficients(const std::vector< Double_t > &v)
Definition: RuleEnsemble.h:114
void SetMsgType(EMsgType t)
void SetImportanceRef(Double_t impref)
set reference importance
void RuleResponseStats()
calculate various statistics for this rule
friend std::ostream & operator<<(std::ostream &os, const RuleEnsemble &rules)
Double_t GetRuleMinDist() const
Definition: RuleEnsemble.h:280
std::vector< Double_t > fLinNorm
Definition: RuleEnsemble.h:350
void RemoveSimilarRules()
remove rules that behave similarly
void Copy(RuleEnsemble const &other)
copy function
Double_t GetLinNorm(int i) const
Definition: RuleEnsemble.h:282
Double_t GetEventLinearVal(UInt_t i) const
Definition: RuleEnsemble.h:298
Double_t GetEventRuleVal(UInt_t i) const
Definition: RuleEnsemble.h:297
const std::vector< Double_t > & GetLinImportance() const
Definition: RuleEnsemble.h:271
UInt_t GetNRules() const
Definition: RuleEnsemble.h:266
Double_t PdfRule(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for rules.
Double_t FStar() const
We want to estimate F* = argmin Eyx( L(y,F(x) ), min wrt F(x) F(x) = FL(x) + FR(x) ...
void SetRules(const std::vector< TMVA::Rule *> &rules)
set rules
void Print() const
print function
std::vector< TH1F *> fLinPDFB
Definition: RuleEnsemble.h:351
Bool_t DoOnlyRules() const
Definition: RuleEnsemble.h:259
void SetRuleFit(const RuleFit *rf)
Definition: RuleEnsemble.h:106
std::vector< Double_t > fRulePSB
Definition: RuleEnsemble.h:361
void CalcImportance()
calculate the importance of each rule
ELearningModel GetLearningModel() const
Definition: RuleEnsemble.h:262
std::vector< Double_t > fLinCoefficients
Definition: RuleEnsemble.h:349
void CleanupRules()
cleanup rules
Rule * GetRules(int i)
Definition: RuleEnsemble.h:277
const RuleFit * fRuleFit
Definition: RuleEnsemble.h:383
Double_t GetRulePSB(int i) const
Definition: RuleEnsemble.h:289
Double_t GetImportanceCut() const
Definition: RuleEnsemble.h:263
Double_t GetRulePBB(int i) const
Definition: RuleEnsemble.h:291
UInt_t GetRulesNCuts(int i) const
Definition: RuleEnsemble.h:279
void * AddXMLTo(void *parent) const
write rules to XML
void CalcVarImportance()
Calculates variable importance using eq (35) in RuleFit paper by Friedman et.al.
ROOT::R::TRInterface & r
Definition: Object.C:4
Bool_t DoLinear() const
Definition: RuleEnsemble.h:257
SVector< double, 2 > v
Definition: Dict.h:5
ELearningModel fLearningModel
Definition: RuleEnsemble.h:341
Double_t CalcRuleImportance()
calculate importance of each rule
void PrintRuleGen() const
print rule generation info
std::vector< Double_t > fLinDM
Definition: RuleEnsemble.h:348
void MakeRulesFromTree(const DecisionTree *dtree)
create rules from the decision tree structure
Double_t CoefficientRadius()
Calculates sqrt(Sum(a_i^2)), i=1..N (NOTE do not include a0)
void SetOffset(Double_t v=0.0)
Definition: RuleEnsemble.h:112
void ClearCoefficients(Double_t val=0)
Definition: RuleEnsemble.h:123
const TMVA::Event * GetRuleMapEvent(UInt_t evtidx) const
Definition: RuleEnsemble.h:302
void AddRule(const Node *node)
add a new rule to the tree
Implementation of a Decision Tree.
Definition: DecisionTree.h:64
unsigned int UInt_t
Definition: RtypesCore.h:42
std::ostream & operator<<(std::ostream &os, const BinaryTree &tree)
void SetImportanceCut(Double_t minimp=0)
Definition: RuleEnsemble.h:131
MsgLogger & Log() const
message logger
Definition: RuleEnsemble.h:386
Double_t PdfLinear(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for the linear terms.
void RuleStatistics()
calculate various statistics for this rule
const RuleFit * GetRuleFit() const
Definition: RuleEnsemble.h:251
float xmax
Definition: THbookFile.cxx:93
void ClearLinCoefficients(Double_t val=0)
Definition: RuleEnsemble.h:124
Bool_t IsLinTermOK(int i) const
Definition: RuleEnsemble.h:293
Bool_t IsRuleMapOK() const
Definition: RuleEnsemble.h:303
Double_t fImportanceRef
Definition: RuleEnsemble.h:355
Double_t GetRulePSS(int i) const
Definition: RuleEnsemble.h:288
void ReadFromXML(void *wghtnode)
read rules from XML
const std::vector< const TMVA::Event * > * GetTrainingEvents() const
get list of training events from the rule fitter
const Bool_t kFALSE
Definition: RtypesCore.h:88
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
void SetLinCoefficient(UInt_t i, Double_t v)
Definition: RuleEnsemble.h:115
Double_t GetLinQuantile() const
Definition: RuleEnsemble.h:274
#define d(i)
Definition: RSha256.hxx:102
void PrintRaw(std::ostream &os) const
write rules to stream
std::vector< Char_t > fEventRuleVal
Definition: RuleEnsemble.h:374
void FindNEndNodes(const TMVA::Node *node, Int_t &nendnodes)
find the number of leaf nodes
void Initialize(const RuleFit *rf)
Initializes all member variables with default values.
void SetCoefficients(const std::vector< Double_t > &v)
set all rule coefficients
double Double_t
Definition: RtypesCore.h:55
std::vector< Double_t > fVarImportance
Definition: RuleEnsemble.h:354
std::vector< Double_t > fEventLinearVal
Definition: RuleEnsemble.h:375
Double_t GetLinDM(int i) const
Definition: RuleEnsemble.h:283
void MakeLinearTerms()
Make the linear terms as in eq 25, ref 2 For this the b and (1-b) quantiles are needed.
std::vector< Double_t > fLinImportance
Definition: RuleEnsemble.h:353
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
Double_t fAverageRuleSigma
Definition: RuleEnsemble.h:357
void MakeRules(const std::vector< const TMVA::DecisionTree *> &forest)
Makes rules from the given decision tree.
Double_t GetAverageSupport() const
Definition: RuleEnsemble.h:295
std::vector< Double_t > fRulePBS
Definition: RuleEnsemble.h:362
void CalcRuleSupport()
calculate the support for all rules
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
const std::vector< const TMVA::Event * > * fRuleMapEvents
Definition: RuleEnsemble.h:381
const MethodBase * GetMethodBase() const
Get a pointer to the original MethodRuleFit.
std::vector< Double_t > fRulePSS
Definition: RuleEnsemble.h:360
void operator=(const RuleEnsemble &other)
Definition: RuleEnsemble.h:240
void MakeModel()
create model
Double_t GetOffset() const
Definition: RuleEnsemble.h:265
Abstract ClassifierFactory template that handles arbitrary types.
Node for the BinarySearch or Decision Trees.
Definition: Node.h:56
Bool_t DoFull() const
Definition: RuleEnsemble.h:261
const Rule * GetRulesConst(int i) const
Definition: RuleEnsemble.h:276
Double_t EvalLinEvent() const
Definition: RuleEnsemble.h:564
Bool_t DoRules() const
Definition: RuleEnsemble.h:258
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:200
const MethodRuleFit * GetMethodRuleFit() const
Get a pointer to the original MethodRuleFit.
void SetLinNorm(const std::vector< Double_t > &norm)
Definition: RuleEnsemble.h:118
void ResetCoefficients()
reset all rule coefficients
Double_t GetLinDP(int i) const
Definition: RuleEnsemble.h:284
std::vector< TMVA::Rule * > & GetRules()
Definition: RuleEnsemble.h:268
void GetCoefficients(std::vector< Double_t > &v)
Retrieve all rule coefficients.
void AddOffset(Double_t v)
Definition: RuleEnsemble.h:113
Double_t EvalEvent() const
Definition: RuleEnsemble.h:416
Double_t Sqrt(Double_t x)
Definition: TMath.h:690
Double_t CalcLinImportance()
calculate the linear importance for each rule
float * q
Definition: THbookFile.cxx:87
void SetCoefficient(UInt_t i, Double_t v)
Definition: RuleEnsemble.h:110
Double_t GetLinImportance(int i) const
Definition: RuleEnsemble.h:285
const Bool_t kTRUE
Definition: RtypesCore.h:87
std::vector< Double_t > fRulePTag
Definition: RuleEnsemble.h:364
void SetRuleMinDist(Double_t d)
Definition: RuleEnsemble.h:128
Double_t fImportanceCut
Definition: RuleEnsemble.h:342
const Event * GetTrainingEvent(UInt_t i) const
get the training event from the rule fitter
std::vector< Double_t > fRuleVarFrac
Definition: RuleEnsemble.h:359
void SetLinQuantile(Double_t q)
Definition: RuleEnsemble.h:134
std::vector< std::vector< UInt_t > > fRuleMap
Definition: RuleEnsemble.h:378
const Event * fEvent
Definition: RuleEnsemble.h:372
void ReadRaw(std::istream &istr)
read rule ensemble from stream