ROOT  6.06/09
Reference Guide
RuleEnsemble.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : RuleEnsemble *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * A class generating an ensemble of rules *
12  * Input: a forest of decision trees *
13  * Output: an ensemble of rules *
14  * *
15  * Authors (alphabetical): *
16  * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * Iowa State U. *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_RuleEnsemble
30 #define ROOT_TMVA_RuleEnsemble
31 
32 // #if ROOT_VERSION_CODE >= 364802
33 // #ifndef ROOT_TMathBase
34 // #include "TMathBase.h"
35 // #endif
36 // #else
37 #ifndef ROOT_TMath
38 #include "TMath.h"
39 #endif
40 // #endif
41 
42 #ifndef ROOT_TMVA_DecisionTree
43 #include "TMVA/DecisionTree.h"
44 #endif
45 #ifndef ROOT_TMVA_Event
46 #include "TMVA/Event.h"
47 #endif
48 #ifndef ROOT_TMVA_Rule
49 #include "TMVA/Rule.h"
50 #endif
51 #ifndef ROOT_TMVA_Types
52 #include "TMVA/Types.h"
53 #endif
54 
55 class TH1F;
56 
57 namespace TMVA {
58 
59  class MethodBase;
60  class RuleFit;
61  class MethodRuleFit;
62  class RuleEnsemble;
63  class MsgLogger;
64 
65  std::ostream& operator<<( std::ostream& os, const RuleEnsemble& event );
66 
67  class RuleEnsemble {
68 
69  // output operator for a RuleEnsemble
70  friend std::ostream& operator<< ( std::ostream& os, const RuleEnsemble& rules );
71 
72  public:
73 
74  enum ELearningModel { kFull=0, kRules=1, kLinear=2 };
75 
76  // main constructor
77  RuleEnsemble( RuleFit* rf );
78 
79  // copy constructor
80  RuleEnsemble( const RuleEnsemble& other );
81 
82  // empty constructor
83  RuleEnsemble();
84 
85  // destructor
86  virtual ~RuleEnsemble();
87 
88  // initialize
89  void Initialize( const RuleFit* rf );
90 
91  // set message type
92  void SetMsgType( EMsgType t );
93 
94  // makes the model - calls MakeRules() and MakeLinearTerms()
95  void MakeModel();
96 
97  // generates the rules from a given forest of decision trees
98  void MakeRules( const std::vector< const TMVA::DecisionTree *>& forest );
99 
100  // make the linear terms
101  void MakeLinearTerms();
102 
103  // select linear model
105 
106  // select rule model
108 
109  // select full (linear+rules) model
111 
112  // set rule collection (if not created by MakeRules())
113  void SetRules( const std::vector< TMVA::Rule *> & rules );
114 
115  // set RuleFit ptr
116  void SetRuleFit( const RuleFit *rf ) { fRuleFit = rf; }
117 
118  // set coefficients
119  void SetCoefficients( const std::vector< Double_t >& v );
120  void SetCoefficient( UInt_t i, Double_t v ) { if (i<fRules.size()) fRules[i]->SetCoefficient(v); }
121  //
122  void SetOffset(Double_t v=0.0) { fOffset=v; }
124  void SetLinCoefficients( const std::vector< Double_t >& v ) { fLinCoefficients = v; }
126  void SetLinDM( const std::vector<Double_t> & xmin ) { fLinDM = xmin; }
127  void SetLinDP( const std::vector<Double_t> & xmax ) { fLinDP = xmax; }
128  void SetLinNorm( const std::vector<Double_t> & norm ) { fLinNorm = norm; }
129 
130  Double_t CalcLinNorm( Double_t stdev ) { return ( stdev>0 ? fAverageRuleSigma/stdev : 1.0 ); }
131 
132  // clear coefficients
133  void ClearCoefficients( Double_t val=0 ) { for (UInt_t i=0; i<fRules.size(); i++) fRules[i]->SetCoefficient(val); }
134  void ClearLinCoefficients( Double_t val=0 ) { for (UInt_t i=0; i<fLinCoefficients.size(); i++) fLinCoefficients[i]=val; }
135  void ClearLinNorm( Double_t val=1.0 ) { for (UInt_t i=0; i<fLinNorm.size(); i++) fLinNorm[i]=val; }
136 
137  // set maximum allowed distance between equal rules
139 
140  // set minimum rule importance - used by CleanupRules()
141  void SetImportanceCut(Double_t minimp=0) { fImportanceCut=minimp; }
142 
143  // set the quantile for linear terms
145 
146  // set average sigma for rules
147  void SetAverageRuleSigma(Double_t v) { if (v>0.5) v=0.5; fAverageRuleSigma = v; fAverageSupport = 0.5*(1.0+TMath::Sqrt(1.0-4.0*v*v)); }
148 
149  // Calculate the number of possible rules from a given tree
150  Int_t CalcNRules( const TMVA::DecisionTree* dtree );
151  // Recursively search for end-nodes; used by CalcNRules()
152  void FindNEndNodes( const TMVA::Node* node, Int_t& nendnodes );
153 
154  // set current event to be used
155  void SetEvent( const Event & e ) { fEvent = &e; fEventCacheOK = kFALSE; }
156 
157  // fill cached values of rule/linear response
158  void UpdateEventVal();
159 
160  // fill binary rule response for all events (or selected subset)
161  void MakeRuleMap(const std::vector<const TMVA::Event *> *events=0, UInt_t ifirst=0, UInt_t ilast=0);
162 
163  // clear rule map
164  void ClearRuleMap() { fRuleMap.clear(); fRuleMapEvents=0; }
165 
166  // evaluates the event using the ensemble of rules
167  // the following uses fEventCache, that is per event saved in cache
168  Double_t EvalEvent() const;
169  Double_t EvalEvent( const Event & e );
170 
171  // same as previous but using other model coefficients
173  const std::vector<Double_t> & coefs,
174  const std::vector<Double_t> & lincoefs) const;
175  Double_t EvalEvent( const Event & e,
176  Double_t ofs,
177  const std::vector<Double_t> & coefs,
178  const std::vector<Double_t> & lincoefs);
179 
180  // same as above but using the event index
181  // these will use fRuleMap - MUST call MakeRuleMap() before - no check...
182  Double_t EvalEvent( UInt_t evtidx ) const;
183  Double_t EvalEvent( UInt_t evtidx,
184  Double_t ofs,
185  const std::vector<Double_t> & coefs,
186  const std::vector<Double_t> & lincoefs) const;
187 
188  // evaluate the linear term using event by reference
189  // Double_t EvalLinEvent( UInt_t vind ) const;
190  Double_t EvalLinEvent() const;
191  Double_t EvalLinEvent( const std::vector<Double_t> & coefs ) const;
192  Double_t EvalLinEvent( const Event &e );
193  Double_t EvalLinEvent( const Event &e, UInt_t vind );
194  Double_t EvalLinEvent( const Event &e, const std::vector<Double_t> & coefs );
195 
196  // idem but using evtidx - must call MakeRuleMap() first
197  Double_t EvalLinEvent( UInt_t evtidx ) const;
198  Double_t EvalLinEvent( UInt_t evtidx, const std::vector<Double_t> & coefs ) const;
199  Double_t EvalLinEvent( UInt_t evtidx, UInt_t vind ) const;
200  Double_t EvalLinEvent( UInt_t evtidx, UInt_t vind, Double_t coefs ) const;
201 
202  // evaluate linear terms used to fill fEventLinearVal
203  Double_t EvalLinEventRaw( UInt_t vind, const Event &e, Bool_t norm ) const;
204  Double_t EvalLinEventRaw( UInt_t vind, UInt_t evtidx, Bool_t norm ) const;
205 
206  // calculate p(y=1|x) for a given event using the linear terms
207  Double_t PdfLinear( Double_t & nsig, Double_t & ntot ) const;
208 
209  // calculate p(y=1|x) for a given event using the rules
210  Double_t PdfRule( Double_t & nsig, Double_t & ntot ) const;
211 
212  // calculate F* = 2*p(y=1|x) - 1
213  Double_t FStar() const;
214  Double_t FStar(const TMVA::Event & e );
215 
216  // set reference importance for all model objects
217  void SetImportanceRef(Double_t impref);
218 
219  // calculates the support for all rules given the set of events
220  void CalcRuleSupport();
221 
222  // calculates rule importance
223  void CalcImportance();
224 
225  // calculates rule importance
227 
228  // calculates linear importance
230 
231  // calculates variable importance
232  void CalcVarImportance();
233 
234  // remove rules of low importance
235  void CleanupRules();
236 
237  // remove linear terms of low importance
238  void CleanupLinear();
239 
240  // remove similar rules
241  void RemoveSimilarRules();
242 
243  // get rule statistics
244  void RuleStatistics();
245 
246  // get rule response stats
247  void RuleResponseStats();
248 
249  // copy operator
250  void operator=( const RuleEnsemble& other ) { Copy( other ); }
251 
252  // calculate sum of the squared coefficients
254 
255  // fill the vector with the coefficients
256  void GetCoefficients( std::vector< Double_t >& v );
257 
258  // accessors
259  const MethodRuleFit* GetMethodRuleFit() const;
260  const MethodBase* GetMethodBase() const;
261  const RuleFit* GetRuleFit() const { return fRuleFit; }
262  //
263  const std::vector<const TMVA::Event *>* GetTrainingEvents() const;
264  const Event* GetTrainingEvent(UInt_t i) const;
265  const Event* GetEvent() const { return fEvent; }
266  //
269  Bool_t DoOnlyRules() const { return (fLearningModel==kRules); }
270  Bool_t DoOnlyLinear() const { return (fLearningModel==kLinear); }
271  Bool_t DoFull() const { return (fLearningModel==kFull); }
275  Double_t GetOffset() const { return fOffset; }
276  UInt_t GetNRules() const { return (DoRules() ? fRules.size():0); }
277  const std::vector<TMVA::Rule*>& GetRulesConst() const { return fRules; }
278  std::vector<TMVA::Rule*>& GetRules() { return fRules; }
279  const std::vector< Double_t >& GetLinCoefficients() const { return fLinCoefficients; }
280  const std::vector< Double_t >& GetLinNorm() const { return fLinNorm; }
281  const std::vector< Double_t >& GetLinImportance() const { return fLinImportance; }
282  const std::vector< Double_t >& GetVarImportance() const { return fVarImportance; }
283  UInt_t GetNLinear() const { return (DoLinear() ? fLinNorm.size():0); }
285 
286  const Rule *GetRulesConst(int i) const { return fRules[i]; }
287  Rule *GetRules(int i) { return fRules[i]; }
288 
289  UInt_t GetRulesNCuts(int i) const { return fRules[i]->GetRuleCut()->GetNcuts(); }
291  Double_t GetLinCoefficients(int i) const { return fLinCoefficients[i]; }
292  Double_t GetLinNorm(int i) const { return fLinNorm[i]; }
293  Double_t GetLinDM(int i) const { return fLinDM[i]; }
294  Double_t GetLinDP(int i) const { return fLinDP[i]; }
295  Double_t GetLinImportance(int i) const { return fLinImportance[i]; }
296  Double_t GetVarImportance(int i) const { return fVarImportance[i]; }
297  Double_t GetRulePTag(int i) const { return fRulePTag[i]; }
298  Double_t GetRulePSS(int i) const { return fRulePSS[i]; }
299  Double_t GetRulePSB(int i) const { return fRulePSB[i]; }
300  Double_t GetRulePBS(int i) const { return fRulePBS[i]; }
301  Double_t GetRulePBB(int i) const { return fRulePBB[i]; }
302 
303  Bool_t IsLinTermOK(int i) const { return fLinTermOK[i]; }
304  //
307  Double_t GetEventRuleVal(UInt_t i) const { return (fEventRuleVal[i] ? 1.0:0.0); }
310  //
311  const std::vector<UInt_t> & GetEventRuleMap(UInt_t evtidx) const { return fRuleMap[evtidx]; }
312  const TMVA::Event *GetRuleMapEvent(UInt_t evtidx) const { return (*fRuleMapEvents)[evtidx]; }
313  Bool_t IsRuleMapOK() const { return fRuleMapOK; }
314 
315  // print rule generation info
316  void PrintRuleGen() const;
317 
318  // print the ensemble
319  void Print() const;
320 
321  // print the model in a cryptic way
322  void PrintRaw ( std::ostream& os ) const; // obsolete
323  void* AddXMLTo ( void* parent ) const;
324 
325  // read the model from input stream
326  void ReadRaw ( std::istream& istr ); // obsolete
327  void ReadFromXML( void* wghtnode );
328 
329 
330  private:
331 
332  // delete all rules
333  void DeleteRules() { for (UInt_t i=0; i<fRules.size(); i++) delete fRules[i]; fRules.clear(); }
334 
335  // copy method
336  void Copy( RuleEnsemble const& other );
337 
338  // set all coeffs to default values
339  void ResetCoefficients();
340 
341  // make rules from one decision tree
342  void MakeRulesFromTree( const DecisionTree *dtree );
343 
344  // add a rule with the given end-node
345  void AddRule( const Node *node );
346 
347  // make a rule
348  Rule *MakeTheRule( const Node *node );
349 
350 
351  ELearningModel fLearningModel; // can be full (rules+linear), rules, linear
352  Double_t fImportanceCut; // minimum importance accepted
353  Double_t fLinQuantile; // quantile cut to remove outliers
354  Double_t fOffset; // offset in discriminator function
355  std::vector< TMVA::Rule* > fRules; // vector of rules
356  std::vector< Char_t > fLinTermOK; // flags linear terms with sufficient strong importance <-- stores boolean
357  std::vector< Double_t > fLinDP; // delta+ in eq 24, ref 2
358  std::vector< Double_t > fLinDM; // delta-
359  std::vector< Double_t > fLinCoefficients; // linear coefficients, one per variable
360  std::vector< Double_t > fLinNorm; // norm of ditto, see after eq 26 in ref 2
361  std::vector< TH1F* > fLinPDFB; // pdfs for each variable, background
362  std::vector< TH1F* > fLinPDFS; // pdfs for each variable, signal
363  std::vector< Double_t > fLinImportance; // linear term importance
364  std::vector< Double_t > fVarImportance; // one importance per input variable
365  Double_t fImportanceRef; // reference importance (max)
366  Double_t fAverageSupport; // average support (over all rules)
367  Double_t fAverageRuleSigma; // average rule sigma
368  //
369  std::vector< Double_t > fRuleVarFrac; // fraction of rules using a given variable - size of vector = n(variables)
370  std::vector< Double_t > fRulePSS; // p(tag as S|S) - tagged as S if rule is SIG and the event is accepted
371  std::vector< Double_t > fRulePSB; // p(tag as S|B)
372  std::vector< Double_t > fRulePBS; // p(tag as B|S)
373  std::vector< Double_t > fRulePBB; // p(tag as B|B)
374  std::vector< Double_t > fRulePTag; // p(tag)
375  Double_t fRuleFSig; // N(sig)/N(sig)+N(bkg)
376  Double_t fRuleNCave; // N(cuts) average
377  Double_t fRuleNCsig; // idem sigma
378  //
379  Double_t fRuleMinDist; // minimum rule distance
380  UInt_t fNRulesGenerated; // number of rules generated, before cleanup
381  //
382  const Event* fEvent; // current event.
383  Bool_t fEventCacheOK; // true if rule/linear respons are updated
384  std::vector<Char_t> fEventRuleVal; // the rule respons of current event <----- stores boolean
385  std::vector<Double_t> fEventLinearVal; // linear respons
386  //
387  Bool_t fRuleMapOK; // true if MakeRuleMap() has been called
388  std::vector< std::vector<UInt_t> > fRuleMap; // map of rule responses
389  UInt_t fRuleMapInd0; // start index
390  UInt_t fRuleMapInd1; // last index
391  const std::vector<const TMVA::Event *> *fRuleMapEvents; // pointer to vector of events used
392  //
393  const RuleFit* fRuleFit; // pointer to rule fit object
394 
395  mutable MsgLogger* fLogger; //! message logger
396  MsgLogger& Log() const { return *fLogger; }
397  };
398 }
399 
400 //_______________________________________________________________________
402 {
403  //
404  // Update rule and linear response using the current event
405  //
406  if (fEventCacheOK) return;
407  //
408  if (DoRules()) {
409  UInt_t nrules = fRules.size();
410  fEventRuleVal.resize(nrules,kFALSE);
411  for (UInt_t r=0; r<nrules; r++) {
412  fEventRuleVal[r] = fRules[r]->EvalEvent(*fEvent);
413  }
414  }
415  if (DoLinear()) {
416  UInt_t nlin = fLinTermOK.size();
417  fEventLinearVal.resize(nlin,0);
418  for (UInt_t r=0; r<nlin; r++) {
419  fEventLinearVal[r] = EvalLinEventRaw(r,*fEvent,kFALSE); // not normalised!
420  }
421  }
423 }
424 
425 //_____________________________________________________________________
427 {
428  // evaluate current event
429 
430  Int_t nrules = fRules.size();
431  Double_t rval=fOffset;
432  Double_t linear=0;
433  //
434  // evaluate all rules
435  // normally it should NOT use the normalized rules - the flag should be kFALSE
436  //
437  if (DoRules()) {
438  for ( Int_t i=0; i<nrules; i++ ) {
439  if (fEventRuleVal[i])
440  rval += fRules[i]->GetCoefficient();
441  }
442  }
443  //
444  // Include linear part - the call below incorporates both coefficient and normalisation (fLinNorm)
445  //
446  if (DoLinear()) linear = EvalLinEvent();
447  rval +=linear;
448 
449  return rval;
450 }
451 
452 //_____________________________________________________________________
454  const std::vector<Double_t> & coefs,
455  const std::vector<Double_t> & lincoefs ) const
456 {
457  // evaluate current event with given offset and coefs
458 
459  Int_t nrules = fRules.size();
460  Double_t rval = ofs;
461  Double_t linear = 0;
462  //
463  // evaluate all rules
464  //
465  if (DoRules()) {
466  for ( Int_t i=0; i<nrules; i++ ) {
467  if (fEventRuleVal[i])
468  rval += coefs[i];
469  }
470  }
471  //
472  // Include linear part - the call below incorporates both coefficient and normalisation (fLinNorm)
473  //
474  if (DoLinear()) linear = EvalLinEvent(lincoefs);
475  rval +=linear;
476 
477  return rval;
478 }
479 
480 //_____________________________________________________________________
482 {
483  // evaluate event e
484  SetEvent(e);
485  UpdateEventVal();
486  return EvalEvent();
487 }
488 
489 //_____________________________________________________________________
491  Double_t ofs,
492  const std::vector<Double_t> & coefs,
493  const std::vector<Double_t> & lincoefs )
494 {
495  // evaluate event e
496  SetEvent(e);
497  UpdateEventVal();
498  return EvalEvent(ofs,coefs,lincoefs);
499 }
500 
501 //_____________________________________________________________________
503 {
504  // evaluate event with index evtidx
505  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
506  //
507  Double_t rval=fOffset;
508  if (DoRules()) {
509  UInt_t nrules = fRuleMap[evtidx].size();
510  UInt_t rind;
511  for (UInt_t ir = 0; ir<nrules; ir++) {
512  rind = fRuleMap[evtidx][ir];
513  rval += fRules[rind]->GetCoefficient();
514  }
515  }
516  if (DoLinear()) {
517  UInt_t nlin = fLinTermOK.size();
518  for (UInt_t r=0; r<nlin; r++) {
519  if (fLinTermOK[r]) {
520  rval += fLinCoefficients[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
521  }
522  }
523  }
524  return rval;
525 }
526 
527 //_____________________________________________________________________
529  Double_t ofs,
530  const std::vector<Double_t> & coefs,
531  const std::vector<Double_t> & lincoefs ) const
532 {
533  // evaluate event with index evtidx and user given model coefficients
534  //
535  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
536  Double_t rval=ofs;
537  if (DoRules()) {
538  UInt_t nrules = fRuleMap[evtidx].size();
539  UInt_t rind;
540  for (UInt_t ir = 0; ir<nrules; ir++) {
541  rind = fRuleMap[evtidx][ir];
542  rval += coefs[rind];
543  }
544  }
545  if (DoLinear()) {
546  rval += EvalLinEvent( evtidx, lincoefs );
547  }
548  return rval;
549 }
550 
551 //_______________________________________________________________________
553 {
554  // evaluate linear term vind for event e: the value is clamped to [fLinDM, fLinDP] and multiplied by fLinNorm only if norm is set
555 
556  Double_t val = e.GetValue(vind);
557  Double_t rval = TMath::Min( fLinDP[vind], TMath::Max( fLinDM[vind], val ) );
558  if (norm) rval *= fLinNorm[vind];
559  return rval;
560 }
561 
562 //_______________________________________________________________________
564 {
565  // evaluate linear term vind for rule-map event evtidx: the value is clamped to [fLinDM, fLinDP] and multiplied by fLinNorm only if norm is set
566 
567  Double_t val = (*fRuleMapEvents)[evtidx]->GetValue(vind);
568  Double_t rval = TMath::Min( fLinDP[vind], TMath::Max( fLinDM[vind], val ) );
569  if (norm) rval *= fLinNorm[vind];
570  return rval;
571 }
572 
573 //_______________________________________________________________________
575 {
576  // evaluate event linearly
577 
578  Double_t rval=0;
579  for (UInt_t v=0; v<fLinTermOK.size(); v++) {
580  if (fLinTermOK[v])
581  rval += fLinCoefficients[v]*fEventLinearVal[v]*fLinNorm[v];
582  }
583  return rval;
584 }
585 
586 //_______________________________________________________________________
587 inline Double_t TMVA::RuleEnsemble::EvalLinEvent(const std::vector<Double_t> & coefs) const
588 {
589  // evaluate event linearly using the given coefficients
590 
591  Double_t rval=0;
592  for (UInt_t v=0; v<fLinTermOK.size(); v++) {
593  if (fLinTermOK[v])
594  rval += coefs[v]*fEventLinearVal[v]*fLinNorm[v];
595  }
596  return rval;
597 }
598 
599 //_______________________________________________________________________
601 {
602  // evaluate event linearly
603 
604  SetEvent(e);
605  UpdateEventVal();
606  return EvalLinEvent();
607 }
608 
609 //_______________________________________________________________________
611 {
612  // evaluate linear term vind
613 
614  SetEvent(e);
615  UpdateEventVal();
616  return GetEventLinearValNorm(vind);
617 }
618 
619 //_______________________________________________________________________
620 inline Double_t TMVA::RuleEnsemble::EvalLinEvent( const TMVA::Event& e, const std::vector<Double_t> & coefs )
621 {
622  // evaluate event linearly using the given coefficients
623 
624  SetEvent(e);
625  UpdateEventVal();
626  return EvalLinEvent(coefs);
627 }
628 
629 //_______________________________________________________________________
630 inline Double_t TMVA::RuleEnsemble::EvalLinEvent( UInt_t evtidx, const std::vector<Double_t> & coefs ) const
631 {
632  // evaluate event linearly using the given coefficients
633  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
634  Double_t rval=0;
635  UInt_t nlin = fLinTermOK.size();
636  for (UInt_t r=0; r<nlin; r++) {
637  if (fLinTermOK[r]) {
638  rval += coefs[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
639  }
640  }
641  return rval;
642 }
643 
644 //_______________________________________________________________________
646 {
647  // evaluate event evtidx linearly using the stored coefficients (fLinCoefficients)
648  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
649  Double_t rval=0;
650  UInt_t nlin = fLinTermOK.size();
651  for (UInt_t r=0; r<nlin; r++) {
652  if (fLinTermOK[r]) {
653  rval += fLinCoefficients[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
654  }
655  }
656  return rval;
657 }
658 
659 //_______________________________________________________________________
661 {
662  // evaluate linear term vind of event evtidx using the stored coefficient fLinCoefficients[vind]
663  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
664  Double_t rval;
665  rval = fLinCoefficients[vind] * EvalLinEventRaw(vind,*(*fRuleMapEvents)[evtidx],kTRUE);
666  return rval;
667 }
668 
669 //_______________________________________________________________________
671 {
672  // evaluate linear term vind of event evtidx using the given coefficient
673  if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
674  Double_t rval;
675  rval = coefs * EvalLinEventRaw(vind,*(*fRuleMapEvents)[evtidx],kTRUE);
676  return rval;
677 }
678 
679 #endif
const std::vector< const TMVA::Event * > * GetTrainingEvents() const
get list of training events from the rule fitter
Double_t fAverageSupport
Definition: RuleEnsemble.h:366
MsgLogger * fLogger
Definition: RuleEnsemble.h:395
Bool_t DoOnlyRules() const
Definition: RuleEnsemble.h:269
float xmin
Definition: THbookFile.cxx:93
Double_t GetImportanceCut() const
Definition: RuleEnsemble.h:273
Double_t PdfLinear(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for the linear terms.
Bool_t IsRuleMapOK() const
Definition: RuleEnsemble.h:313
Double_t GetLinCoefficients(int i) const
Definition: RuleEnsemble.h:291
void SetLinDP(const std::vector< Double_t > &xmax)
Definition: RuleEnsemble.h:127
ELearningModel GetLearningModel() const
Definition: RuleEnsemble.h:272
const std::vector< Double_t > & GetVarImportance() const
Definition: RuleEnsemble.h:282
Double_t EvalLinEventRaw(UInt_t vind, const Event &e, Bool_t norm) const
Definition: RuleEnsemble.h:552
void SetEvent(const Event &e)
Definition: RuleEnsemble.h:155
void SetLinDM(const std::vector< Double_t > &xmin)
Definition: RuleEnsemble.h:126
void ClearLinNorm(Double_t val=1.0)
Definition: RuleEnsemble.h:135
std::vector< TH1F * > fLinPDFS
Definition: RuleEnsemble.h:362
Bool_t DoFull() const
Definition: RuleEnsemble.h:271
Double_t GetRulePBS(int i) const
Definition: RuleEnsemble.h:300
const std::vector< Double_t > & GetLinCoefficients() const
Definition: RuleEnsemble.h:279
RuleEnsemble()
constructor
Rule * MakeTheRule(const Node *node)
Make a Rule from a given Node.
Double_t GetLinDP(int i) const
Definition: RuleEnsemble.h:294
Int_t CalcNRules(const TMVA::DecisionTree *dtree)
calculate the number of rules
Double_t CalcLinNorm(Double_t stdev)
Definition: RuleEnsemble.h:130
Bool_t DoOnlyLinear() const
Definition: RuleEnsemble.h:270
Double_t GetEventRuleVal(UInt_t i) const
Definition: RuleEnsemble.h:307
virtual ~RuleEnsemble()
destructor
Double_t GetAverageSupport() const
Definition: RuleEnsemble.h:305
const Event * GetEvent() const
Definition: RuleEnsemble.h:265
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:570
std::vector< Double_t > fLinDP
Definition: RuleEnsemble.h:357
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:170
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
Double_t GetLinQuantile() const
Definition: RuleEnsemble.h:284
const Bool_t kFALSE
Definition: Rtypes.h:92
std::vector< Double_t > fRulePBB
Definition: RuleEnsemble.h:373
void Print() const
print function
void PrintRuleGen() const
print rule generation info
Double_t GetImportanceRef() const
Definition: RuleEnsemble.h:274
void CleanupLinear()
cleanup linear model
const std::vector< UInt_t > & GetEventRuleMap(UInt_t evtidx) const
Definition: RuleEnsemble.h:311
void MakeRuleMap(const std::vector< const TMVA::Event * > *events=0, UInt_t ifirst=0, UInt_t ilast=0)
Makes rule map for all events.
MsgLogger & Log() const
message logger
Definition: RuleEnsemble.h:396
std::vector< TMVA::Rule * > fRules
Definition: RuleEnsemble.h:355
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:231
Double_t GetAverageRuleSigma() const
Definition: RuleEnsemble.h:306
std::vector< Char_t > fLinTermOK
Definition: RuleEnsemble.h:356
void SetAverageRuleSigma(Double_t v)
Definition: RuleEnsemble.h:147
Double_t GetRulePSS(int i) const
Definition: RuleEnsemble.h:298
const std::vector< Double_t > & GetLinImportance() const
Definition: RuleEnsemble.h:281
void SetLinCoefficients(const std::vector< Double_t > &v)
Definition: RuleEnsemble.h:124
void SetMsgType(EMsgType t)
const Event * GetTrainingEvent(UInt_t i) const
get the training event from the rule fitter
void SetImportanceRef(Double_t impref)
set reference importance
void RuleResponseStats()
calculate various statistics for this rule
friend std::ostream & operator<<(std::ostream &os, const RuleEnsemble &rules)
Double_t GetEventLinearValNorm(UInt_t i) const
Definition: RuleEnsemble.h:309
std::vector< Double_t > fLinNorm
Definition: RuleEnsemble.h:360
Double_t PdfRule(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for rules.
void RemoveSimilarRules()
remove rules that behave similar
void Copy(RuleEnsemble const &other)
copy function
void PrintRaw(std::ostream &os) const
write rules to stream
Bool_t IsLinTermOK(int i) const
Definition: RuleEnsemble.h:303
Double_t GetLinNorm(int i) const
Definition: RuleEnsemble.h:292
const MethodRuleFit * GetMethodRuleFit() const
Get a pointer to the original MethodRuleFit.
std::vector< TH1F * > fLinPDFB
Definition: RuleEnsemble.h:361
void SetRuleFit(const RuleFit *rf)
Definition: RuleEnsemble.h:116
std::vector< Double_t > fRulePSB
Definition: RuleEnsemble.h:371
void CalcImportance()
calculate the importance of each rule
const TMVA::Event * GetRuleMapEvent(UInt_t evtidx) const
Definition: RuleEnsemble.h:312
std::vector< Double_t > fLinCoefficients
Definition: RuleEnsemble.h:359
Double_t GetLinDM(int i) const
Definition: RuleEnsemble.h:293
void CleanupRules()
cleanup rules
Rule * GetRules(int i)
Definition: RuleEnsemble.h:287
const RuleFit * fRuleFit
Definition: RuleEnsemble.h:393
Double_t FStar() const
We want to estimate F* = argmin_{F(x)} E_{y,x}[ L(y, F(x)) ], where F(x) = FL(x) + FR(x) ...
Bool_t DoRules() const
Definition: RuleEnsemble.h:268
Double_t GetRulePSB(int i) const
Definition: RuleEnsemble.h:299
void CalcVarImportance()
Calculates variable importance using eq (35) in RuleFit paper by Friedman et.al.
Double_t EvalLinEvent() const
Definition: RuleEnsemble.h:574
ROOT::R::TRInterface & r
Definition: Object.C:4
SVector< double, 2 > v
Definition: Dict.h:5
const RuleFit * GetRuleFit() const
Definition: RuleEnsemble.h:261
ELearningModel fLearningModel
Definition: RuleEnsemble.h:351
Double_t CalcRuleImportance()
calculate importance of each rule
EMsgType
Definition: Types.h:61
std::vector< Double_t > fLinDM
Definition: RuleEnsemble.h:358
void MakeRulesFromTree(const DecisionTree *dtree)
create rules from the decision tree structure
Double_t CoefficientRadius()
Calculates sqrt(Sum(a_i^2)), i=1..N (NOTE do not include a0)
void SetOffset(Double_t v=0.0)
Definition: RuleEnsemble.h:122
void ClearCoefficients(Double_t val=0)
Definition: RuleEnsemble.h:133
void AddRule(const Node *node)
add a new rule to the tree
unsigned int UInt_t
Definition: RtypesCore.h:42
void SetImportanceCut(Double_t minimp=0)
Definition: RuleEnsemble.h:141
Double_t GetRulePTag(int i) const
Definition: RuleEnsemble.h:297
void RuleStatistics()
calculate various statistics for this rule
void MakeRules(const std::vector< const TMVA::DecisionTree * > &forest)
Makes rules from the given decision tree.
const std::vector< TMVA::Rule * > & GetRulesConst() const
Definition: RuleEnsemble.h:277
float xmax
Definition: THbookFile.cxx:93
void ClearLinCoefficients(Double_t val=0)
Definition: RuleEnsemble.h:134
std::ostream & operator<<(std::ostream &os, const BinaryTree &tree)
print the tree recursively using the << operator
Definition: BinaryTree.cxx:155
Double_t GetVarImportance(int i) const
Definition: RuleEnsemble.h:296
Double_t fImportanceRef
Definition: RuleEnsemble.h:365
void ReadFromXML(void *wghtnode)
read rules from XML
void SetLinCoefficient(UInt_t i, Double_t v)
Definition: RuleEnsemble.h:125
const Rule * GetRulesConst(int i) const
Definition: RuleEnsemble.h:286
std::vector< Char_t > fEventRuleVal
Definition: RuleEnsemble.h:384
void FindNEndNodes(const TMVA::Node *node, Int_t &nendnodes)
find the number of leaf nodes
void Initialize(const RuleFit *rf)
Initializes all member variables with default values.
void SetCoefficients(const std::vector< Double_t > &v)
set all rule coefficients
UInt_t GetNLinear() const
Definition: RuleEnsemble.h:283
double Double_t
Definition: RtypesCore.h:55
std::vector< Double_t > fVarImportance
Definition: RuleEnsemble.h:364
Double_t GetRulePBB(int i) const
Definition: RuleEnsemble.h:301
std::vector< Double_t > fEventLinearVal
Definition: RuleEnsemble.h:385
void MakeLinearTerms()
Make the linear terms as in eq 25, ref 2. For this the b and (1-b) quantiles are needed.
std::vector< Double_t > fLinImportance
Definition: RuleEnsemble.h:363
Double_t fAverageRuleSigma
Definition: RuleEnsemble.h:367
std::vector< Double_t > fRulePBS
Definition: RuleEnsemble.h:372
void CalcRuleSupport()
calculate the support for all rules
const std::vector< const TMVA::Event * > * fRuleMapEvents
Definition: RuleEnsemble.h:391
std::vector< Double_t > fRulePSS
Definition: RuleEnsemble.h:370
Double_t GetRuleMinDist() const
Definition: RuleEnsemble.h:290
void operator=(const RuleEnsemble &other)
Definition: RuleEnsemble.h:250
void MakeModel()
create model
void SetRules(const std::vector< TMVA::Rule * > &rules)
set rules
Double_t GetEventLinearVal(UInt_t i) const
Definition: RuleEnsemble.h:308
Abstract ClassifierFactory template that handles arbitrary types.
UInt_t GetNRules() const
Definition: RuleEnsemble.h:276
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:202
Double_t GetLinImportance(int i) const
Definition: RuleEnsemble.h:295
void SetLinNorm(const std::vector< Double_t > &norm)
Definition: RuleEnsemble.h:128
void ResetCoefficients()
reset all rule coefficients
std::vector< TMVA::Rule * > & GetRules()
Definition: RuleEnsemble.h:278
void * AddXMLTo(void *parent) const
write rules to XML
void GetCoefficients(std::vector< Double_t > &v)
Retrieve all rule coefficients.
void AddOffset(Double_t v)
Definition: RuleEnsemble.h:123
const std::vector< Double_t > & GetLinNorm() const
Definition: RuleEnsemble.h:280
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
Double_t CalcLinImportance()
calculate the linear importance for each rule
Double_t EvalEvent() const
Definition: RuleEnsemble.h:426
const Bool_t kTRUE
Definition: Rtypes.h:91
float * q
Definition: THbookFile.cxx:87
void SetCoefficient(UInt_t i, Double_t v)
Definition: RuleEnsemble.h:120
const MethodBase * GetMethodBase() const
Get a pointer to the original MethodRuleFit.
Double_t GetOffset() const
Definition: RuleEnsemble.h:275
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
std::vector< Double_t > fRulePTag
Definition: RuleEnsemble.h:374
void SetRuleMinDist(Double_t d)
Definition: RuleEnsemble.h:138
Bool_t DoLinear() const
Definition: RuleEnsemble.h:267
Double_t fImportanceCut
Definition: RuleEnsemble.h:352
std::vector< Double_t > fRuleVarFrac
Definition: RuleEnsemble.h:369
void SetLinQuantile(Double_t q)
Definition: RuleEnsemble.h:144
std::vector< std::vector< UInt_t > > fRuleMap
Definition: RuleEnsemble.h:388
const Event * fEvent
Definition: RuleEnsemble.h:382
UInt_t GetRulesNCuts(int i) const
Definition: RuleEnsemble.h:289
void ReadRaw(std::istream &istr)
read rule ensemble from stream