Logo ROOT   6.18/05
Reference Guide
RuleEnsemble.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RuleEnsemble *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * A class generating an ensemble of rules *
12 * Input: a forest of decision trees *
13 * Output: an ensemble of rules *
14 * *
15 * Authors (alphabetical): *
16 * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
18 * *
19 * Copyright (c) 2005: *
20 * CERN, Switzerland *
21 * Iowa State U. *
22 * MPI-K Heidelberg, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29#ifndef ROOT_TMVA_RuleEnsemble
30#define ROOT_TMVA_RuleEnsemble
31
32// #if ROOT_VERSION_CODE >= 364802
33// #ifndef ROOT_TMathBase
34// #include "TMathBase.h"
35// #endif
36// #else
37#include "TMath.h"
38// #endif
39
40#include "TMVA/DecisionTree.h"
41#include "TMVA/Event.h"
42#include "TMVA/Rule.h"
43#include "TMVA/Types.h"
44
45class TH1F;
46
47namespace TMVA {
48
49 class MethodBase;
50 class RuleFit;
51 class MethodRuleFit;
52 class RuleEnsemble;
53 class MsgLogger;
54
55 std::ostream& operator<<( std::ostream& os, const RuleEnsemble& event );
56
58
59 // output operator for a RuleEnsemble
60 friend std::ostream& operator<< ( std::ostream& os, const RuleEnsemble& rules );
61
62 public:
63
65
66 // main constructor
67 RuleEnsemble( RuleFit* rf );
68
69 // copy constructor
70 RuleEnsemble( const RuleEnsemble& other );
71
72 // empty constructor
74
75 // destructor
76 virtual ~RuleEnsemble();
77
78 // initialize
79 void Initialize( const RuleFit* rf );
80
81 // set message type
82 void SetMsgType( EMsgType t );
83
84 // makes the model - calls MakeRules() and MakeLinearTerms()
85 void MakeModel();
86
87 // generates the rules from a given forest of decision trees
88 void MakeRules( const std::vector< const TMVA::DecisionTree *>& forest );
89
90 // make the linear terms
91 void MakeLinearTerms();
92
93 // select linear model
95
96 // select rule model
98
99 // select full (linear+rules) model
101
102 // set rule collection (if not created by MakeRules())
103 void SetRules( const std::vector< TMVA::Rule *> & rules );
104
105 // set RuleFit ptr
106 void SetRuleFit( const RuleFit *rf ) { fRuleFit = rf; }
107
108 // set coefficients
109 void SetCoefficients( const std::vector< Double_t >& v );
110 void SetCoefficient( UInt_t i, Double_t v ) { if (i<fRules.size()) fRules[i]->SetCoefficient(v); }
111 //
112 void SetOffset(Double_t v=0.0) { fOffset=v; }
114 void SetLinCoefficients( const std::vector< Double_t >& v ) { fLinCoefficients = v; }
116 void SetLinDM( const std::vector<Double_t> & xmin ) { fLinDM = xmin; }
117 void SetLinDP( const std::vector<Double_t> & xmax ) { fLinDP = xmax; }
118 void SetLinNorm( const std::vector<Double_t> & norm ) { fLinNorm = norm; }
119
120 Double_t CalcLinNorm( Double_t stdev ) { return ( stdev>0 ? fAverageRuleSigma/stdev : 1.0 ); }
121
122 // clear coefficients
123 void ClearCoefficients( Double_t val=0 ) { for (UInt_t i=0; i<fRules.size(); i++) fRules[i]->SetCoefficient(val); }
124 void ClearLinCoefficients( Double_t val=0 ) { for (UInt_t i=0; i<fLinCoefficients.size(); i++) fLinCoefficients[i]=val; }
125 void ClearLinNorm( Double_t val=1.0 ) { for (UInt_t i=0; i<fLinNorm.size(); i++) fLinNorm[i]=val; }
126
127 // set maximum allowed distance between equal rules
129
130 // set minimum rule importance - used by CleanupRules()
131 void SetImportanceCut(Double_t minimp=0) { fImportanceCut=minimp; }
132
133 // set the quantile for linear terms
135
136 // set average sigma for rules
137 void SetAverageRuleSigma(Double_t v) { if (v>0.5) v=0.5; fAverageRuleSigma = v; fAverageSupport = 0.5*(1.0+TMath::Sqrt(1.0-4.0*v*v)); }
138
139 // Calculate the number of possible rules from a given tree
140 Int_t CalcNRules( const TMVA::DecisionTree* dtree );
141 // Recursively search for end-nodes; used by CalcNRules()
142 void FindNEndNodes( const TMVA::Node* node, Int_t& nendnodes );
143
144 // set current event to be used
145 void SetEvent( const Event & e ) { fEvent = &e; fEventCacheOK = kFALSE; }
146
147 // fill cached values of rule/linear response
148 void UpdateEventVal();
149
150 // fill binary rule response for all events (or selected subset)
151 void MakeRuleMap(const std::vector<const TMVA::Event *> *events=0, UInt_t ifirst=0, UInt_t ilast=0);
152
153 // clear rule map
154 void ClearRuleMap() { fRuleMap.clear(); fRuleMapEvents=0; }
155
156 // evaluates the event using the ensemble of rules
157 // the following uses fEventCache, that is per event saved in cache
158 Double_t EvalEvent() const;
159 Double_t EvalEvent( const Event & e );
160
161 // same as previous but using other model coefficients
163 const std::vector<Double_t> & coefs,
164 const std::vector<Double_t> & lincoefs) const;
165 Double_t EvalEvent( const Event & e,
166 Double_t ofs,
167 const std::vector<Double_t> & coefs,
168 const std::vector<Double_t> & lincoefs);
169
170 // same as above but using the event index
171 // these will use fRuleMap - MUST call MakeRuleMap() before - no check...
172 Double_t EvalEvent( UInt_t evtidx ) const;
173 Double_t EvalEvent( UInt_t evtidx,
174 Double_t ofs,
175 const std::vector<Double_t> & coefs,
176 const std::vector<Double_t> & lincoefs) const;
177
178 // evaluate the linear term using event by reference
179 // Double_t EvalLinEvent( UInt_t vind ) const;
180 Double_t EvalLinEvent() const;
181 Double_t EvalLinEvent( const std::vector<Double_t> & coefs ) const;
182 Double_t EvalLinEvent( const Event &e );
183 Double_t EvalLinEvent( const Event &e, UInt_t vind );
184 Double_t EvalLinEvent( const Event &e, const std::vector<Double_t> & coefs );
185
186 // idem but using evtidx - must call MakeRuleMap() first
187 Double_t EvalLinEvent( UInt_t evtidx ) const;
188 Double_t EvalLinEvent( UInt_t evtidx, const std::vector<Double_t> & coefs ) const;
189 Double_t EvalLinEvent( UInt_t evtidx, UInt_t vind ) const;
190 Double_t EvalLinEvent( UInt_t evtidx, UInt_t vind, Double_t coefs ) const;
191
192 // evaluate linear terms used to fill fEventLinearVal
193 Double_t EvalLinEventRaw( UInt_t vind, const Event &e, Bool_t norm ) const;
194 Double_t EvalLinEventRaw( UInt_t vind, UInt_t evtidx, Bool_t norm ) const;
195
196 // calculate p(y=1|x) for a given event using the linear terms
197 Double_t PdfLinear( Double_t & nsig, Double_t & ntot ) const;
198
199 // calculate p(y=1|x) for a given event using the rules
200 Double_t PdfRule( Double_t & nsig, Double_t & ntot ) const;
201
202 // calculate F* = 2*p(y=1|x) - 1
203 Double_t FStar() const;
204 Double_t FStar(const TMVA::Event & e );
205
206 // set reference importance for all model objects
207 void SetImportanceRef(Double_t impref);
208
209 // calculates the support for all rules given the set of events
210 void CalcRuleSupport();
211
212 // calculates rule importance
213 void CalcImportance();
214
215 // calculates rule importance
217
218 // calculates linear importance
220
221 // calculates variable importance
222 void CalcVarImportance();
223
224 // remove rules of low importance
225 void CleanupRules();
226
227 // remove linear terms of low importance
228 void CleanupLinear();
229
230 // remove similar rules
231 void RemoveSimilarRules();
232
233 // get rule statistics
234 void RuleStatistics();
235
236 // get rule response stats
237 void RuleResponseStats();
238
239 // copy operator
240 void operator=( const RuleEnsemble& other ) { Copy( other ); }
241
242 // calculate sum of the squared coefficients
244
245 // fill the vector with the coefficients
246 void GetCoefficients( std::vector< Double_t >& v );
247
248 // accessors
249 const MethodRuleFit* GetMethodRuleFit() const;
250 const MethodBase* GetMethodBase() const;
251 const RuleFit* GetRuleFit() const { return fRuleFit; }
252 //
253 const std::vector<const TMVA::Event *>* GetTrainingEvents() const;
254 const Event* GetTrainingEvent(UInt_t i) const;
255 const Event* GetEvent() const { return fEvent; }
256 //
261 Bool_t DoFull() const { return (fLearningModel==kFull); }
265 Double_t GetOffset() const { return fOffset; }
266 UInt_t GetNRules() const { return (DoRules() ? fRules.size():0); }
267 const std::vector<TMVA::Rule*>& GetRulesConst() const { return fRules; }
268 std::vector<TMVA::Rule*>& GetRules() { return fRules; }
269 const std::vector< Double_t >& GetLinCoefficients() const { return fLinCoefficients; }
270 const std::vector< Double_t >& GetLinNorm() const { return fLinNorm; }
271 const std::vector< Double_t >& GetLinImportance() const { return fLinImportance; }
272 const std::vector< Double_t >& GetVarImportance() const { return fVarImportance; }
273 UInt_t GetNLinear() const { return (DoLinear() ? fLinNorm.size():0); }
275
276 const Rule *GetRulesConst(int i) const { return fRules[i]; }
277 Rule *GetRules(int i) { return fRules[i]; }
278
279 UInt_t GetRulesNCuts(int i) const { return fRules[i]->GetRuleCut()->GetNcuts(); }
281 Double_t GetLinCoefficients(int i) const { return fLinCoefficients[i]; }
282 Double_t GetLinNorm(int i) const { return fLinNorm[i]; }
283 Double_t GetLinDM(int i) const { return fLinDM[i]; }
284 Double_t GetLinDP(int i) const { return fLinDP[i]; }
285 Double_t GetLinImportance(int i) const { return fLinImportance[i]; }
286 Double_t GetVarImportance(int i) const { return fVarImportance[i]; }
287 Double_t GetRulePTag(int i) const { return fRulePTag[i]; }
288 Double_t GetRulePSS(int i) const { return fRulePSS[i]; }
289 Double_t GetRulePSB(int i) const { return fRulePSB[i]; }
290 Double_t GetRulePBS(int i) const { return fRulePBS[i]; }
291 Double_t GetRulePBB(int i) const { return fRulePBB[i]; }
292
293 Bool_t IsLinTermOK(int i) const { return fLinTermOK[i]; }
294 //
297 Double_t GetEventRuleVal(UInt_t i) const { return (fEventRuleVal[i] ? 1.0:0.0); }
300 //
301 const std::vector<UInt_t> & GetEventRuleMap(UInt_t evtidx) const { return fRuleMap[evtidx]; }
302 const TMVA::Event *GetRuleMapEvent(UInt_t evtidx) const { return (*fRuleMapEvents)[evtidx]; }
303 Bool_t IsRuleMapOK() const { return fRuleMapOK; }
304
305 // print rule generation info
306 void PrintRuleGen() const;
307
308 // print the ensemble
309 void Print() const;
310
311 // print the model in a cryptic way
312 void PrintRaw ( std::ostream& os ) const; // obsolete
313 void* AddXMLTo ( void* parent ) const;
314
315 // read the model from input stream
316 void ReadRaw ( std::istream& istr ); // obsolete
317 void ReadFromXML( void* wghtnode );
318
319
320 private:
321
322 // delete all rules
323 void DeleteRules() { for (UInt_t i=0; i<fRules.size(); i++) delete fRules[i]; fRules.clear(); }
324
325 // copy method
326 void Copy( RuleEnsemble const& other );
327
328 // set all coeffs to default values
329 void ResetCoefficients();
330
331 // make rules from one decision tree
332 void MakeRulesFromTree( const DecisionTree *dtree );
333
334 // add a rule with the given end-node
335 void AddRule( const Node *node );
336
337 // make a rule
338 Rule *MakeTheRule( const Node *node );
339
340
341 ELearningModel fLearningModel; // can be full (rules+linear), rules, linear
342 Double_t fImportanceCut; // minimum importance accepted
343 Double_t fLinQuantile; // quantile cut to remove outliers
344 Double_t fOffset; // offset in discriminator function
345 std::vector< TMVA::Rule* > fRules; // vector of rules
346 std::vector< Char_t > fLinTermOK; // flags linear terms with sufficient strong importance <-- stores boolean
347 std::vector< Double_t > fLinDP; // delta+ in eq 24, ref 2
348 std::vector< Double_t > fLinDM; // delta-
349 std::vector< Double_t > fLinCoefficients; // linear coefficients, one per variable
350 std::vector< Double_t > fLinNorm; // norm of ditto, see after eq 26 in ref 2
351 std::vector< TH1F* > fLinPDFB; // pdfs for each variable, background
352 std::vector< TH1F* > fLinPDFS; // pdfs for each variable, signal
353 std::vector< Double_t > fLinImportance; // linear term importance
354 std::vector< Double_t > fVarImportance; // one importance per input variable
355 Double_t fImportanceRef; // reference importance (max)
356 Double_t fAverageSupport; // average support (over all rules)
357 Double_t fAverageRuleSigma; // average rule sigma
358 //
359 std::vector< Double_t > fRuleVarFrac; // fraction of rules using a given variable - size of vector = n(variables)
360 std::vector< Double_t > fRulePSS; // p(tag as S|S) - tagged as S if rule is SIG and the event is accepted
361 std::vector< Double_t > fRulePSB; // p(tag as S|B)
362 std::vector< Double_t > fRulePBS; // p(tag as B|S)
363 std::vector< Double_t > fRulePBB; // p(tag as B|B)
364 std::vector< Double_t > fRulePTag; // p(tag)
365 Double_t fRuleFSig; // N(sig)/N(sig)+N(bkg)
366 Double_t fRuleNCave; // N(cuts) average
367 Double_t fRuleNCsig; // idem sigma
368 //
369 Double_t fRuleMinDist; // minimum rule distance
370 UInt_t fNRulesGenerated; // number of rules generated, before cleanup
371 //
372 const Event* fEvent; // current event.
373 Bool_t fEventCacheOK; // true if rule/linear respons are updated
374 std::vector<Char_t> fEventRuleVal; // the rule respons of current event <----- stores boolean
375 std::vector<Double_t> fEventLinearVal; // linear respons
376 //
377 Bool_t fRuleMapOK; // true if MakeRuleMap() has been called
378 std::vector< std::vector<UInt_t> > fRuleMap; // map of rule responses
379 UInt_t fRuleMapInd0; // start index
380 UInt_t fRuleMapInd1; // last index
381 const std::vector<const TMVA::Event *> *fRuleMapEvents; // pointer to vector of events used
382 //
383 const RuleFit* fRuleFit; // pointer to rule fit object
384
385 mutable MsgLogger* fLogger; //! message logger
386 MsgLogger& Log() const { return *fLogger; }
387 };
388}
389
390//_______________________________________________________________________
392{
393 //
394 // Update rule and linear response using the current event
395 //
396 if (fEventCacheOK) return;
397 //
398 if (DoRules()) {
399 UInt_t nrules = fRules.size();
400 fEventRuleVal.resize(nrules,kFALSE);
401 for (UInt_t r=0; r<nrules; r++) {
402 fEventRuleVal[r] = fRules[r]->EvalEvent(*fEvent);
403 }
404 }
405 if (DoLinear()) {
406 UInt_t nlin = fLinTermOK.size();
407 fEventLinearVal.resize(nlin,0);
408 for (UInt_t r=0; r<nlin; r++) {
409 fEventLinearVal[r] = EvalLinEventRaw(r,*fEvent,kFALSE); // not normalised!
410 }
411 }
413}
414
415//_____________________________________________________________________
417{
418 // evaluate current event
419
420 Int_t nrules = fRules.size();
421 Double_t rval=fOffset;
422 Double_t linear=0;
423 //
424 // evaluate all rules
425 // normally it should NOT use the normalized rules - the flag should be kFALSE
426 //
427 if (DoRules()) {
428 for ( Int_t i=0; i<nrules; i++ ) {
429 if (fEventRuleVal[i])
430 rval += fRules[i]->GetCoefficient();
431 }
432 }
433 //
434 // Include linear part - the call below incorporates both coefficient and normalisation (fLinNorm)
435 //
436 if (DoLinear()) linear = EvalLinEvent();
437 rval +=linear;
438
439 return rval;
440}
441
442//_____________________________________________________________________
444 const std::vector<Double_t> & coefs,
445 const std::vector<Double_t> & lincoefs ) const
446{
447 // evaluate current event with given offset and coefs
448
449 Int_t nrules = fRules.size();
450 Double_t rval = ofs;
451 Double_t linear = 0;
452 //
453 // evaluate all rules
454 //
455 if (DoRules()) {
456 for ( Int_t i=0; i<nrules; i++ ) {
457 if (fEventRuleVal[i])
458 rval += coefs[i];
459 }
460 }
461 //
462 // Include linear part - the call below incorporates both coefficient and normalisation (fLinNorm)
463 //
464 if (DoLinear()) linear = EvalLinEvent(lincoefs);
465 rval +=linear;
466
467 return rval;
468}
469
470//_____________________________________________________________________
472{
473 // evaluate event e
474 SetEvent(e);
475 UpdateEventVal();
476 return EvalEvent();
477}
478
479//_____________________________________________________________________
481 Double_t ofs,
482 const std::vector<Double_t> & coefs,
483 const std::vector<Double_t> & lincoefs )
484{
485 // evaluate event e
486 SetEvent(e);
487 UpdateEventVal();
488 return EvalEvent(ofs,coefs,lincoefs);
489}
490
491//_____________________________________________________________________
493{
494 // evaluate event with index evtidx
495 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
496 //
497 Double_t rval=fOffset;
498 if (DoRules()) {
499 UInt_t nrules = fRuleMap[evtidx].size();
500 UInt_t rind;
501 for (UInt_t ir = 0; ir<nrules; ir++) {
502 rind = fRuleMap[evtidx][ir];
503 rval += fRules[rind]->GetCoefficient();
504 }
505 }
506 if (DoLinear()) {
507 UInt_t nlin = fLinTermOK.size();
508 for (UInt_t r=0; r<nlin; r++) {
509 if (fLinTermOK[r]) {
510 rval += fLinCoefficients[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
511 }
512 }
513 }
514 return rval;
515}
516
517//_____________________________________________________________________
519 Double_t ofs,
520 const std::vector<Double_t> & coefs,
521 const std::vector<Double_t> & lincoefs ) const
522{
523 // evaluate event with index evtidx and user given model coefficients
524 //
525 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
526 Double_t rval=ofs;
527 if (DoRules()) {
528 UInt_t nrules = fRuleMap[evtidx].size();
529 UInt_t rind;
530 for (UInt_t ir = 0; ir<nrules; ir++) {
531 rind = fRuleMap[evtidx][ir];
532 rval += coefs[rind];
533 }
534 }
535 if (DoLinear()) {
536 rval += EvalLinEvent( evtidx, lincoefs );
537 }
538 return rval;
539}
540
541//_______________________________________________________________________
543{
544 // evaluate the event linearly (not normalized)
545
546 Double_t val = e.GetValue(vind);
547 Double_t rval = TMath::Min( fLinDP[vind], TMath::Max( fLinDM[vind], val ) );
548 if (norm) rval *= fLinNorm[vind];
549 return rval;
550}
551
552//_______________________________________________________________________
554{
555 // evaluate the event linearly (not normalized)
556
557 Double_t val = (*fRuleMapEvents)[evtidx]->GetValue(vind);
558 Double_t rval = TMath::Min( fLinDP[vind], TMath::Max( fLinDM[vind], val ) );
559 if (norm) rval *= fLinNorm[vind];
560 return rval;
561}
562
563//_______________________________________________________________________
565{
566 // evaluate event linearly
567
568 Double_t rval=0;
569 for (UInt_t v=0; v<fLinTermOK.size(); v++) {
570 if (fLinTermOK[v])
571 rval += fLinCoefficients[v]*fEventLinearVal[v]*fLinNorm[v];
572 }
573 return rval;
574}
575
576//_______________________________________________________________________
577inline Double_t TMVA::RuleEnsemble::EvalLinEvent(const std::vector<Double_t> & coefs) const
578{
579 // evaluate event linearly using the given coefficients
580
581 Double_t rval=0;
582 for (UInt_t v=0; v<fLinTermOK.size(); v++) {
583 if (fLinTermOK[v])
584 rval += coefs[v]*fEventLinearVal[v]*fLinNorm[v];
585 }
586 return rval;
587}
588
589//_______________________________________________________________________
591{
592 // evaluate event linearly
593
594 SetEvent(e);
595 UpdateEventVal();
596 return EvalLinEvent();
597}
598
599//_______________________________________________________________________
601{
602 // evaluate linear term vind
603
604 SetEvent(e);
605 UpdateEventVal();
606 return GetEventLinearValNorm(vind);
607}
608
609//_______________________________________________________________________
610inline Double_t TMVA::RuleEnsemble::EvalLinEvent( const TMVA::Event& e, const std::vector<Double_t> & coefs )
611{
612 // evaluate event linearly using the given coefficients
613
614 SetEvent(e);
615 UpdateEventVal();
616 return EvalLinEvent(coefs);
617}
618
619//_______________________________________________________________________
620inline Double_t TMVA::RuleEnsemble::EvalLinEvent( UInt_t evtidx, const std::vector<Double_t> & coefs ) const
621{
622 // evaluate event linearly using the given coefficients
623 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
624 Double_t rval=0;
625 UInt_t nlin = fLinTermOK.size();
626 for (UInt_t r=0; r<nlin; r++) {
627 if (fLinTermOK[r]) {
628 rval += coefs[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
629 }
630 }
631 return rval;
632}
633
634//_______________________________________________________________________
636{
637 // evaluate event linearly using the stored linear coefficients (fLinCoefficients)
638 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
639 Double_t rval=0;
640 UInt_t nlin = fLinTermOK.size();
641 for (UInt_t r=0; r<nlin; r++) {
642 if (fLinTermOK[r]) {
643 rval += fLinCoefficients[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
644 }
645 }
646 return rval;
647}
648
649//_______________________________________________________________________
651{
652 // evaluate linear term vind for the event, using the stored coefficient fLinCoefficients[vind]
653 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
654 Double_t rval;
655 rval = fLinCoefficients[vind] * EvalLinEventRaw(vind,*(*fRuleMapEvents)[evtidx],kTRUE);
656 return rval;
657}
658
659//_______________________________________________________________________
661{
662 // evaluate linear term vind for the event, using the given coefficient
663 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
664 Double_t rval;
665 rval = coefs * EvalLinEventRaw(vind,*(*fRuleMapEvents)[evtidx],kTRUE);
666 return rval;
667}
668
669#endif
SVector< double, 2 > v
Definition: Dict.h:5
ROOT::R::TRInterface & r
Definition: Object.C:4
#define d(i)
Definition: RSha256.hxx:102
#define e(i)
Definition: RSha256.hxx:103
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:87
float xmin
Definition: THbookFile.cxx:93
float * q
Definition: THbookFile.cxx:87
float xmax
Definition: THbookFile.cxx:93
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
Implementation of a Decision Tree.
Definition: DecisionTree.h:64
Virtual base Class for all MVA method.
Definition: MethodBase.h:109
J Friedman's RuleFit method.
Definition: MethodRuleFit.h:47
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Node for the BinarySearch or Decision Trees.
Definition: Node.h:56
std::vector< Double_t > fRulePBB
Definition: RuleEnsemble.h:363
Bool_t IsRuleMapOK() const
Definition: RuleEnsemble.h:303
MsgLogger & Log() const
message logger
Definition: RuleEnsemble.h:386
void SetLinCoefficients(const std::vector< Double_t > &v)
Definition: RuleEnsemble.h:114
Double_t GetLinDP(int i) const
Definition: RuleEnsemble.h:284
virtual ~RuleEnsemble()
destructor
Double_t EvalEvent() const
Definition: RuleEnsemble.h:416
const std::vector< UInt_t > & GetEventRuleMap(UInt_t evtidx) const
Definition: RuleEnsemble.h:301
Double_t GetRulePBB(int i) const
Definition: RuleEnsemble.h:291
void CalcVarImportance()
Calculates variable importance using eq (35) in RuleFit paper by Friedman et.al.
std::vector< Double_t > fLinImportance
Definition: RuleEnsemble.h:353
void SetLinCoefficient(UInt_t i, Double_t v)
Definition: RuleEnsemble.h:115
Double_t GetLinDM(int i) const
Definition: RuleEnsemble.h:283
void SetOffset(Double_t v=0.0)
Definition: RuleEnsemble.h:112
void SetImportanceRef(Double_t impref)
set reference importance
void CalcImportance()
calculate the importance of each rule
void PrintRuleGen() const
print rule generation info
void MakeRuleMap(const std::vector< const TMVA::Event * > *events=0, UInt_t ifirst=0, UInt_t ilast=0)
Makes rule map for all events.
Int_t CalcNRules(const TMVA::DecisionTree *dtree)
calculate the number of rules
std::vector< Double_t > fLinCoefficients
Definition: RuleEnsemble.h:349
void ResetCoefficients()
reset all rule coefficients
Bool_t DoRules() const
Definition: RuleEnsemble.h:258
std::vector< Double_t > fRulePBS
Definition: RuleEnsemble.h:362
void SetMsgType(EMsgType t)
std::vector< TMVA::Rule * > fRules
Definition: RuleEnsemble.h:345
Bool_t DoOnlyLinear() const
Definition: RuleEnsemble.h:260
void SetLinQuantile(Double_t q)
Definition: RuleEnsemble.h:134
Double_t CalcLinNorm(Double_t stdev)
Definition: RuleEnsemble.h:120
Double_t GetLinQuantile() const
Definition: RuleEnsemble.h:274
Double_t GetVarImportance(int i) const
Definition: RuleEnsemble.h:286
void ReadRaw(std::istream &istr)
read rule ensemble from stream
std::vector< Double_t > fRulePSS
Definition: RuleEnsemble.h:360
std::vector< Double_t > fLinDP
Definition: RuleEnsemble.h:347
Double_t EvalLinEventRaw(UInt_t vind, const Event &e, Bool_t norm) const
Definition: RuleEnsemble.h:542
const Event * fEvent
Definition: RuleEnsemble.h:372
void AddRule(const Node *node)
add a new rule to the tree
Double_t GetRulePTag(int i) const
Definition: RuleEnsemble.h:287
void ReadFromXML(void *wghtnode)
read rules from XML
void SetLinDP(const std::vector< Double_t > &xmax)
Definition: RuleEnsemble.h:117
void SetRuleFit(const RuleFit *rf)
Definition: RuleEnsemble.h:106
Double_t GetImportanceCut() const
Definition: RuleEnsemble.h:263
const Event * GetTrainingEvent(UInt_t i) const
get the training event from the rule fitter
const std::vector< const TMVA::Event * > * GetTrainingEvents() const
get list of training events from the rule fitter
Double_t GetRuleMinDist() const
Definition: RuleEnsemble.h:280
void SetLinNorm(const std::vector< Double_t > &norm)
Definition: RuleEnsemble.h:118
void SetRules(const std::vector< TMVA::Rule * > &rules)
set rules
void MakeRules(const std::vector< const TMVA::DecisionTree * > &forest)
Makes rules from the given decision tree.
void RemoveSimilarRules()
remove rules that behave similar
std::vector< Double_t > fRulePTag
Definition: RuleEnsemble.h:364
std::vector< TH1F * > fLinPDFB
Definition: RuleEnsemble.h:351
std::vector< Char_t > fEventRuleVal
Definition: RuleEnsemble.h:374
ELearningModel fLearningModel
Definition: RuleEnsemble.h:341
void FindNEndNodes(const TMVA::Node *node, Int_t &nendnodes)
find the number of leaf nodes
Double_t GetRulePBS(int i) const
Definition: RuleEnsemble.h:290
RuleEnsemble()
constructor
Double_t GetEventRuleVal(UInt_t i) const
Definition: RuleEnsemble.h:297
const std::vector< Double_t > & GetLinCoefficients() const
Definition: RuleEnsemble.h:269
Double_t GetImportanceRef() const
Definition: RuleEnsemble.h:264
const RuleFit * fRuleFit
Definition: RuleEnsemble.h:383
std::vector< TMVA::Rule * > & GetRules()
Definition: RuleEnsemble.h:268
const std::vector< Double_t > & GetVarImportance() const
Definition: RuleEnsemble.h:272
void CleanupRules()
cleanup rules
void Initialize(const RuleFit *rf)
Initializes all member variables with default values.
const std::vector< Double_t > & GetLinNorm() const
Definition: RuleEnsemble.h:270
Bool_t DoLinear() const
Definition: RuleEnsemble.h:257
std::vector< Double_t > fLinDM
Definition: RuleEnsemble.h:348
void CleanupLinear()
cleanup linear model
void RuleResponseStats()
calculate various statistics for this rule
std::vector< Double_t > fVarImportance
Definition: RuleEnsemble.h:354
UInt_t GetNLinear() const
Definition: RuleEnsemble.h:273
UInt_t GetRulesNCuts(int i) const
Definition: RuleEnsemble.h:279
Double_t GetRulePSS(int i) const
Definition: RuleEnsemble.h:288
Double_t GetRulePSB(int i) const
Definition: RuleEnsemble.h:289
void ClearLinCoefficients(Double_t val=0)
Definition: RuleEnsemble.h:124
const RuleFit * GetRuleFit() const
Definition: RuleEnsemble.h:251
Double_t EvalLinEvent() const
Definition: RuleEnsemble.h:564
void SetLinDM(const std::vector< Double_t > &xmin)
Definition: RuleEnsemble.h:116
void * AddXMLTo(void *parent) const
write rules to XML
std::vector< Double_t > fLinNorm
Definition: RuleEnsemble.h:350
void SetCoefficient(UInt_t i, Double_t v)
Definition: RuleEnsemble.h:110
const std::vector< TMVA::Rule * > & GetRulesConst() const
Definition: RuleEnsemble.h:267
Double_t GetLinNorm(int i) const
Definition: RuleEnsemble.h:282
void ClearLinNorm(Double_t val=1.0)
Definition: RuleEnsemble.h:125
const Event * GetEvent() const
Definition: RuleEnsemble.h:255
const MethodRuleFit * GetMethodRuleFit() const
Get a pointer to the original MethodRuleFit.
void ClearCoefficients(Double_t val=0)
Definition: RuleEnsemble.h:123
Double_t GetAverageRuleSigma() const
Definition: RuleEnsemble.h:296
std::vector< Double_t > fEventLinearVal
Definition: RuleEnsemble.h:375
void SetRuleMinDist(Double_t d)
Definition: RuleEnsemble.h:128
void MakeModel()
create model
void RuleStatistics()
calculate various statistics for this rule
Rule * GetRules(int i)
Definition: RuleEnsemble.h:277
void SetCoefficients(const std::vector< Double_t > &v)
set all rule coefficients
void Print() const
print function
const std::vector< Double_t > & GetLinImportance() const
Definition: RuleEnsemble.h:271
Double_t PdfRule(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for rules.
Double_t GetLinCoefficients(int i) const
Definition: RuleEnsemble.h:281
const MethodBase * GetMethodBase() const
Get a pointer to the original MethodRuleFit.
Double_t fAverageSupport
Definition: RuleEnsemble.h:356
friend std::ostream & operator<<(std::ostream &os, const RuleEnsemble &rules)
Double_t GetOffset() const
Definition: RuleEnsemble.h:265
Bool_t DoOnlyRules() const
Definition: RuleEnsemble.h:259
Double_t GetLinImportance(int i) const
Definition: RuleEnsemble.h:285
std::vector< Char_t > fLinTermOK
Definition: RuleEnsemble.h:346
Double_t GetAverageSupport() const
Definition: RuleEnsemble.h:295
void Copy(RuleEnsemble const &other)
copy function
MsgLogger * fLogger
Definition: RuleEnsemble.h:385
std::vector< std::vector< UInt_t > > fRuleMap
Definition: RuleEnsemble.h:378
Double_t CalcLinImportance()
calculate the linear importance for each rule
const std::vector< const TMVA::Event * > * fRuleMapEvents
Definition: RuleEnsemble.h:381
void SetAverageRuleSigma(Double_t v)
Definition: RuleEnsemble.h:137
Double_t CalcRuleImportance()
calculate importance of each rule
Bool_t IsLinTermOK(int i) const
Definition: RuleEnsemble.h:293
std::vector< TH1F * > fLinPDFS
Definition: RuleEnsemble.h:352
Double_t fImportanceRef
Definition: RuleEnsemble.h:355
void PrintRaw(std::ostream &os) const
write rules to stream
std::vector< Double_t > fRulePSB
Definition: RuleEnsemble.h:361
Double_t fAverageRuleSigma
Definition: RuleEnsemble.h:357
void CalcRuleSupport()
calculate the support for all rules
Double_t fImportanceCut
Definition: RuleEnsemble.h:342
void AddOffset(Double_t v)
Definition: RuleEnsemble.h:113
Double_t GetEventLinearVal(UInt_t i) const
Definition: RuleEnsemble.h:298
ELearningModel GetLearningModel() const
Definition: RuleEnsemble.h:262
Double_t PdfLinear(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for the linear terms.
Double_t CoefficientRadius()
Calculates sqrt(Sum(a_i^2)), i=1..N (NOTE do not include a0)
Bool_t DoFull() const
Definition: RuleEnsemble.h:261
void SetEvent(const Event &e)
Definition: RuleEnsemble.h:145
Double_t GetEventLinearValNorm(UInt_t i) const
Definition: RuleEnsemble.h:299
void MakeRulesFromTree(const DecisionTree *dtree)
create rules from the decision tree structure
void SetImportanceCut(Double_t minimp=0)
Definition: RuleEnsemble.h:131
const TMVA::Event * GetRuleMapEvent(UInt_t evtidx) const
Definition: RuleEnsemble.h:302
UInt_t GetNRules() const
Definition: RuleEnsemble.h:266
void MakeLinearTerms()
Make the linear terms as in eq 25, ref 2 For this the b and (1-b) quantiles are needed.
void operator=(const RuleEnsemble &other)
Definition: RuleEnsemble.h:240
const Rule * GetRulesConst(int i) const
Definition: RuleEnsemble.h:276
Rule * MakeTheRule(const Node *node)
Make a Rule from a given Node.
std::vector< Double_t > fRuleVarFrac
Definition: RuleEnsemble.h:359
void GetCoefficients(std::vector< Double_t > &v)
Retrieve all rule coefficients.
Double_t FStar() const
We want to estimate F* = argmin Eyx( L(y,F(x) ), min wrt F(x) F(x) = FL(x) + FR(x) ,...
A class implementing various fits of rule ensembles.
Definition: RuleFit.h:45
Implementation of a rule.
Definition: Rule.h:48
create variable transformations
std::ostream & operator<<(std::ostream &os, const BinaryTree &tree)
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:212
Double_t Sqrt(Double_t x)
Definition: TMath.h:679
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:180