Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RuleEnsemble.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RuleEnsemble *
8 * *
9 * *
10 * Description: *
11 * A class generating an ensemble of rules *
12 * Input: a forest of decision trees *
13 * Output: an ensemble of rules *
14 * *
15 * Authors (alphabetical): *
16 * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
18 * *
19 * Copyright (c) 2005: *
20 * CERN, Switzerland *
21 * Iowa State U. *
22 * MPI-K Heidelberg, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (see tmva/doc/LICENSE) *
27 **********************************************************************************/
28
29#ifndef ROOT_TMVA_RuleEnsemble
30#define ROOT_TMVA_RuleEnsemble
31
32#include "TMath.h"
33
34#include "TMVA/DecisionTree.h"
35#include "TMVA/Event.h"
36#include "TMVA/Rule.h"
37#include "TMVA/Types.h"
38
39#include <vector>
40
41class TH1F;
42
43namespace TMVA {
44
45 class MethodBase;
46 class RuleFit;
47 class MethodRuleFit;
48 class RuleEnsemble;
49 class MsgLogger;
50
51 std::ostream& operator<<( std::ostream& os, const RuleEnsemble& event );
52
54
55 // output operator for a RuleEnsemble
56 friend std::ostream& operator<< ( std::ostream& os, const RuleEnsemble& rules );
57
58 public:
59
61
62 // main constructor
63 RuleEnsemble( RuleFit* rf );
64
65 // copy constructor
66 RuleEnsemble( const RuleEnsemble& other );
67
68 // empty constructor
70
71 // destructor
72 virtual ~RuleEnsemble();
73
74 // initialize
75 void Initialize( const RuleFit* rf );
76
77 // set message type
78 void SetMsgType( EMsgType t );
79
80 // makes the model - calls MakeRules() and MakeLinearTerms()
81 void MakeModel();
82
83 // generates the rules from a given forest of decision trees
84 void MakeRules( const std::vector< const TMVA::DecisionTree *>& forest );
85
86 // make the linear terms
87 void MakeLinearTerms();
88
89 // select linear model
91
92 // select rule model
94
95 // select full (linear+rules) model
97
98 // set rule collection (if not created by MakeRules())
99 void SetRules( const std::vector< TMVA::Rule *> & rules );
100
101 // set RuleFit ptr
// Stores the given RuleFit pointer in fRuleFit; the pointer is kept as-is
// (no copy of the pointee is made here).
102 void SetRuleFit( const RuleFit *rf ) { fRuleFit = rf; }
103
104 // set coefficients
105 void SetCoefficients( const std::vector< Double_t >& v );
106 void SetCoefficient( UInt_t i, Double_t v ) { if (i<fRules.size()) fRules[i]->SetCoefficient(v); }
107 //
// Set the offset of the discriminator function (defaults to 0.0).
108 void SetOffset(Double_t v=0.0) { fOffset=v; }
// Replace the linear coefficients with a copy of v.
110 void SetLinCoefficients( const std::vector< Double_t >& v ) { fLinCoefficients = v; }
// Replace fLinDM (lower clamp used by EvalLinEventRaw) with a copy of xmin.
112 void SetLinDM( const std::vector<Double_t> & xmin ) { fLinDM = xmin; }
// Replace fLinDP (upper clamp used by EvalLinEventRaw) with a copy of xmax.
113 void SetLinDP( const std::vector<Double_t> & xmax ) { fLinDP = xmax; }
// Replace the per-term normalisation factors with a copy of norm.
114 void SetLinNorm( const std::vector<Double_t> & norm ) { fLinNorm = norm; }
115
116 Double_t CalcLinNorm( Double_t stdev ) { return ( stdev>0 ? fAverageRuleSigma/stdev : 1.0 ); }
117
118 // clear coefficients
119 void ClearCoefficients( Double_t val=0 ) { for (UInt_t i=0; i<fRules.size(); i++) fRules[i]->SetCoefficient(val); }
120 void ClearLinCoefficients( Double_t val=0 ) { for (UInt_t i=0; i<fLinCoefficients.size(); i++) fLinCoefficients[i]=val; }
121 void ClearLinNorm( Double_t val=1.0 ) { for (UInt_t i=0; i<fLinNorm.size(); i++) fLinNorm[i]=val; }
122
123 // set maximum allowed distance between equal rules
125
126 // set minimum rule importance - used by CleanupRules()
// Store the minimum accepted importance in fImportanceCut (default 0).
127 void SetImportanceCut(Double_t minimp=0) { fImportanceCut=minimp; }
128
129 // set the quantile for linear terms
131
132 // set average sigma for rules
133 void SetAverageRuleSigma(Double_t v) { if (v>0.5) v=0.5; fAverageRuleSigma = v; fAverageSupport = 0.5*(1.0+TMath::Sqrt(1.0-4.0*v*v)); }
134
135 // Calculate the number of possible rules from a given tree
136 Int_t CalcNRules( const TMVA::DecisionTree* dtree );
137 // Recursively search for end-nodes; used by CalcNRules()
138 void FindNEndNodes( const TMVA::Node* node, Int_t& nendnodes );
139
140 // set current event to be used
// Make e the current event: stores the address of e (not a copy) in fEvent
// and marks the cached rule/linear responses as stale (fEventCacheOK = kFALSE).
141 void SetEvent( const Event & e ) { fEvent = &e; fEventCacheOK = kFALSE; }
142
143 // fill cached values of rule/linear response
144 void UpdateEventVal();
145
146 // fill binary rule response for all events (or selected subset)
147 void MakeRuleMap(const std::vector<const TMVA::Event *> *events=nullptr, UInt_t ifirst=0, UInt_t ilast=0);
148
149 // clear rule map
150 void ClearRuleMap() { fRuleMap.clear(); fRuleMapEvents=nullptr; }
151
152 // evaluates the event using the ensemble of rules
153 // the following uses fEventCache, that is per event saved in cache
154 Double_t EvalEvent() const;
155 Double_t EvalEvent( const Event & e );
156
157 // same as previous but using other model coefficients
159 const std::vector<Double_t> & coefs,
160 const std::vector<Double_t> & lincoefs) const;
161 Double_t EvalEvent( const Event & e,
162 Double_t ofs,
163 const std::vector<Double_t> & coefs,
164 const std::vector<Double_t> & lincoefs);
165
166 // same as above but using the event index
167 // these will use fRuleMap - MUST call MakeRuleMap() before - no check...
168 Double_t EvalEvent( UInt_t evtidx ) const;
169 Double_t EvalEvent( UInt_t evtidx,
170 Double_t ofs,
171 const std::vector<Double_t> & coefs,
172 const std::vector<Double_t> & lincoefs) const;
173
174 // evaluate the linear term using event by reference
175 // Double_t EvalLinEvent( UInt_t vind ) const;
176 Double_t EvalLinEvent() const;
177 Double_t EvalLinEvent( const std::vector<Double_t> & coefs ) const;
178 Double_t EvalLinEvent( const Event &e );
179 Double_t EvalLinEvent( const Event &e, UInt_t vind );
180 Double_t EvalLinEvent( const Event &e, const std::vector<Double_t> & coefs );
181
182 // idem but using evtidx - must call MakeRuleMap() first
183 Double_t EvalLinEvent( UInt_t evtidx ) const;
184 Double_t EvalLinEvent( UInt_t evtidx, const std::vector<Double_t> & coefs ) const;
185 Double_t EvalLinEvent( UInt_t evtidx, UInt_t vind ) const;
186 Double_t EvalLinEvent( UInt_t evtidx, UInt_t vind, Double_t coefs ) const;
187
188 // evaluate linear terms used to fill fEventLinearVal
189 Double_t EvalLinEventRaw( UInt_t vind, const Event &e, Bool_t norm ) const;
190 Double_t EvalLinEventRaw( UInt_t vind, UInt_t evtidx, Bool_t norm ) const;
191
192 // calculate p(y=1|x) for a given event using the linear terms
193 Double_t PdfLinear( Double_t & nsig, Double_t & ntot ) const;
194
195 // calculate p(y=1|x) for a given event using the rules
196 Double_t PdfRule( Double_t & nsig, Double_t & ntot ) const;
197
198 // calculate F* = 2*p(y=1|x) - 1
199 Double_t FStar() const;
200 Double_t FStar(const TMVA::Event & e );
201
202 // set reference importance for all model objects
203 void SetImportanceRef(Double_t impref);
204
205 // calculates the support for all rules given the set of events
206 void CalcRuleSupport();
207
208 // calculates rule importance
209 void CalcImportance();
210
211 // calculates rule importance
213
214 // calculates linear importance
216
217 // calculates variable importance
218 void CalcVarImportance();
219
220 // remove rules of low importance
221 void CleanupRules();
222
223 // remove linear terms of low importance
224 void CleanupLinear();
225
226 // remove similar rules
227 void RemoveSimilarRules();
228
229 // get rule statistics
230 void RuleStatistics();
231
232 // get rule response stats
233 void RuleResponseStats();
234
235 // copy operator
// Assignment: delegates entirely to Copy(other).
// NOTE(review): returns void, so assignments cannot be chained; how
// self-assignment is handled depends on Copy(), which is not visible here.
236 void operator=( const RuleEnsemble& other ) { Copy( other ); }
237
238 // calculate sum of the squared coefficients
240
241 // fill the vector with the coefficients
242 void GetCoefficients( std::vector< Double_t >& v );
243
244 // accessors
245 const MethodRuleFit* GetMethodRuleFit() const;
246 const MethodBase* GetMethodBase() const;
// Returns the stored RuleFit pointer (fRuleFit).
247 const RuleFit* GetRuleFit() const { return fRuleFit; }
248 //
249 const std::vector<const TMVA::Event *>* GetTrainingEvents() const;
250 const Event* GetTrainingEvent(UInt_t i) const;
// Returns the pointer to the current event as last set through SetEvent().
251 const Event* GetEvent() const { return fEvent; }
252 //
// True when the learning model is kFull.
257 Bool_t DoFull() const { return (fLearningModel==kFull); }
// Returns the offset of the discriminator function (fOffset).
261 Double_t GetOffset() const { return fOffset; }
262 UInt_t GetNRules() const { return (DoRules() ? fRules.size():0); }
// Whole-container accessors: each returns a reference to the member vector
// itself, not a copy.
// Read-only view of the rule vector.
263 const std::vector<TMVA::Rule*>& GetRulesConst() const { return fRules; }
// Mutable access to the rule vector.
264 std::vector<TMVA::Rule*>& GetRules() { return fRules; }
265 const std::vector< Double_t >& GetLinCoefficients() const { return fLinCoefficients; }
266 const std::vector< Double_t >& GetLinNorm() const { return fLinNorm; }
267 const std::vector< Double_t >& GetLinImportance() const { return fLinImportance; }
268 const std::vector< Double_t >& GetVarImportance() const { return fVarImportance; }
269 UInt_t GetNLinear() const { return (DoLinear() ? fLinNorm.size():0); }
271
// Single-rule accessors: return fRules[i] directly; i is not range-checked.
272 const Rule *GetRulesConst(int i) const { return fRules[i]; }
273 Rule *GetRules(int i) { return fRules[i]; }
274
275 UInt_t GetRulesNCuts(int i) const { return fRules[i]->GetRuleCut()->GetNcuts(); }
// Element accessors: each returns element i of the corresponding member
// vector via operator[]; the index is not range-checked.
277 Double_t GetLinCoefficients(int i) const { return fLinCoefficients[i]; }
278 Double_t GetLinNorm(int i) const { return fLinNorm[i]; }
279 Double_t GetLinDM(int i) const { return fLinDM[i]; }
280 Double_t GetLinDP(int i) const { return fLinDP[i]; }
281 Double_t GetLinImportance(int i) const { return fLinImportance[i]; }
282 Double_t GetVarImportance(int i) const { return fVarImportance[i]; }
283 Double_t GetRulePTag(int i) const { return fRulePTag[i]; }
284 Double_t GetRulePSS(int i) const { return fRulePSS[i]; }
285 Double_t GetRulePSB(int i) const { return fRulePSB[i]; }
286 Double_t GetRulePBS(int i) const { return fRulePBS[i]; }
287 Double_t GetRulePBB(int i) const { return fRulePBB[i]; }
288
// Flag stored for linear term i (fLinTermOK holds Char_t values used as
// booleans); the index is not range-checked.
289 Bool_t IsLinTermOK(int i) const { return fLinTermOK[i]; }
290 //
293 Double_t GetEventRuleVal(UInt_t i) const { return (fEventRuleVal[i] ? 1.0:0.0); }
296 //
// Rule-map accessors. The first two index fRuleMap / *fRuleMapEvents
// directly with evtidx: no range check, and no check that fRuleMapEvents
// is non-null.
// Rule indices stored in the map for event evtidx.
297 const std::vector<UInt_t> & GetEventRuleMap(UInt_t evtidx) const { return fRuleMap[evtidx]; }
// Event pointer stored at position evtidx of the vector the map refers to.
298 const TMVA::Event *GetRuleMapEvent(UInt_t evtidx) const { return (*fRuleMapEvents)[evtidx]; }
// Current value of the fRuleMapOK flag.
299 Bool_t IsRuleMapOK() const { return fRuleMapOK; }
300
301 // print rule generation info
302 void PrintRuleGen() const;
303
304 // print the ensemble
305 void Print() const;
306
307 // print the model in a cryptic way
308 void PrintRaw ( std::ostream& os ) const; // obsolete
309 void* AddXMLTo ( void* parent ) const;
310
311 // read the model from input stream
312 void ReadRaw ( std::istream& istr ); // obsolete
313 void ReadFromXML( void* wghtnode );
314
315
316 private:
317
318 // delete all rules
319 void DeleteRules() { for (UInt_t i=0; i<fRules.size(); i++) delete fRules[i]; fRules.clear(); }
320
321 // copy method
322 void Copy( RuleEnsemble const& other );
323
324 // set all coeffs to default values
325 void ResetCoefficients();
326
327 // make rules from one decision tree
328 void MakeRulesFromTree( const DecisionTree *dtree );
329
330 // add a rule with the given end-node
331 void AddRule( const Node *node );
332
333 // make a rule
334 Rule *MakeTheRule( const Node *node );
335
336
337 ELearningModel fLearningModel; ///< can be full (rules+linear), rules, linear
338 Double_t fImportanceCut; ///< minimum importance accepted
339 Double_t fLinQuantile; ///< quantile cut to remove outliers
340 Double_t fOffset; ///< offset in discriminator function
341 std::vector< TMVA::Rule* > fRules; ///< vector of rules
342 std::vector< Char_t > fLinTermOK; ///< flags linear terms with sufficient strong importance <-- stores boolean
343 std::vector< Double_t > fLinDP; ///< delta+ in eq 24, ref 2
344 std::vector< Double_t > fLinDM; ///< delta-
345 std::vector< Double_t > fLinCoefficients; ///< linear coefficients, one per variable
346 std::vector< Double_t > fLinNorm; ///< norm of ditto, see after eq 26 in ref 2
347 std::vector< TH1F* > fLinPDFB; ///< pdfs for each variable, background
348 std::vector< TH1F* > fLinPDFS; ///< pdfs for each variable, signal
349 std::vector< Double_t > fLinImportance; ///< linear term importance
350 std::vector< Double_t > fVarImportance; ///< one importance per input variable
351 Double_t fImportanceRef; ///< reference importance (max)
352 Double_t fAverageSupport; ///< average support (over all rules)
353 Double_t fAverageRuleSigma; ///< average rule sigma
354 //
355 std::vector< Double_t > fRuleVarFrac; ///< fraction of rules using a given variable - size of vector = n(variables)
356 std::vector< Double_t > fRulePSS; ///< p(tag as S|S) - tagged as S if rule is SIG and the event is accepted
357 std::vector< Double_t > fRulePSB; ///< p(tag as S|B)
358 std::vector< Double_t > fRulePBS; ///< p(tag as B|S)
359 std::vector< Double_t > fRulePBB; ///< p(tag as B|B)
360 std::vector< Double_t > fRulePTag; ///< p(tag)
361 Double_t fRuleFSig; ///< N(sig)/N(sig)+N(bkg)
362 Double_t fRuleNCave; ///< N(cuts) average
363 Double_t fRuleNCsig; ///< idem sigma
364 //
365 Double_t fRuleMinDist; ///< minimum rule distance
366 UInt_t fNRulesGenerated; ///< number of rules generated, before cleanup
367 //
368 const Event* fEvent; ///< current event.
369 Bool_t fEventCacheOK; ///< true if the cached rule/linear responses are up to date
370 std::vector<Char_t> fEventRuleVal; ///< the rule response of the current event <----- stores boolean
371 std::vector<Double_t> fEventLinearVal; ///< linear response of the current event
372 //
373 Bool_t fRuleMapOK; ///< true if MakeRuleMap() has been called
374 std::vector< std::vector<UInt_t> > fRuleMap; ///< map of rule responses
375 UInt_t fRuleMapInd0; ///< start index
376 UInt_t fRuleMapInd1; ///< last index
377 const std::vector<const TMVA::Event *> *fRuleMapEvents; ///< pointer to vector of events used
378 //
379 const RuleFit* fRuleFit; ///< pointer to rule fit object
380
381 mutable MsgLogger* fLogger; ///<! message logger
// Returns the message logger by dereferencing fLogger; fLogger is not
// checked for null here.
382 MsgLogger& Log() const { return *fLogger; }
383 };
384}
385
386//_______________________________________________________________________
388{
389 //
390 // Update rule and linear responses using the current event
391 //
392 if (fEventCacheOK) return;
393 //
394 if (DoRules()) {
395 UInt_t nrules = fRules.size();
396 fEventRuleVal.resize(nrules,kFALSE);
397 for (UInt_t r=0; r<nrules; r++) {
398 fEventRuleVal[r] = fRules[r]->EvalEvent(*fEvent);
399 }
400 }
401 if (DoLinear()) {
402 UInt_t nlin = fLinTermOK.size();
403 fEventLinearVal.resize(nlin,0);
404 for (UInt_t r=0; r<nlin; r++) {
405 fEventLinearVal[r] = EvalLinEventRaw(r,*fEvent,kFALSE); // not normalised!
406 }
407 }
409}
410
411//_____________________________________________________________________
413{
414 // evaluate current event
415
416 Int_t nrules = fRules.size();
417 Double_t rval=fOffset;
418 Double_t linear=0;
419 //
420 // evaluate all rules
421 // normally it should NOT use the normalized rules - the flag should be kFALSE
422 //
423 if (DoRules()) {
424 for ( Int_t i=0; i<nrules; i++ ) {
425 if (fEventRuleVal[i])
426 rval += fRules[i]->GetCoefficient();
427 }
428 }
429 //
430 // Include linear part - the call below incorporates both coefficient and normalisation (fLinNorm)
431 //
432 if (DoLinear()) linear = EvalLinEvent();
433 rval +=linear;
434
435 return rval;
436}
437
438//_____________________________________________________________________
440 const std::vector<Double_t> & coefs,
441 const std::vector<Double_t> & lincoefs ) const
442{
443 // evaluate current event with given offset and coefs
444
445 Int_t nrules = fRules.size();
446 Double_t rval = ofs;
447 Double_t linear = 0;
448 //
449 // evaluate all rules
450 //
451 if (DoRules()) {
452 for ( Int_t i=0; i<nrules; i++ ) {
453 if (fEventRuleVal[i])
454 rval += coefs[i];
455 }
456 }
457 //
458 // Include linear part - the call below incorporates both coefficient and normalisation (fLinNorm)
459 //
460 if (DoLinear()) linear = EvalLinEvent(lincoefs);
461 rval +=linear;
462
463 return rval;
464}
465
466//_____________________________________________________________________
468{
469 // evaluate event e
470 SetEvent(e);
471 UpdateEventVal();
472 return EvalEvent();
473}
474
475//_____________________________________________________________________
477 Double_t ofs,
478 const std::vector<Double_t> & coefs,
479 const std::vector<Double_t> & lincoefs )
480{
481 // evaluate event e
482 SetEvent(e);
483 UpdateEventVal();
484 return EvalEvent(ofs,coefs,lincoefs);
485}
486
487//_____________________________________________________________________
489{
490 // evaluate event with index evtidx
491 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
492 //
493 Double_t rval=fOffset;
494 if (DoRules()) {
495 UInt_t nrules = fRuleMap[evtidx].size();
496 UInt_t rind;
497 for (UInt_t ir = 0; ir<nrules; ir++) {
498 rind = fRuleMap[evtidx][ir];
499 rval += fRules[rind]->GetCoefficient();
500 }
501 }
502 if (DoLinear()) {
503 UInt_t nlin = fLinTermOK.size();
504 for (UInt_t r=0; r<nlin; r++) {
505 if (fLinTermOK[r]) {
506 rval += fLinCoefficients[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
507 }
508 }
509 }
510 return rval;
511}
512
513//_____________________________________________________________________
515 Double_t ofs,
516 const std::vector<Double_t> & coefs,
517 const std::vector<Double_t> & lincoefs ) const
518{
519 // evaluate event with index evtidx and user given model coefficients
520 //
521 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
522 Double_t rval=ofs;
523 if (DoRules()) {
524 UInt_t nrules = fRuleMap[evtidx].size();
525 UInt_t rind;
526 for (UInt_t ir = 0; ir<nrules; ir++) {
527 rind = fRuleMap[evtidx][ir];
528 rval += coefs[rind];
529 }
530 }
531 if (DoLinear()) {
532 rval += EvalLinEvent( evtidx, lincoefs );
533 }
534 return rval;
535}
536
537//_______________________________________________________________________
539{
540 // evaluate linear term vind for event e, clamped to [fLinDM,fLinDP]; multiplied by fLinNorm only if norm is set
541
542 Double_t val = e.GetValue(vind);
543 Double_t rval = TMath::Min( fLinDP[vind], TMath::Max( fLinDM[vind], val ) );
544 if (norm) rval *= fLinNorm[vind];
545 return rval;
546}
547
548//_______________________________________________________________________
550{
551 // evaluate linear term vind for rule-map event evtidx, clamped to [fLinDM,fLinDP]; multiplied by fLinNorm only if norm is set
552
553 Double_t val = (*fRuleMapEvents)[evtidx]->GetValue(vind);
554 Double_t rval = TMath::Min( fLinDP[vind], TMath::Max( fLinDM[vind], val ) );
555 if (norm) rval *= fLinNorm[vind];
556 return rval;
557}
558
559//_______________________________________________________________________
561{
562 // evaluate event linearly
563
564 Double_t rval=0;
565 for (UInt_t v=0; v<fLinTermOK.size(); v++) {
566 if (fLinTermOK[v])
567 rval += fLinCoefficients[v]*fEventLinearVal[v]*fLinNorm[v];
568 }
569 return rval;
570}
571
572//_______________________________________________________________________
573inline Double_t TMVA::RuleEnsemble::EvalLinEvent(const std::vector<Double_t> & coefs) const
574{
575 // evaluate event linearly using the given coefficients
576
577 Double_t rval=0;
578 for (UInt_t v=0; v<fLinTermOK.size(); v++) {
579 if (fLinTermOK[v])
580 rval += coefs[v]*fEventLinearVal[v]*fLinNorm[v];
581 }
582 return rval;
583}
584
585//_______________________________________________________________________
587{
588 // evaluate event linearly
589
590 SetEvent(e);
591 UpdateEventVal();
592 return EvalLinEvent();
593}
594
595//_______________________________________________________________________
597{
598 // evaluate linear term vind
599
600 SetEvent(e);
601 UpdateEventVal();
602 return GetEventLinearValNorm(vind);
603}
604
605//_______________________________________________________________________
606inline Double_t TMVA::RuleEnsemble::EvalLinEvent( const TMVA::Event& e, const std::vector<Double_t> & coefs )
607{
608 // evaluate event linearly using the given coefficients
609
610 SetEvent(e);
611 UpdateEventVal();
612 return EvalLinEvent(coefs);
613}
614
615//_______________________________________________________________________
616inline Double_t TMVA::RuleEnsemble::EvalLinEvent( UInt_t evtidx, const std::vector<Double_t> & coefs ) const
617{
618 // evaluate event linearly using the given coefficients
619 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
620 Double_t rval=0;
621 UInt_t nlin = fLinTermOK.size();
622 for (UInt_t r=0; r<nlin; r++) {
623 if (fLinTermOK[r]) {
624 rval += coefs[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
625 }
626 }
627 return rval;
628}
629
630//_______________________________________________________________________
632{
633 // evaluate event evtidx linearly using the stored coefficients (fLinCoefficients)
634 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
635 Double_t rval=0;
636 UInt_t nlin = fLinTermOK.size();
637 for (UInt_t r=0; r<nlin; r++) {
638 if (fLinTermOK[r]) {
639 rval += fLinCoefficients[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
640 }
641 }
642 return rval;
643}
644
645//_______________________________________________________________________
647{
648 // evaluate linear term vind of event evtidx using the stored coefficient fLinCoefficients[vind]
649 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
650 Double_t rval;
651 rval = fLinCoefficients[vind] * EvalLinEventRaw(vind,*(*fRuleMapEvents)[evtidx],kTRUE);
652 return rval;
653}
654
655//_______________________________________________________________________
657{
658 // evaluate linear term vind of event evtidx using the given coefficient
659 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
660 Double_t rval;
661 rval = coefs * EvalLinEventRaw(vind,*(*fRuleMapEvents)[evtidx],kTRUE);
662 return rval;
663}
664
665#endif
#define d(i)
Definition RSha256.hxx:102
#define e(i)
Definition RSha256.hxx:103
bool Bool_t
Definition RtypesCore.h:63
unsigned int UInt_t
Definition RtypesCore.h:46
constexpr Bool_t kFALSE
Definition RtypesCore.h:94
double Double_t
Definition RtypesCore.h:59
constexpr Bool_t kTRUE
Definition RtypesCore.h:93
TBuffer & operator<<(TBuffer &buf, const Tmpl *obj)
Definition TBuffer.h:397
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
float xmin
float * q
float xmax
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:622
Implementation of a Decision Tree.
Virtual base Class for all MVA method.
Definition MethodBase.h:111
J Friedman's RuleFit method.
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57
Node for the BinarySearch or Decision Trees.
Definition Node.h:58
std::vector< Double_t > fRulePBB
p(tag as B|B)
Bool_t IsRuleMapOK() const
MsgLogger & Log() const
void SetLinCoefficients(const std::vector< Double_t > &v)
Double_t GetLinDP(int i) const
virtual ~RuleEnsemble()
destructor
Double_t EvalEvent() const
const std::vector< UInt_t > & GetEventRuleMap(UInt_t evtidx) const
Double_t GetRulePBB(int i) const
void CalcVarImportance()
Calculates variable importance using eq (35) in RuleFit paper by Friedman et.al.
std::vector< Double_t > fLinImportance
linear term importance
void SetLinCoefficient(UInt_t i, Double_t v)
Double_t GetLinDM(int i) const
void SetOffset(Double_t v=0.0)
void SetImportanceRef(Double_t impref)
set reference importance
void CalcImportance()
calculate the importance of each rule
void PrintRuleGen() const
print rule generation info
Int_t CalcNRules(const TMVA::DecisionTree *dtree)
calculate the number of rules
std::vector< Double_t > fLinCoefficients
linear coefficients, one per variable
UInt_t fNRulesGenerated
number of rules generated, before cleanup
void ResetCoefficients()
reset all rule coefficients
Bool_t DoRules() const
std::vector< Double_t > fRulePBS
p(tag as B|S)
void SetMsgType(EMsgType t)
std::vector< TMVA::Rule * > fRules
vector of rules
Bool_t DoOnlyLinear() const
void SetLinQuantile(Double_t q)
Double_t CalcLinNorm(Double_t stdev)
Double_t fOffset
offset in discriminator function
Double_t GetLinQuantile() const
Double_t GetVarImportance(int i) const
void ReadRaw(std::istream &istr)
read rule ensemble from stream
std::vector< Double_t > fRulePSS
p(tag as S|S) - tagged as S if rule is SIG and the event is accepted
std::vector< Double_t > fLinDP
delta+ in eq 24, ref 2
Double_t EvalLinEventRaw(UInt_t vind, const Event &e, Bool_t norm) const
const Event * fEvent
current event.
void AddRule(const Node *node)
add a new rule to the tree
Double_t GetRulePTag(int i) const
void ReadFromXML(void *wghtnode)
read rules from XML
void SetLinDP(const std::vector< Double_t > &xmax)
void SetRuleFit(const RuleFit *rf)
Double_t GetImportanceCut() const
const Event * GetTrainingEvent(UInt_t i) const
get the training event from the rule fitter
const std::vector< const TMVA::Event * > * GetTrainingEvents() const
get list of training events from the rule fitter
Double_t GetRuleMinDist() const
void SetLinNorm(const std::vector< Double_t > &norm)
void SetRules(const std::vector< TMVA::Rule * > &rules)
set rules
void MakeRules(const std::vector< const TMVA::DecisionTree * > &forest)
Makes rules from the given decision tree.
void RemoveSimilarRules()
remove rules that behave similar
std::vector< Double_t > fRulePTag
p(tag)
std::vector< TH1F * > fLinPDFB
pdfs for each variable, background
std::vector< Char_t > fEventRuleVal
the rule response of the current event (stores a boolean)
Double_t fRuleFSig
N(sig)/N(sig)+N(bkg)
ELearningModel fLearningModel
can be full (rules+linear), rules, linear
Double_t fRuleMinDist
minimum rule distance
void FindNEndNodes(const TMVA::Node *node, Int_t &nendnodes)
find the number of leaf nodes
Double_t GetRulePBS(int i) const
Bool_t fRuleMapOK
true if MakeRuleMap() has been called
RuleEnsemble()
constructor
Double_t GetEventRuleVal(UInt_t i) const
const std::vector< Double_t > & GetLinCoefficients() const
Double_t GetImportanceRef() const
const RuleFit * fRuleFit
pointer to rule fit object
std::vector< TMVA::Rule * > & GetRules()
UInt_t fRuleMapInd1
last index
const std::vector< Double_t > & GetVarImportance() const
void CleanupRules()
cleanup rules
void Initialize(const RuleFit *rf)
Initializes all member variables with default values.
const std::vector< Double_t > & GetLinNorm() const
Bool_t DoLinear() const
std::vector< Double_t > fLinDM
delta-
void CleanupLinear()
cleanup linear model
void RuleResponseStats()
calculate various statistics for this rule
std::vector< Double_t > fVarImportance
one importance per input variable
UInt_t GetNLinear() const
UInt_t GetRulesNCuts(int i) const
Double_t GetRulePSS(int i) const
Double_t GetRulePSB(int i) const
void ClearLinCoefficients(Double_t val=0)
const RuleFit * GetRuleFit() const
Double_t EvalLinEvent() const
void SetLinDM(const std::vector< Double_t > &xmin)
void * AddXMLTo(void *parent) const
write rules to XML
std::vector< Double_t > fLinNorm
norm of ditto, see after eq 26 in ref 2
void SetCoefficient(UInt_t i, Double_t v)
const std::vector< TMVA::Rule * > & GetRulesConst() const
Double_t fLinQuantile
quantile cut to remove outliers
Double_t GetLinNorm(int i) const
void ClearLinNorm(Double_t val=1.0)
const Event * GetEvent() const
const MethodRuleFit * GetMethodRuleFit() const
Get a pointer to the original MethodRuleFit.
void ClearCoefficients(Double_t val=0)
Double_t GetAverageRuleSigma() const
std::vector< Double_t > fEventLinearVal
linear respons
void SetRuleMinDist(Double_t d)
void MakeModel()
create model
void RuleStatistics()
calculate various statistics for this rule
Rule * GetRules(int i)
void SetCoefficients(const std::vector< Double_t > &v)
set all rule coefficients
void Print() const
print function
const std::vector< Double_t > & GetLinImportance() const
Double_t PdfRule(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for rules.
void MakeRuleMap(const std::vector< const TMVA::Event * > *events=nullptr, UInt_t ifirst=0, UInt_t ilast=0)
Makes rule map for all events.
Double_t GetLinCoefficients(int i) const
const MethodBase * GetMethodBase() const
Get a pointer to the original MethodRuleFit.
Double_t fAverageSupport
average support (over all rules)
friend std::ostream & operator<<(std::ostream &os, const RuleEnsemble &rules)
Double_t GetOffset() const
Bool_t DoOnlyRules() const
Double_t fRuleNCave
N(cuts) average.
Double_t GetLinImportance(int i) const
std::vector< Char_t > fLinTermOK
flags linear terms with sufficiently strong importance (stores a boolean)
Double_t GetAverageSupport() const
void Copy(RuleEnsemble const &other)
copy function
MsgLogger * fLogger
! message logger
std::vector< std::vector< UInt_t > > fRuleMap
map of rule responses
Double_t CalcLinImportance()
calculate the linear importance for each rule
const std::vector< const TMVA::Event * > * fRuleMapEvents
pointer to vector of events used
void SetAverageRuleSigma(Double_t v)
Double_t CalcRuleImportance()
calculate importance of each rule
Bool_t IsLinTermOK(int i) const
std::vector< TH1F * > fLinPDFS
pdfs for each variable, signal
Double_t fImportanceRef
reference importance (max)
void PrintRaw(std::ostream &os) const
write rules to stream
std::vector< Double_t > fRulePSB
p(tag as S|B)
Double_t fAverageRuleSigma
average rule sigma
Bool_t fEventCacheOK
true if rule/linear respons are updated
void CalcRuleSupport()
calculate the support for all rules
Double_t fImportanceCut
minimum importance accepted
void AddOffset(Double_t v)
Double_t GetEventLinearVal(UInt_t i) const
ELearningModel GetLearningModel() const
Double_t PdfLinear(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for the linear terms.
Double_t CoefficientRadius()
Calculates sqrt(Sum(a_i^2)), i=1..N (NOTE do not include a0)
Bool_t DoFull() const
void SetEvent(const Event &e)
Double_t GetEventLinearValNorm(UInt_t i) const
UInt_t fRuleMapInd0
start index
void MakeRulesFromTree(const DecisionTree *dtree)
create rules from the decision tree structure
void SetImportanceCut(Double_t minimp=0)
const TMVA::Event * GetRuleMapEvent(UInt_t evtidx) const
Double_t fRuleNCsig
idem sigma
UInt_t GetNRules() const
void MakeLinearTerms()
Make the linear terms as in eq 25, ref 2 For this the b and (1-b) quantiles are needed.
void operator=(const RuleEnsemble &other)
const Rule * GetRulesConst(int i) const
Rule * MakeTheRule(const Node *node)
Make a Rule from a given Node.
std::vector< Double_t > fRuleVarFrac
fraction of rules using a given variable - size of vector = n(variables)
void GetCoefficients(std::vector< Double_t > &v)
Retrieve all rule coefficients.
Double_t FStar() const
We want to estimate F* = argmin Eyx( L(y,F(x) ), min wrt F(x) F(x) = FL(x) + FR(x) ,...
A class implementing various fits of rule ensembles.
Definition RuleFit.h:46
Implementation of a rule.
Definition Rule.h:50
EMsgType
Definition Types.h:55
create variable transformations
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition TMathBase.h:250
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition TMath.h:662
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition TMathBase.h:198