Logo ROOT   6.10/09
Reference Guide
RuleFit.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : Rule *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * A class describing a 'rule' *
12  * Each internal node of a tree defines a rule from all the parental nodes. *
13  * A rule with 0 or 1 nodes in the list is a root rule -> corresponds to a0. *
14  * Input: a decision tree (in the constructor) *
15  * its coefficient *
16  * *
17  * *
18  * Authors (alphabetical): *
19  * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
20  * *
21  * Copyright (c) 2005: *
22  * CERN, Switzerland *
23  * Iowa State U. *
24  * MPI-K Heidelberg, Germany *
25  * *
26  * Redistribution and use in source and binary forms, with or without *
27  * modification, are permitted according to the terms listed in LICENSE *
28  * (http://tmva.sourceforge.net/LICENSE) *
29  **********************************************************************************/
30 
31 /*! \class TMVA::RuleFit
32 \ingroup TMVA
33 A class implementing various fits of rule ensembles
34 */
35 #include "TMVA/RuleFit.h"
36 
37 #include "TMVA/DataSet.h"
38 #include "TMVA/DecisionTree.h"
39 #include "TMVA/Event.h"
40 #include "TMVA/Factory.h" // for root base dir
41 #include "TMVA/GiniIndex.h"
42 #include "TMVA/MethodBase.h"
43 #include "TMVA/MethodRuleFit.h"
44 #include "TMVA/MsgLogger.h"
45 #include "TMVA/Timer.h"
46 #include "TMVA/Tools.h"
47 #include "TMVA/Types.h"
48 #include "TMVA/SeparationBase.h"
49 
50 #include "TDirectory.h"
51 #include "TH2F.h"
52 #include "TFile.h"
53 #include "TKey.h"
54 #include "TRandom3.h"
55 #include "TROOT.h" // for gROOT
56 
57 #include <algorithm>
58 
60 
61 ////////////////////////////////////////////////////////////////////////////////
62 /// constructor
63 
64 TMVA::RuleFit::RuleFit( const MethodBase *rfbase )
65 : fVisHistsUseImp( kTRUE ),
66  fLogger( new MsgLogger("RuleFit") )
67 {
68  Initialize( rfbase );
69  std::srand( randSEED ); // initialize random number generator used by std::random_shuffle
70 }
71 
72 ////////////////////////////////////////////////////////////////////////////////
73 /// default constructor
74 
76  : fNTreeSample(0)
77  , fNEveEffTrain(0)
78  , fMethodRuleFit(0)
79  , fMethodBase(0)
80  , fVisHistsUseImp( kTRUE )
81  , fLogger( new MsgLogger("RuleFit") )
82 {
83  std::srand( randSEED ); // initialize random number generator used by std::random_shuffle
84 }
85 
86 ////////////////////////////////////////////////////////////////////////////////
87 /// destructor
88 
90 {
91  delete fLogger;
92 }
93 
94 ////////////////////////////////////////////////////////////////////////////////
95 /// init effective number of events (using event weights)
96 
98 {
99  UInt_t neve = fTrainingEvents.size();
100  if (neve==0) return;
101  //
103  //
104 }
105 
106 ////////////////////////////////////////////////////////////////////////////////
107 /// initialize pointers
108 
109 void TMVA::RuleFit::InitPtrs( const MethodBase *rfbase )
110 {
111  this->SetMethodBase(rfbase);
112  fRuleEnsemble.Initialize( this );
113  fRuleFitParams.SetRuleFit( this );
114 }
115 
116 ////////////////////////////////////////////////////////////////////////////////
117 /// initialize the parameters of the RuleFit method and make rules
118 
120 {
121  InitPtrs(rfbase);
122 
123  if (fMethodRuleFit){
126  std::vector<const TMVA::Event*> tmp;
127  for (Long64_t ievt=0; ievt<nevents; ievt++) {
128  const Event *event = fMethodRuleFit->GetEvent(ievt);
129  tmp.push_back(event);
130  }
131  SetTrainingEvents( tmp );
132  }
133  // SetTrainingEvents( fMethodRuleFit->GetTrainingEvents() );
134 
135  InitNEveEff();
136 
137  MakeForest();
138 
139  // Make the model - Rule + Linear (if fDoLinear is true)
141 
142  // init rulefit params
144 
145 }
146 
147 ////////////////////////////////////////////////////////////////////////////////
148 /// set MethodBase
149 
151 {
152  fMethodBase = rfbase;
153  fMethodRuleFit = dynamic_cast<const MethodRuleFit *>(rfbase);
154 }
155 
156 ////////////////////////////////////////////////////////////////////////////////
157 /// copy method
158 
// Copy the state of 'other' into this RuleFit (self-assignment safe).
// NOTE(review): this listing is a rendered Doxygen dump -- the leading
// integers are the original file's line numbers, not code. Per that
// numbering, original lines 162 and 164 are missing from this view
// (presumably the copies of fMethodRuleFit and the training events);
// verify against the upstream RuleFit.cxx before relying on this being
// a complete copy operation.
159 void TMVA::RuleFit::Copy( const RuleFit& other )
160 {
161  if(this != &other) {
// shallow-copies the (non-owning) method pointer
163  fMethodBase = other.GetMethodBase();
165  // fSubsampleEvents = other.GetSubsampleEvents();
166 
// copies the grown forest and the derived rule ensemble
167  fForest = other.GetForest();
168  fRuleEnsemble = other.GetRuleEnsemble();
169  }
170 }
171 
172 ////////////////////////////////////////////////////////////////////////////////
173 /// calculate the sum of weights
174 
175 Double_t TMVA::RuleFit::CalcWeightSum( const std::vector<const Event *> *events, UInt_t neve )
176 {
177  if (events==0) return 0.0;
178  if (neve==0) neve=events->size();
179  //
180  Double_t sumw=0;
181  for (UInt_t ie=0; ie<neve; ie++) {
182  sumw += ((*events)[ie])->GetWeight();
183  }
184  return sumw;
185 }
186 
187 ////////////////////////////////////////////////////////////////////////////////
188 /// set the current message type to that of mlog for this class and all other subtools
189 
// Set the minimum message (verbosity) type on this class's logger.
// NOTE(review): per the embedded Doxygen line numbers, original lines
// 193-194 are missing from this rendered view -- the doc comment above
// ("and all other subtools") suggests they forwarded 't' to the
// sub-tools (e.g. rule ensemble / fit params); verify upstream.
190 void TMVA::RuleFit::SetMsgType( EMsgType t )
191 {
192  fLogger->SetMinType(t);
195 }
196 
197 ////////////////////////////////////////////////////////////////////////////////
198 /// build the decision tree using fNTreeSample events from fTrainingEventsRndm
199 
201 {
202  if (dt==0) return;
203  if (fMethodRuleFit==0) {
204  Log() << kFATAL << "RuleFit::BuildTree() - Attempting to build a tree NOT from a MethodRuleFit" << Endl;
205  }
206  std::vector<const Event *> evevec;
207  for (UInt_t ie=0; ie<fNTreeSample; ie++) {
208  evevec.push_back(fTrainingEventsRndm[ie]);
209  }
210  dt->BuildTree(evevec);
214  dt->PruneTree();
215  }
216 }
217 
218 ////////////////////////////////////////////////////////////////////////////////
219 /// make a forest of decisiontrees
220 
222 {
223  if (fMethodRuleFit==0) {
224  Log() << kFATAL << "RuleFit::BuildTree() - Attempting to build a tree NOT from a MethodRuleFit" << Endl;
225  }
226  Log() << kDEBUG << "Creating a forest with " << fMethodRuleFit->GetNTrees() << " decision trees" << Endl;
227  Log() << kDEBUG << "Each tree is built using a random subsample with " << fNTreeSample << " events" << Endl;
228  //
229  Timer timer( fMethodRuleFit->GetNTrees(), "RuleFit" );
230 
231  // Double_t fsig;
232  Int_t nsig,nbkg;
233  //
234  TRandom3 rndGen;
235  //
236  // First save all event weights.
237  // Weights are modified by the boosting.
238  // Those weights we do not want for the later fitting.
239  //
240  Bool_t useBoost = fMethodRuleFit->UseBoost(); // (AdaBoost (True) or RandomForest/Tree (False)
241 
242  if (useBoost) SaveEventWeights();
243 
244  for (Int_t i=0; i<fMethodRuleFit->GetNTrees(); i++) {
245  // timer.DrawProgressBar(i);
246  if (!useBoost) ReshuffleEvents();
247  nsig=0;
248  nbkg=0;
249  for (UInt_t ie = 0; ie<fNTreeSample; ie++) {
250  if (fMethodBase->DataInfo().IsSignal(fTrainingEventsRndm[ie])) nsig++; // ignore weights here
251  else nbkg++;
252  }
253  // fsig = Double_t(nsig)/Double_t(nsig+nbkg);
254  // do not implement the above in this release...just set it to default
255 
256  DecisionTree *dt=nullptr;
257  Bool_t tryAgain=kTRUE;
258  Int_t ntries=0;
259  const Int_t ntriesMax=10;
260  Double_t frnd = 0.;
261  while (tryAgain) {
262  frnd = 100*rndGen.Uniform( fMethodRuleFit->GetMinFracNEve(), 0.5*fMethodRuleFit->GetMaxFracNEve() );
263  Int_t iclass = 0; // event class being treated as signal during training
264  Bool_t useRandomisedTree = !useBoost;
265  dt = new DecisionTree( fMethodRuleFit->GetSeparationBase(), frnd, fMethodRuleFit->GetNCuts(), &(fMethodRuleFit->DataInfo()), iclass, useRandomisedTree);
266  dt->SetNVars(fMethodBase->GetNvar());
267 
268  BuildTree(dt); // reads fNTreeSample events from fTrainingEventsRndm
269  if (dt->GetNNodes()<3) {
270  delete dt;
271  dt=0;
272  }
273  ntries++;
274  tryAgain = ((dt==0) && (ntries<ntriesMax));
275  }
276  if (dt) {
277  fForest.push_back(dt);
278  if (useBoost) Boost(dt);
279 
280  } else {
281 
282  Log() << kWARNING << "------------------------------------------------------------------" << Endl;
283  Log() << kWARNING << " Failed growing a tree even after " << ntriesMax << " trials" << Endl;
284  Log() << kWARNING << " Possible solutions: " << Endl;
285  Log() << kWARNING << " 1. increase the number of training events" << Endl;
286  Log() << kWARNING << " 2. set a lower min fraction cut (fEventsMin)" << Endl;
287  Log() << kWARNING << " 3. maybe also decrease the max fraction cut (fEventsMax)" << Endl;
288  Log() << kWARNING << " If the above warning occurs rarely only, it can be ignored" << Endl;
289  Log() << kWARNING << "------------------------------------------------------------------" << Endl;
290  }
291 
292  Log() << kDEBUG << "Built tree with minimum cut at N = " << frnd <<"% events"
293  << " => N(nodes) = " << fForest.back()->GetNNodes()
294  << " ; n(tries) = " << ntries
295  << Endl;
296  }
297 
298  // Now restore event weights
299  if (useBoost) RestoreEventWeights();
300 
301  // print statistics on the forest created
303 }
304 
305 ////////////////////////////////////////////////////////////////////////////////
306 /// save event weights - must be done before making the forest
307 
309 {
310  fEventWeights.clear();
311  for (std::vector<const Event*>::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) {
312  Double_t w = (*e)->GetBoostWeight();
313  fEventWeights.push_back(w);
314  }
315 }
316 
317 ////////////////////////////////////////////////////////////////////////////////
318 /// save event weights - must be done before making the forest
319 
321 {
322  UInt_t ie=0;
323  if (fEventWeights.size() != fTrainingEvents.size()) {
324  Log() << kERROR << "RuleFit::RestoreEventWeights() called without having called SaveEventWeights() before!" << Endl;
325  return;
326  }
327  for (std::vector<const Event*>::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) {
328  (*e)->SetBoostWeight(fEventWeights[ie]);
329  ie++;
330  }
331 }
332 
333 ////////////////////////////////////////////////////////////////////////////////
334 /// Boost the events. The algorithm below is the called AdaBoost.
335 /// See MethodBDT for details.
336 /// Actually, this is a more or less copy of MethodBDT::AdaBoost().
337 
339 {
340  Double_t sumw=0; // sum of initial weights - all events
341  Double_t sumwfalse=0; // idem, only misclassified events
342  //
343  std::vector<Char_t> correctSelected; // <--- boolean stored
344  //
345  for (std::vector<const Event*>::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) {
346  Bool_t isSignalType = (dt->CheckEvent(*e,kTRUE) > 0.5 );
347  Double_t w = (*e)->GetWeight();
348  sumw += w;
349  //
350  if (isSignalType == fMethodBase->DataInfo().IsSignal(*e)) { // correctly classified
351  correctSelected.push_back(kTRUE);
352  }
353  else { // misclassified
354  sumwfalse+= w;
355  correctSelected.push_back(kFALSE);
356  }
357  }
358  // misclassification error
359  Double_t err = sumwfalse/sumw;
360  // calculate boost weight for misclassified events
361  // use for now the exponent = 1.0
362  // one could have w = ((1-err)/err)^beta
363  Double_t boostWeight = (err>0 ? (1.0-err)/err : 1000.0);
364  Double_t newSumw=0.0;
365  UInt_t ie=0;
366  // set new weight to misclassified events
367  for (std::vector<const Event*>::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) {
368  if (!correctSelected[ie])
369  (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostWeight);
370  newSumw+=(*e)->GetWeight();
371  ie++;
372  }
373  // reweight all events
374  Double_t scale = sumw/newSumw;
375  for (std::vector<const Event*>::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) {
376  (*e)->SetBoostWeight( (*e)->GetBoostWeight() * scale);
377  }
378  Log() << kDEBUG << "boostWeight = " << boostWeight << " scale = " << scale << Endl;
379 }
380 
381 ////////////////////////////////////////////////////////////////////////////////
382 /// summary of statistics of all trees
383 /// - end-nodes: average and spread
384 
386 {
387  UInt_t ntrees = fForest.size();
388  if (ntrees==0) return;
389  const DecisionTree *tree;
390  Double_t sumn2 = 0;
391  Double_t sumn = 0;
392  Double_t nd;
393  for (UInt_t i=0; i<ntrees; i++) {
394  tree = fForest[i];
395  nd = Double_t(tree->GetNNodes());
396  sumn += nd;
397  sumn2 += nd*nd;
398  }
399  Double_t sig = TMath::Sqrt( gTools().ComputeVariance( sumn2, sumn, ntrees ));
400  Log() << kVERBOSE << "Nodes in trees: average & std dev = " << sumn/ntrees << " , " << sig << Endl;
401 }
402 
403 ////////////////////////////////////////////////////////////////////////////////
404 ///
405 /// Fit the coefficients for the rule ensemble
406 ///
407 
409 {
410  Log() << kVERBOSE << "Fitting rule/linear terms" << Endl;
412 }
413 
414 ////////////////////////////////////////////////////////////////////////////////
415 /// calculates the importance of each rule
416 
418 {
419  Log() << kVERBOSE << "Calculating importance" << Endl;
424  Log() << kVERBOSE << "Filling rule statistics" << Endl;
426 }
427 
428 ////////////////////////////////////////////////////////////////////////////////
429 /// evaluate single event
430 
432 {
433  return fRuleEnsemble.EvalEvent( e );
434 }
435 
436 ////////////////////////////////////////////////////////////////////////////////
437 /// set the training events randomly
438 
439 void TMVA::RuleFit::SetTrainingEvents( const std::vector<const Event *>& el )
440 {
441  if (fMethodRuleFit==0) Log() << kFATAL << "RuleFit::SetTrainingEvents - MethodRuleFit not initialized" << Endl;
442  UInt_t neve = el.size();
443  if (neve==0) Log() << kWARNING << "An empty sample of training events was given" << Endl;
444 
445  // copy vector
446  fTrainingEvents.clear();
447  fTrainingEventsRndm.clear();
448  for (UInt_t i=0; i<neve; i++) {
449  fTrainingEvents.push_back(static_cast< const Event *>(el[i]));
450  fTrainingEventsRndm.push_back(static_cast< const Event *>(el[i]));
451  }
452 
453  // Re-shuffle the vector, ie, recreate it in a random order
454  std::random_shuffle( fTrainingEventsRndm.begin(), fTrainingEventsRndm.end() );
455 
456  // fraction events per tree
457  fNTreeSample = static_cast<UInt_t>(neve*fMethodRuleFit->GetTreeEveFrac());
458  Log() << kDEBUG << "Number of events per tree : " << fNTreeSample
459  << " ( N(events) = " << neve << " )"
460  << " randomly drawn without replacement" << Endl;
461 }
462 
463 ////////////////////////////////////////////////////////////////////////////////
464 /// draw a random subsample of the training events without replacement
465 
466 void TMVA::RuleFit::GetRndmSampleEvents(std::vector< const Event * > & evevec, UInt_t nevents)
467 {
468  ReshuffleEvents();
469  if ((nevents<fTrainingEventsRndm.size()) && (nevents>0)) {
470  evevec.resize(nevents);
471  for (UInt_t ie=0; ie<nevents; ie++) {
472  evevec[ie] = fTrainingEventsRndm[ie];
473  }
474  }
475  else {
476  Log() << kWARNING << "GetRndmSampleEvents() : requested sub sample size larger than total size (BUG!).";
477  }
478 }
479 ////////////////////////////////////////////////////////////////////////////////
480 /// normalize rule importance hists
481 ///
482 /// if all weights are positive, the scale will be 1/maxweight
483 /// if minimum weight < 0, then the scale will be 1/max(maxweight,abs(minweight))
484 
485 void TMVA::RuleFit::NormVisHists(std::vector<TH2F *> & hlist)
486 {
487  if (hlist.empty()) return;
488  //
489  Double_t wmin=0;
490  Double_t wmax=0;
491  Double_t w,wm;
492  Double_t awmin;
493  Double_t scale;
494  for (UInt_t i=0; i<hlist.size(); i++) {
495  TH2F *hs = hlist[i];
496  w = hs->GetMaximum();
497  wm = hs->GetMinimum();
498  if (i==0) {
499  wmin=wm;
500  wmax=w;
501  }
502  else {
503  if (w>wmax) wmax=w;
504  if (wm<wmin) wmin=wm;
505  }
506  }
507  awmin = TMath::Abs(wmin);
508  Double_t usemin,usemax;
509  if (awmin>wmax) {
510  scale = 1.0/awmin;
511  usemin = -1.0;
512  usemax = scale*wmax;
513  }
514  else {
515  scale = 1.0/wmax;
516  usemin = scale*wmin;
517  usemax = 1.0;
518  }
519 
520  //
521  for (UInt_t i=0; i<hlist.size(); i++) {
522  TH2F *hs = hlist[i];
523  hs->Scale(scale);
524  hs->SetMinimum(usemin);
525  hs->SetMaximum(usemax);
526  }
527 }
528 
529 ////////////////////////////////////////////////////////////////////////////////
530 /// Fill cut
531 
532 void TMVA::RuleFit::FillCut(TH2F* h2, const Rule *rule, Int_t vind)
533 {
534  if (rule==0) return;
535  if (h2==0) return;
536  //
537  Double_t rmin, rmax;
538  Bool_t dormin,dormax;
539  Bool_t ruleHasVar = rule->GetRuleCut()->GetCutRange(vind,rmin,rmax,dormin,dormax);
540  if (!ruleHasVar) return;
541  //
542  Int_t firstbin = h2->GetBin(1,1,1);
543  if(firstbin<0) firstbin=0;
544  Int_t lastbin = h2->GetBin(h2->GetNbinsX(),1,1);
545  Int_t binmin=(dormin ? h2->FindBin(rmin,0.5):firstbin);
546  Int_t binmax=(dormax ? h2->FindBin(rmax,0.5):lastbin);
547  Int_t fbin;
548  Double_t xbinw = h2->GetXaxis()->GetBinWidth(firstbin);
549  Double_t fbmin = h2->GetXaxis()->GetBinLowEdge(binmin-firstbin+1);
550  Double_t lbmax = h2->GetXaxis()->GetBinLowEdge(binmax-firstbin+1)+xbinw;
551  Double_t fbfrac = (dormin ? ((fbmin+xbinw-rmin)/xbinw):1.0);
552  Double_t lbfrac = (dormax ? ((rmax-lbmax+xbinw)/xbinw):1.0);
553  Double_t f;
554  Double_t xc;
555  Double_t val;
556 
557  for (Int_t bin = binmin; bin<binmax+1; bin++) {
558  fbin = bin-firstbin+1;
559  if (bin==binmin) {
560  f = fbfrac;
561  }
562  else if (bin==binmax) {
563  f = lbfrac;
564  }
565  else {
566  f = 1.0;
567  }
568  xc = h2->GetXaxis()->GetBinCenter(fbin);
569  //
570  if (fVisHistsUseImp) {
571  val = rule->GetImportance();
572  }
573  else {
574  val = rule->GetCoefficient()*rule->GetSupport();
575  }
576  h2->Fill(xc,0.5,val*f);
577  }
578 }
579 
580 ////////////////////////////////////////////////////////////////////////////////
581 /// fill lin
582 
584 {
585  if (h2==0) return;
586  if (!fRuleEnsemble.DoLinear()) return;
587  //
588  Int_t firstbin = 1;
589  Int_t lastbin = h2->GetNbinsX();
590  Double_t xc;
591  Double_t val;
592  if (fVisHistsUseImp) {
593  val = fRuleEnsemble.GetLinImportance(vind);
594  }
595  else {
596  val = fRuleEnsemble.GetLinCoefficients(vind);
597  }
598  for (Int_t bin = firstbin; bin<lastbin+1; bin++) {
599  xc = h2->GetXaxis()->GetBinCenter(bin);
600  h2->Fill(xc,0.5,val);
601  }
602 }
603 
604 ////////////////////////////////////////////////////////////////////////////////
605 /// fill rule correlation between vx and vy, weighted with either the importance or the coefficient
606 
607 void TMVA::RuleFit::FillCorr(TH2F* h2,const Rule *rule,Int_t vx, Int_t vy)
608 {
609  if (rule==0) return;
610  if (h2==0) return;
611  Double_t val;
612  if (fVisHistsUseImp) {
613  val = rule->GetImportance();
614  }
615  else {
616  val = rule->GetCoefficient()*rule->GetSupport();
617  }
618  //
619  Double_t rxmin, rxmax, rymin, rymax;
620  Bool_t dorxmin, dorxmax, dorymin, dorymax;
621  //
622  // Get range in rule for X and Y
623  //
624  Bool_t ruleHasVarX = rule->GetRuleCut()->GetCutRange(vx,rxmin,rxmax,dorxmin,dorxmax);
625  Bool_t ruleHasVarY = rule->GetRuleCut()->GetCutRange(vy,rymin,rymax,dorymin,dorymax);
626  if (!(ruleHasVarX || ruleHasVarY)) return;
627  // min max of varX and varY in hist
628  Double_t vxmin = (dorxmin ? rxmin:h2->GetXaxis()->GetXmin());
629  Double_t vxmax = (dorxmax ? rxmax:h2->GetXaxis()->GetXmax());
630  Double_t vymin = (dorymin ? rymin:h2->GetYaxis()->GetXmin());
631  Double_t vymax = (dorymax ? rymax:h2->GetYaxis()->GetXmax());
632  // min max bin in X and Y
633  Int_t binxmin = h2->GetXaxis()->FindBin(vxmin);
634  Int_t binxmax = h2->GetXaxis()->FindBin(vxmax);
635  Int_t binymin = h2->GetYaxis()->FindBin(vymin);
636  Int_t binymax = h2->GetYaxis()->FindBin(vymax);
637  // bin widths
638  Double_t xbinw = h2->GetXaxis()->GetBinWidth(binxmin);
639  Double_t ybinw = h2->GetYaxis()->GetBinWidth(binxmin);
640  Double_t xbinmin = h2->GetXaxis()->GetBinLowEdge(binxmin);
641  Double_t xbinmax = h2->GetXaxis()->GetBinLowEdge(binxmax)+xbinw;
642  Double_t ybinmin = h2->GetYaxis()->GetBinLowEdge(binymin);
643  Double_t ybinmax = h2->GetYaxis()->GetBinLowEdge(binymax)+ybinw;
644  // fraction of edges
645  Double_t fxbinmin = (dorxmin ? ((xbinmin+xbinw-vxmin)/xbinw):1.0);
646  Double_t fxbinmax = (dorxmax ? ((vxmax-xbinmax+xbinw)/xbinw):1.0);
647  Double_t fybinmin = (dorymin ? ((ybinmin+ybinw-vymin)/ybinw):1.0);
648  Double_t fybinmax = (dorymax ? ((vymax-ybinmax+ybinw)/ybinw):1.0);
649  //
650  Double_t fx,fy;
651  Double_t xc,yc;
652  // fill histo
653  for (Int_t binx = binxmin; binx<binxmax+1; binx++) {
654  if (binx==binxmin) {
655  fx = fxbinmin;
656  }
657  else if (binx==binxmax) {
658  fx = fxbinmax;
659  }
660  else {
661  fx = 1.0;
662  }
663  xc = h2->GetXaxis()->GetBinCenter(binx);
664  for (Int_t biny = binymin; biny<binymax+1; biny++) {
665  if (biny==binymin) {
666  fy = fybinmin;
667  }
668  else if (biny==binymax) {
669  fy = fybinmax;
670  }
671  else {
672  fy = 1.0;
673  }
674  yc = h2->GetYaxis()->GetBinCenter(biny);
675  h2->Fill(xc,yc,val*fx*fy);
676  }
677  }
678 }
679 
680 ////////////////////////////////////////////////////////////////////////////////
681 /// help routine to MakeVisHists() - fills for all variables
682 
683 void TMVA::RuleFit::FillVisHistCut(const Rule* rule, std::vector<TH2F *> & hlist)
684 {
685  Int_t nhists = hlist.size();
686  Int_t nvar = fMethodBase->GetNvar();
687  if (nhists!=nvar) Log() << kFATAL << "BUG TRAP: number of hists is not equal the number of variables!" << Endl;
688  //
689  std::vector<Int_t> vindex;
690  TString hstr;
691  // not a nice way to do a check...
692  for (Int_t ih=0; ih<nhists; ih++) {
693  hstr = hlist[ih]->GetTitle();
694  for (Int_t iv=0; iv<nvar; iv++) {
695  if (fMethodBase->GetInputTitle(iv) == hstr)
696  vindex.push_back(iv);
697  }
698  }
699  //
700  for (Int_t iv=0; iv<nvar; iv++) {
701  if (rule) {
702  if (rule->ContainsVariable(vindex[iv])) {
703  FillCut(hlist[iv],rule,vindex[iv]);
704  }
705  }
706  else {
707  FillLin(hlist[iv],vindex[iv]);
708  }
709  }
710 }
711 ////////////////////////////////////////////////////////////////////////////////
712 /// help routine to MakeVisHists() - fills for all correlation plots
713 
714 void TMVA::RuleFit::FillVisHistCorr(const Rule * rule, std::vector<TH2F *> & hlist)
715 {
716  if (rule==0) return;
717  Double_t ruleimp = rule->GetImportance();
718  if (!(ruleimp>0)) return;
719  if (ruleimp<fRuleEnsemble.GetImportanceCut()) return;
720  //
721  Int_t nhists = hlist.size();
722  Int_t nvar = fMethodBase->GetNvar();
723  Int_t ncorr = (nvar*(nvar+1)/2)-nvar;
724  if (nhists!=ncorr) Log() << kERROR << "BUG TRAP: number of corr hists is not correct! ncorr = "
725  << ncorr << " nvar = " << nvar << " nhists = " << nhists << Endl;
726  //
727  std::vector< std::pair<Int_t,Int_t> > vindex;
728  TString hstr, var1, var2;
729  Int_t iv1=0,iv2=0;
730  // not a nice way to do a check...
731  for (Int_t ih=0; ih<nhists; ih++) {
732  hstr = hlist[ih]->GetName();
733  if (GetCorrVars( hstr, var1, var2 )) {
734  iv1 = fMethodBase->DataInfo().FindVarIndex( var1 );
735  iv2 = fMethodBase->DataInfo().FindVarIndex( var2 );
736  vindex.push_back( std::pair<Int_t,Int_t>(iv2,iv1) ); // pair X, Y
737  }
738  else {
739  Log() << kERROR << "BUG TRAP: should not be here - failed getting var1 and var2" << Endl;
740  }
741  }
742  //
743  for (Int_t ih=0; ih<nhists; ih++) {
744  if ( (rule->ContainsVariable(vindex[ih].first)) ||
745  (rule->ContainsVariable(vindex[ih].second)) ) {
746  FillCorr(hlist[ih],rule,vindex[ih].first,vindex[ih].second);
747  }
748  }
749 }
750 ////////////////////////////////////////////////////////////////////////////////
751 /// get first and second variables from title
752 
754 {
755  var1="";
756  var2="";
757  if(!title.BeginsWith("scat_")) return kFALSE;
758 
759  TString titleCopy = title(5,title.Length());
760  if(titleCopy.Index("_RF2D")>=0) titleCopy.Remove(titleCopy.Index("_RF2D"));
761 
762  Int_t splitPos = titleCopy.Index("_vs_");
763  if(splitPos>=0) { // there is a _vs_ in the string
764  var1 = titleCopy(0,splitPos);
765  var2 = titleCopy(splitPos+4, titleCopy.Length());
766  return kTRUE;
767  }
768  else {
769  var1 = titleCopy;
770  return kFALSE;
771  }
772 }
773 ////////////////////////////////////////////////////////////////////////////////
774 /// this will create histograms visualizing the rule ensemble
775 
777 {
778  const TString directories[5] = { "InputVariables_Id",
779  "InputVariables_Deco",
780  "InputVariables_PCA",
781  "InputVariables_Gauss",
782  "InputVariables_Gauss_Deco" };
783 
784  const TString corrDirName = "CorrelationPlots";
785 
786  TDirectory* rootDir = fMethodBase->GetFile();
787  TDirectory* varDir = 0;
788  TDirectory* corrDir = 0;
789 
790  TDirectory* methodDir = fMethodBase->BaseDir();
791  TString varDirName;
792  //
793  Bool_t done=(rootDir==0);
794  Int_t type=0;
795  if (done) {
796  Log() << kWARNING << "No basedir - BUG??" << Endl;
797  return;
798  }
799  while (!done) {
800  varDir = (TDirectory*)rootDir->Get( directories[type] );
801  type++;
802  done = ((varDir!=0) || (type>4));
803  }
804  if (varDir==0) {
805  Log() << kWARNING << "No input variable directory found - BUG?" << Endl;
806  return;
807  }
808  corrDir = (TDirectory*)varDir->Get( corrDirName );
809  if (corrDir==0) {
810  Log() << kWARNING << "No correlation directory found" << Endl;
811  Log() << kWARNING << "Check for other warnings related to correlation histograms" << Endl;
812  return;
813  }
814  if (methodDir==0) {
815  Log() << kWARNING << "No rulefit method directory found - BUG?" << Endl;
816  return;
817  }
818 
819  varDirName = varDir->GetName();
820  varDir->cd();
821  //
822  // get correlation plot directory
823  corrDir = (TDirectory *)varDir->Get(corrDirName);
824  if (corrDir==0) {
825  Log() << kWARNING << "No correlation directory found : " << corrDirName << Endl;
826  return;
827  }
828 
829  // how many plots are in the var directory?
830  Int_t noPlots = ((varDir->GetListOfKeys())->GetEntries()) / 2;
831  Log() << kDEBUG << "Got number of plots = " << noPlots << Endl;
832 
833  // loop over all objects in directory
834  std::vector<TH2F *> h1Vector;
835  std::vector<TH2F *> h2CorrVector;
836  TIter next(varDir->GetListOfKeys());
837  TKey *key;
838  while ((key = (TKey*)next())) {
839  // make sure, that we only look at histograms
840  TClass *cl = gROOT->GetClass(key->GetClassName());
841  if (!cl->InheritsFrom(TH1F::Class())) continue;
842  TH1F *sig = (TH1F*)key->ReadObj();
843  TString hname= sig->GetName();
844  Log() << kDEBUG << "Got histogram : " << hname << Endl;
845 
846  // check for all signal histograms
847  if (hname.Contains("__S")){ // found a new signal plot
848  TString htitle = sig->GetTitle();
849  htitle.ReplaceAll("signal","");
850  TString newname = hname;
851  newname.ReplaceAll("__Signal","__RF");
852  newname.ReplaceAll("__S","__RF");
853 
854  methodDir->cd();
855  TH2F *newhist = new TH2F(newname,htitle,sig->GetNbinsX(),sig->GetXaxis()->GetXmin(),sig->GetXaxis()->GetXmax(),
856  1,sig->GetYaxis()->GetXmin(),sig->GetYaxis()->GetXmax());
857  varDir->cd();
858  h1Vector.push_back( newhist );
859  }
860  }
861  //
862  corrDir->cd();
863  TString var1,var2;
864  TIter nextCorr(corrDir->GetListOfKeys());
865  while ((key = (TKey*)nextCorr())) {
866  // make sure, that we only look at histograms
867  TClass *cl = gROOT->GetClass(key->GetClassName());
868  if (!cl->InheritsFrom(TH2F::Class())) continue;
869  TH2F *sig = (TH2F*)key->ReadObj();
870  TString hname= sig->GetName();
871 
872  // check for all signal histograms
873  if ((hname.Contains("scat_")) && (hname.Contains("_Signal"))) {
874  Log() << kDEBUG << "Got histogram (2D) : " << hname << Endl;
875  TString htitle = sig->GetTitle();
876  htitle.ReplaceAll("(Signal)","");
877  TString newname = hname;
878  newname.ReplaceAll("_Signal","_RF2D");
879 
880  methodDir->cd();
881  const Int_t rebin=2;
882  TH2F *newhist = new TH2F(newname,htitle,
883  sig->GetNbinsX()/rebin,sig->GetXaxis()->GetXmin(),sig->GetXaxis()->GetXmax(),
884  sig->GetNbinsY()/rebin,sig->GetYaxis()->GetXmin(),sig->GetYaxis()->GetXmax());
885  if (GetCorrVars( newname, var1, var2 )) {
886  Int_t iv1 = fMethodBase->DataInfo().FindVarIndex(var1);
887  Int_t iv2 = fMethodBase->DataInfo().FindVarIndex(var2);
888  if (iv1<0) {
889  sig->GetYaxis()->SetTitle(var1);
890  }
891  else {
893  }
894  if (iv2<0) {
895  sig->GetXaxis()->SetTitle(var2);
896  }
897  else {
899  }
900  }
901  corrDir->cd();
902  h2CorrVector.push_back( newhist );
903  }
904  }
905 
906  varDir->cd();
907  // fill rules
908  UInt_t nrules = fRuleEnsemble.GetNRules();
909  const Rule *rule;
910  for (UInt_t i=0; i<nrules; i++) {
911  rule = fRuleEnsemble.GetRulesConst(i);
912  FillVisHistCut(rule, h1Vector);
913  }
914  // fill linear terms and normalise hists
915  FillVisHistCut(0, h1Vector);
916  NormVisHists(h1Vector);
917 
918  //
919  corrDir->cd();
920  // fill rules
921  for (UInt_t i=0; i<nrules; i++) {
922  rule = fRuleEnsemble.GetRulesConst(i);
923  FillVisHistCorr(rule, h2CorrVector);
924  }
925  NormVisHists(h2CorrVector);
926 
927  // write histograms to file
928  methodDir->cd();
929  for (UInt_t i=0; i<h1Vector.size(); i++) h1Vector[i]->Write();
930  for (UInt_t i=0; i<h2CorrVector.size(); i++) h2CorrVector[i]->Write();
931 }
932 
933 ////////////////////////////////////////////////////////////////////////////////
934 /// this will create a histograms intended rather for debugging or for the curious user
935 
937 {
938  TDirectory* methodDir = fMethodBase->BaseDir();
939  if (methodDir==0) {
940  Log() << kWARNING << "<MakeDebugHists> No rulefit method directory found - bug?" << Endl;
941  return;
942  }
943  //
944  methodDir->cd();
945  std::vector<Double_t> distances;
946  std::vector<Double_t> fncuts;
947  std::vector<Double_t> fnvars;
948  const Rule *ruleA;
949  const Rule *ruleB;
950  Double_t dABmin=1000000.0;
951  Double_t dABmax=-1.0;
952  UInt_t nrules = fRuleEnsemble.GetNRules();
953  for (UInt_t i=0; i<nrules; i++) {
954  ruleA = fRuleEnsemble.GetRulesConst(i);
955  for (UInt_t j=i+1; j<nrules; j++) {
956  ruleB = fRuleEnsemble.GetRulesConst(j);
957  Double_t dAB = ruleA->RuleDist( *ruleB, kTRUE );
958  if (dAB>-0.5) {
959  UInt_t nc = ruleA->GetNcuts();
960  UInt_t nv = ruleA->GetNumVarsUsed();
961  distances.push_back(dAB);
962  fncuts.push_back(static_cast<Double_t>(nc));
963  fnvars.push_back(static_cast<Double_t>(nv));
964  if (dAB<dABmin) dABmin=dAB;
965  if (dAB>dABmax) dABmax=dAB;
966  }
967  }
968  }
969  //
970  TH1F *histDist = new TH1F("RuleDist","Rule distances",100,dABmin,dABmax);
971  TTree *distNtuple = new TTree("RuleDistNtuple","RuleDist ntuple");
972  Double_t ntDist;
973  Double_t ntNcuts;
974  Double_t ntNvars;
975  distNtuple->Branch("dist", &ntDist, "dist/D");
976  distNtuple->Branch("ncuts",&ntNcuts, "ncuts/D");
977  distNtuple->Branch("nvars",&ntNvars, "nvars/D");
978  //
979  for (UInt_t i=0; i<distances.size(); i++) {
980  histDist->Fill(distances[i]);
981  ntDist = distances[i];
982  ntNcuts = fncuts[i];
983  ntNvars = fnvars[i];
984  distNtuple->Fill();
985  }
986  distNtuple->Write();
987 }
std::vector< const TMVA::Event * > fTrainingEventsRndm
Definition: RuleFit.h:156
void ForestStatistics()
summary of statistics of all trees
Definition: RuleFit.cxx:385
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
void SetPruneMethod(EPruneMethod m=kCostComplexityPruning)
Definition: DecisionTree.h:134
void MakeForest()
make a forest of decisiontrees
Definition: RuleFit.cxx:221
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Definition: TH1.cxx:3441
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:5937
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3126
UInt_t GetNumVarsUsed() const
Definition: Rule.h:128
virtual Double_t GetMaximum(Double_t maxval=FLT_MAX) const
Return maximum value smaller than maxval of bins in the range, unless the value has been overridden b...
Definition: TH1.cxx:7666
J Friedman's RuleFit method.
Definition: MethodRuleFit.h:47
Double_t GetTreeEveFrac() const
Definition: MethodRuleFit.h:93
Random number generator class based on M.
Definition: TRandom3.h:27
virtual TList * GetListOfKeys() const
Definition: TDirectory.h:148
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
const RuleEnsemble & GetRuleEnsemble() const
Definition: RuleFit.h:140
long long Long64_t
Definition: RtypesCore.h:69
virtual void SetMaximum(Double_t maximum=-1111)
Definition: TH1.h:375
Bool_t GetCutRange(Int_t sel, Double_t &rmin, Double_t &rmax, Bool_t &dormin, Bool_t &dormax) const
get cut range for a given selector
Definition: RuleCut.cxx:176
void CalcImportance()
calculates the importance of each rule
Definition: RuleFit.cxx:417
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
Definition: TDirectory.cxx:729
const std::vector< const TMVA::Event *> & GetTrainingEvents() const
Definition: RuleFit.h:133
A class implementing various fits of rule ensembles.
Definition: RuleFit.h:44
virtual Double_t GetBinLowEdge(Int_t bin) const
Return low edge of bin.
Definition: TAxis.cxx:504
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
Bool_t ContainsVariable(UInt_t iv) const
check if variable in node
Definition: Rule.cxx:137
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:640
UInt_t GetNvar() const
Definition: MethodBase.h:328
MsgLogger & Log() const
Definition: RuleFit.h:169
UInt_t GetNNodes() const
Definition: BinaryTree.h:86
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4383
void NormVisHists(std::vector< TH2F *> &hlist)
normalize rule importance hists
Definition: RuleFit.cxx:485
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:311
void SetMsgType(EMsgType t)
set the current message type to that of mlog for this class and all other subtools ...
Definition: RuleFit.cxx:190
const MethodBase * fMethodBase
Definition: RuleFit.h:165
const std::vector< TMVA::Rule * > & GetRulesConst() const
Definition: RuleEnsemble.h:267
Virtual base Class for all MVA method.
Definition: MethodBase.h:106
Bool_t GetCorrVars(TString &title, TString &var1, TString &var2)
get first and second variables from title
Definition: RuleFit.cxx:753
void InitNEveEff()
init effective number of events (using event weights)
Definition: RuleFit.cxx:97
virtual void SetMinimum(Double_t minimum=-1111)
Definition: TH1.h:376
#define gROOT
Definition: TROOT.h:375
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:587
void FitCoefficients()
Fit the coefficients for the rule ensemble.
Definition: RuleFit.cxx:408
Basic string class.
Definition: TString.h:129
const RuleCut * GetRuleCut() const
Definition: Rule.h:137
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:551
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< Double_t > fEventWeights
Definition: RuleFit.h:157
const char * GetInputTitle(Int_t i) const
Definition: MethodBase.h:335
Double_t fNEveEffTrain
Definition: RuleFit.h:160
void CleanupLinear()
cleanup linear model
void SetTrainingEvents(const std::vector< const TMVA::Event *> &el)
set the training events randomly
Definition: RuleFit.cxx:439
Short_t Abs(Short_t d)
Definition: TMathBase.h:108
const std::vector< Double_t > & GetLinCoefficients() const
Definition: RuleEnsemble.h:269
Implementation of a rule.
Definition: Rule.h:48
const std::vector< const TMVA::DecisionTree * > & GetForest() const
Definition: RuleFit.h:139
void SetMsgType(EMsgType t)
void RuleResponseStats()
calculate various statistics for this rule
RuleFit(void)
default constructor
Definition: RuleFit.cxx:75
Double_t GetXmin() const
Definition: TAxis.h:133
TStopwatch timer
Definition: pirndm.C:37
void BuildTree(TMVA::DecisionTree *dt)
build the decision tree using fNTreeSample events from fTrainingEventsRndm
Definition: RuleFit.cxx:200
void Class()
Definition: Class.C:29
TMVA::DecisionTree::EPruneMethod GetPruneMethod() const
Definition: MethodRuleFit.h:96
const std::vector< Double_t > & GetLinImportance() const
Definition: RuleEnsemble.h:271
UInt_t GetNRules() const
Definition: RuleEnsemble.h:266
const Event * GetEvent() const
Definition: MethodBase.h:733
DataSet * Data() const
Definition: MethodBase.h:393
void ReshuffleEvents()
Definition: RuleFit.h:64
virtual Double_t GetBinCenter(Int_t bin) const
Return center of bin.
Definition: TAxis.cxx:464
void GetRndmSampleEvents(std::vector< const TMVA::Event * > &evevec, UInt_t nevents)
draw a random subsample of the training events without replacement
Definition: RuleFit.cxx:466
DataSetInfo & DataInfo() const
Definition: MethodBase.h:394
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:72
SeparationBase * GetSeparationBase() const
Definition: MethodRuleFit.h:95
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:24
TFile * GetFile() const
Definition: MethodBase.h:354
virtual ~RuleFit(void)
destructor
Definition: RuleFit.cxx:89
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
void CalcImportance()
calculate the importance of each rule
void SetMsgType(EMsgType t)
void SetMethodBase(const MethodBase *rfbase)
set MethodBase
Definition: RuleFit.cxx:150
void CleanupRules()
cleanup rules
UInt_t fNTreeSample
Definition: RuleFit.h:158
Double_t GetPruneStrength() const
Definition: MethodRuleFit.h:97
Double_t GetImportanceCut() const
Definition: RuleEnsemble.h:263
void SetNVars(Int_t n)
Definition: DecisionTree.h:188
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9163
void RestoreEventWeights()
save event weights - must be done before making the forest
Definition: RuleFit.cxx:320
Double_t RuleDist(const Rule &other, Bool_t useCutValue) const
Returns:
Definition: Rule.cxx:190
const MethodBase * GetMethodBase() const
Definition: RuleFit.h:145
void CalcVarImportance()
Calculates variable importance using eq (35) in RuleFit paper by Friedman et.al.
void Copy(const RuleFit &other)
copy method
Definition: RuleFit.cxx:159
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
void FillVisHistCut(const Rule *rule, std::vector< TH2F *> &hlist)
help routine to MakeVisHists() - fills for all variables
Definition: RuleFit.cxx:683
Bool_t DoLinear() const
Definition: RuleEnsemble.h:257
void FillCorr(TH2F *h2, const TMVA::Rule *rule, Int_t v1, Int_t v2)
fill rule correlation between vx and vy, weighted with either the importance or the coefficient ...
Definition: RuleFit.cxx:607
Double_t GetCoefficient() const
Definition: Rule.h:139
void MakeDebugHists()
this will create a histograms intended rather for debugging or for the curious user ...
Definition: RuleFit.cxx:936
static const Int_t randSEED
Definition: RuleFit.h:171
2-D histogram with a float per channel (see TH1 documentation)
Definition: TH2.h:249
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:563
void SetPruneStrength(Double_t p)
Definition: DecisionTree.h:140
Implementation of a Decision Tree.
Definition: DecisionTree.h:59
unsigned int UInt_t
Definition: RtypesCore.h:42
Ssiz_t Length() const
Definition: TString.h:388
void FillLin(TH2F *h2, Int_t vind)
fill lin
Definition: RuleFit.cxx:583
The ROOT global object gROOT contains a list of all defined classes.
Definition: TClass.h:71
TAxis * GetYaxis()
Definition: TH1.h:301
RuleEnsemble fRuleEnsemble
Definition: RuleFit.h:162
Tools & gTools()
Bool_t InheritsFrom(const char *cl) const
Return kTRUE if this class inherits from a class with name "classname".
Definition: TClass.cxx:4602
void Boost(TMVA::DecisionTree *dt)
Boost the events.
Definition: RuleFit.cxx:338
Int_t FindVarIndex(const TString &) const
find variable by name
virtual Double_t GetMinimum(Double_t minval=-FLT_MAX) const
Return minimum value larger than minval of bins in the range, unless the value has been overridden by...
Definition: TH1.cxx:7751
UInt_t GetNcuts() const
Definition: Rule.h:131
const Bool_t kFALSE
Definition: RtypesCore.h:92
virtual Int_t FindBin(Double_t x)
Find bin number corresponding to abscissa x.
Definition: TAxis.cxx:279
TString & Remove(Ssiz_t pos)
Definition: TString.h:621
Int_t GetNCuts() const
void SaveEventWeights()
save event weights - must be done before making the forest
Definition: RuleFit.cxx:308
#define ClassImp(name)
Definition: Rtypes.h:336
void Initialize(const RuleFit *rf)
Initializes all member variables with default values.
double f(double x)
double Double_t
Definition: RtypesCore.h:55
const MethodRuleFit * fMethodRuleFit
Definition: RuleFit.h:164
Describe directory structure in memory.
Definition: TDirectory.h:34
int type
Definition: TGX11.cxx:120
void MakeVisHists()
this will create histograms visualizing the rule ensemble
Definition: RuleFit.cxx:776
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:572
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
void FillVisHistCorr(const Rule *rule, std::vector< TH2F *> &hlist)
help routine to MakeVisHists() - fills for all correlation plots
Definition: RuleFit.cxx:714
virtual Double_t Uniform(Double_t x1=1)
Returns a uniform deviate on the interval (0, x1).
Definition: TRandom.cxx:606
void InitPtrs(const TMVA::MethodBase *rfbase)
initialize pointers
Definition: RuleFit.cxx:109
Int_t GetNTrees() const
Definition: MethodRuleFit.h:92
Bool_t UseBoost() const
Definition: MethodRuleFit.h:84
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
void MakeModel()
create model
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1660
Double_t PruneTree(const EventConstList *validationSample=NULL)
prune (get rid of internal nodes) the Decision tree to avoid overtraining several different pruning m...
Abstract ClassifierFactory template that handles arbitrary types.
void FillCut(TH2F *h2, const TMVA::Rule *rule, Int_t vind)
Fill cut.
Definition: RuleFit.cxx:532
virtual Double_t GetBinWidth(Int_t bin) const
Return bin width.
Definition: TAxis.cxx:526
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:435
Double_t GetImportance() const
Definition: Rule.h:143
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
UInt_t BuildTree(const EventConstList &eventSample, DecisionTreeNode *node=NULL)
building the decision tree by recursively calling the splitting of one (root-) node into two daughter...
Bool_t fVisHistsUseImp
Definition: RuleFit.h:166
RuleFitParams fRuleFitParams
Definition: RuleFit.h:163
Double_t EvalEvent(const Event &e)
evaluate single event
Definition: RuleFit.cxx:431
Bool_t IsSignal(const Event *ev) const
std::vector< const TMVA::DecisionTree * > fForest
Definition: RuleFit.h:161
A TTree object has a header with a name and a title.
Definition: TTree.h:78
Double_t EvalEvent() const
Definition: RuleEnsemble.h:416
void Initialize(const TMVA::MethodBase *rfbase)
initialize the parameters of the RuleFit method and make rules
Definition: RuleFit.cxx:119
Definition: first.py:1
Double_t CalcWeightSum(const std::vector< const TMVA::Event *> *events, UInt_t neve=0)
calculate the sum of weights
Definition: RuleFit.cxx:175
const MethodRuleFit * GetMethodRuleFit() const
Definition: RuleFit.h:144
virtual Int_t GetNbinsX() const
Definition: TH1.h:277
void SetRuleFit(RuleFit *rf)
Definition: RuleFitParams.h:66
Double_t Sqrt(Double_t x)
Definition: TMath.h:591
void MakeGDPath()
The following finds the gradient directed path in parameter space.
virtual Int_t GetBin(Int_t binx, Int_t biny, Int_t binz=0) const
Return Global bin number corresponding to binx,y,z.
Definition: TH2.cxx:966
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
std::vector< const TMVA::Event * > fTrainingEvents
Definition: RuleFit.h:155
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:155
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:317
const Bool_t kTRUE
Definition: RtypesCore.h:91
Double_t GetXmax() const
Definition: TAxis.h:134
Double_t GetSupport() const
Definition: Rule.h:140
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
MsgLogger * fLogger
Definition: RuleFit.h:168
TAxis * GetXaxis()
Definition: TH1.h:300
Double_t GetMaxFracNEve() const
Definition: MethodRuleFit.h:99
virtual Int_t GetNbinsY() const
Definition: TH1.h:278
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:48
Double_t GetMinFracNEve() const
Definition: MethodRuleFit.h:98