ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
RuleFit.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : Rule *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * A class describing a 'rule' *
12  * Each internal node of a tree defines a rule from all the parental nodes. *
13  * A rule with 0 or 1 nodes in the list is a root rule -> corresponds to a0. *
14  * Input: a decision tree (in the constructor) *
15  * its coefficient *
16  * *
17  * *
18  * Authors (alphabetical): *
19  * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
20  * *
21  * Copyright (c) 2005: *
22  * CERN, Switzerland *
23  * Iowa State U. *
24  * MPI-K Heidelberg, Germany *
25  * *
26  * Redistribution and use in source and binary forms, with or without *
27  * modification, are permitted according to the terms listed in LICENSE *
28  * (http://tmva.sourceforge.net/LICENSE) *
29  **********************************************************************************/
30 
#include "TMVA/RuleFit.h"

#include "TMVA/DataSet.h"
#include "TMVA/DecisionTree.h"
#include "TMVA/Event.h"
#include "TMVA/Factory.h" // for root base dir
#include "TMVA/GiniIndex.h"
#include "TMVA/MethodBase.h"
#include "TMVA/MethodRuleFit.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/SeparationBase.h"
#include "TMVA/Timer.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"

#include "TH2F.h"
#include "TKey.h"
#include "TRandom3.h"

#include <algorithm>
#include <cstdlib>
#include <random>
51 
53 
54 ////////////////////////////////////////////////////////////////////////////////
55 /// constructor
56 
TMVA::RuleFit::RuleFit( const MethodBase *rfbase )
   : fVisHistsUseImp( kTRUE ),
     fLogger( new MsgLogger("RuleFit") )
{
   // Wire this RuleFit to its owning method and its sub-tools.
   Initialize( rfbase );
   // Seed the global C RNG used by the event shuffling.
   // NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
   // C++17 -- confirm the toolchain, or migrate to std::shuffle.
   std::srand( randSEED ); // initialize random number generator used by std::random_shuffle
}
64 
65 ////////////////////////////////////////////////////////////////////////////////
66 /// default constructor
67 
69  : fNTreeSample(0)
70  , fNEveEffTrain(0)
71  , fMethodRuleFit(0)
72  , fMethodBase(0)
73  , fVisHistsUseImp( kTRUE )
74  , fLogger( new MsgLogger("RuleFit") )
75 {
76  std::srand( randSEED ); // initialize random number generator used by std::random_shuffle
77 }
78 
79 ////////////////////////////////////////////////////////////////////////////////
80 /// destructor
81 
83 {
84  delete fLogger;
85 }
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 /// init effective number of events (using event weights)
89 
91 {
92  UInt_t neve = fTrainingEvents.size();
93  if (neve==0) return;
94  //
95  fNEveEffTrain = CalcWeightSum( &fTrainingEvents );
96  //
97 }
98 
99 ////////////////////////////////////////////////////////////////////////////////
100 /// initialize pointers
101 
102 void TMVA::RuleFit::InitPtrs( const MethodBase *rfbase )
103 {
104  this->SetMethodBase(rfbase);
105  fRuleEnsemble.Initialize( this );
106  fRuleFitParams.SetRuleFit( this );
107 }
108 
109 ////////////////////////////////////////////////////////////////////////////////
110 /// initialize the parameters of the RuleFit method and make rules
111 
113 {
114  InitPtrs(rfbase);
115 
116  if (fMethodRuleFit){
117  fMethodRuleFit->Data()->SetCurrentType(Types::kTraining);
118  UInt_t nevents = fMethodRuleFit->Data()->GetNTrainingEvents();
119  std::vector<const TMVA::Event*> tmp;
120  for (Long64_t ievt=0; ievt<nevents; ievt++) {
121  const Event *event = fMethodRuleFit->GetEvent(ievt);
122  tmp.push_back(event);
123  }
124  SetTrainingEvents( tmp );
125  }
126  // SetTrainingEvents( fMethodRuleFit->GetTrainingEvents() );
127 
128  InitNEveEff();
129 
130  MakeForest();
131 
132  // Make the model - Rule + Linear (if fDoLinear is true)
133  fRuleEnsemble.MakeModel();
134 
135  // init rulefit params
136  fRuleFitParams.Init();
137 
138 }
139 
140 ////////////////////////////////////////////////////////////////////////////////
141 /// set MethodBase
142 
144 {
145  fMethodBase = rfbase;
146  fMethodRuleFit = dynamic_cast<const MethodRuleFit *>(rfbase);
147 }
148 
149 ////////////////////////////////////////////////////////////////////////////////
150 /// copy method
151 
152 void TMVA::RuleFit::Copy( const RuleFit& other )
153 {
154  if(this != &other) {
155  fMethodRuleFit = other.GetMethodRuleFit();
156  fMethodBase = other.GetMethodBase();
157  fTrainingEvents = other.GetTrainingEvents();
158  // fSubsampleEvents = other.GetSubsampleEvents();
159 
160  fForest = other.GetForest();
161  fRuleEnsemble = other.GetRuleEnsemble();
162  }
163 }
164 
165 ////////////////////////////////////////////////////////////////////////////////
166 /// calculate the sum of weights
167 
168 Double_t TMVA::RuleFit::CalcWeightSum( const std::vector<const Event *> *events, UInt_t neve )
169 {
170  if (events==0) return 0.0;
171  if (neve==0) neve=events->size();
172  //
173  Double_t sumw=0;
174  for (UInt_t ie=0; ie<neve; ie++) {
175  sumw += ((*events)[ie])->GetWeight();
176  }
177  return sumw;
178 }
179 
180 ////////////////////////////////////////////////////////////////////////////////
181 /// set the current message type to that of mlog for this class and all other subtools
182 
184 {
185  fLogger->SetMinType(t);
186  fRuleEnsemble.SetMsgType(t);
187  fRuleFitParams.SetMsgType(t);
188 }
189 
190 ////////////////////////////////////////////////////////////////////////////////
191 /// build the decision tree using fNTreeSample events from fTrainingEventsRndm
192 
194 {
195  if (dt==0) return;
196  if (fMethodRuleFit==0) {
197  Log() << kFATAL << "RuleFit::BuildTree() - Attempting to build a tree NOT from a MethodRuleFit" << Endl;
198  }
199  std::vector<const Event *> evevec;
200  for (UInt_t ie=0; ie<fNTreeSample; ie++) {
201  evevec.push_back(fTrainingEventsRndm[ie]);
202  }
203  dt->BuildTree(evevec);
204  if (fMethodRuleFit->GetPruneMethod() != DecisionTree::kNoPruning) {
205  dt->SetPruneMethod(fMethodRuleFit->GetPruneMethod());
206  dt->SetPruneStrength(fMethodRuleFit->GetPruneStrength());
207  dt->PruneTree();
208  }
209 }
210 
211 ////////////////////////////////////////////////////////////////////////////////
212 /// make a forest of decisiontrees
213 
215 {
216  if (fMethodRuleFit==0) {
217  Log() << kFATAL << "RuleFit::BuildTree() - Attempting to build a tree NOT from a MethodRuleFit" << Endl;
218  }
219  Log() << kDEBUG << "Creating a forest with " << fMethodRuleFit->GetNTrees() << " decision trees" << Endl;
220  Log() << kDEBUG << "Each tree is built using a random subsample with " << fNTreeSample << " events" << Endl;
221  //
222  Timer timer( fMethodRuleFit->GetNTrees(), "RuleFit" );
223 
224  // Double_t fsig;
225  Int_t nsig,nbkg;
226  //
227  TRandom3 rndGen;
228  //
229  //
230  // First save all event weights.
231  // Weights are modifed by the boosting.
232  // Those weights we do not want for the later fitting.
233  //
234  Bool_t useBoost = fMethodRuleFit->UseBoost(); // (AdaBoost (True) or RandomForest/Tree (False)
235 
236  if (useBoost) SaveEventWeights();
237 
238  for (Int_t i=0; i<fMethodRuleFit->GetNTrees(); i++) {
239  // timer.DrawProgressBar(i);
240  if (!useBoost) ReshuffleEvents();
241  nsig=0;
242  nbkg=0;
243  for (UInt_t ie = 0; ie<fNTreeSample; ie++) {
244  if (fMethodBase->DataInfo().IsSignal(fTrainingEventsRndm[ie])) nsig++; // ignore weights here
245  else nbkg++;
246  }
247  // fsig = Double_t(nsig)/Double_t(nsig+nbkg);
248  // do not implement the above in this release...just set it to default
249 
250  DecisionTree *dt=nullptr;
251  Bool_t tryAgain=kTRUE;
252  Int_t ntries=0;
253  const Int_t ntriesMax=10;
254  Double_t frnd = 0.;
255  while (tryAgain) {
256  frnd = 100*rndGen.Uniform( fMethodRuleFit->GetMinFracNEve(), 0.5*fMethodRuleFit->GetMaxFracNEve() );
257  Int_t iclass = 0; // event class being treated as signal during training
258  Bool_t useRandomisedTree = !useBoost;
259  dt = new DecisionTree( fMethodRuleFit->GetSeparationBase(), frnd, fMethodRuleFit->GetNCuts(), &(fMethodRuleFit->DataInfo()), iclass, useRandomisedTree);
260  dt->SetNVars(fMethodBase->GetNvar());
261 
262  BuildTree(dt); // reads fNTreeSample events from fTrainingEventsRndm
263  if (dt->GetNNodes()<3) {
264  delete dt;
265  dt=0;
266  }
267  ntries++;
268  tryAgain = ((dt==0) && (ntries<ntriesMax));
269  }
270  if (dt) {
271  fForest.push_back(dt);
272  if (useBoost) Boost(dt);
273 
274  } else {
275 
276  Log() << kWARNING << "------------------------------------------------------------------" << Endl;
277  Log() << kWARNING << " Failed growing a tree even after " << ntriesMax << " trials" << Endl;
278  Log() << kWARNING << " Possible solutions: " << Endl;
279  Log() << kWARNING << " 1. increase the number of training events" << Endl;
280  Log() << kWARNING << " 2. set a lower min fraction cut (fEventsMin)" << Endl;
281  Log() << kWARNING << " 3. maybe also decrease the max fraction cut (fEventsMax)" << Endl;
282  Log() << kWARNING << " If the above warning occurs rarely only, it can be ignored" << Endl;
283  Log() << kWARNING << "------------------------------------------------------------------" << Endl;
284  }
285 
286  Log() << kDEBUG << "Built tree with minimum cut at N = " << frnd <<"% events"
287  << " => N(nodes) = " << fForest.back()->GetNNodes()
288  << " ; n(tries) = " << ntries
289  << Endl;
290  }
291 
292  // Now restore event weights
293  if (useBoost) RestoreEventWeights();
294 
295  // print statistics on the forest created
296  ForestStatistics();
297 }
298 
299 ////////////////////////////////////////////////////////////////////////////////
300 /// save event weights - must be done before making the forest
301 
303 {
304  fEventWeights.clear();
305  for (std::vector<const Event*>::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) {
306  Double_t w = (*e)->GetBoostWeight();
307  fEventWeights.push_back(w);
308  }
309 }
310 
311 ////////////////////////////////////////////////////////////////////////////////
312 /// save event weights - must be done before making the forest
313 
315 {
316  UInt_t ie=0;
317  if (fEventWeights.size() != fTrainingEvents.size()) {
318  Log() << kERROR << "RuleFit::RestoreEventWeights() called without having called SaveEventWeights() before!" << Endl;
319  return;
320  }
321  for (std::vector<const Event*>::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) {
322  (*e)->SetBoostWeight(fEventWeights[ie]);
323  ie++;
324  }
325 }
326 
327 ////////////////////////////////////////////////////////////////////////////////
328 /// Boost the events. The algorithm below is the called AdaBoost.
329 /// See MethodBDT for details.
330 /// Actually, this is a more or less copy of MethodBDT::AdaBoost().
331 
333 {
334  Double_t sumw=0; // sum of initial weights - all events
335  Double_t sumwfalse=0; // idem, only missclassified events
336  //
337  std::vector<Char_t> correctSelected; // <--- boolean stored
338  //
339  for (std::vector<const Event*>::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) {
340  Bool_t isSignalType = (dt->CheckEvent(*e,kTRUE) > 0.5 );
341  Double_t w = (*e)->GetWeight();
342  sumw += w;
343  //
344  if (isSignalType == fMethodBase->DataInfo().IsSignal(*e)) { // correctly classified
345  correctSelected.push_back(kTRUE);
346  }
347  else { // missclassified
348  sumwfalse+= w;
349  correctSelected.push_back(kFALSE);
350  }
351  }
352  // missclassification error
353  Double_t err = sumwfalse/sumw;
354  // calculate boost weight for missclassified events
355  // use for now the exponent = 1.0
356  // one could have w = ((1-err)/err)^beta
357  Double_t boostWeight = (err>0 ? (1.0-err)/err : 1000.0);
358  Double_t newSumw=0.0;
359  UInt_t ie=0;
360  // set new weight to missclassified events
361  for (std::vector<const Event*>::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) {
362  if (!correctSelected[ie])
363  (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostWeight);
364  newSumw+=(*e)->GetWeight();
365  ie++;
366  }
367  // reweight all events
368  Double_t scale = sumw/newSumw;
369  for (std::vector<const Event*>::iterator e=fTrainingEvents.begin(); e!=fTrainingEvents.end(); e++) {
370  (*e)->SetBoostWeight( (*e)->GetBoostWeight() * scale);
371  }
372  Log() << kDEBUG << "boostWeight = " << boostWeight << " scale = " << scale << Endl;
373 }
374 
375 ////////////////////////////////////////////////////////////////////////////////
376 /// summary of statistics of all trees
377 /// * end-nodes: average and spread
378 
380 {
381  UInt_t ntrees = fForest.size();
382  if (ntrees==0) return;
383  const DecisionTree *tree;
384  Double_t sumn2 = 0;
385  Double_t sumn = 0;
386  Double_t nd;
387  for (UInt_t i=0; i<ntrees; i++) {
388  tree = fForest[i];
389  nd = Double_t(tree->GetNNodes());
390  sumn += nd;
391  sumn2 += nd*nd;
392  }
393  Double_t sig = TMath::Sqrt( gTools().ComputeVariance( sumn2, sumn, ntrees ));
394  Log() << kVERBOSE << "Nodes in trees: average & std dev = " << sumn/ntrees << " , " << sig << Endl;
395 }
396 
397 ////////////////////////////////////////////////////////////////////////////////
398 ///
399 /// Fit the coefficients for the rule ensemble
400 ///
401 
403 {
404  Log() << kVERBOSE << "Fitting rule/linear terms" << Endl;
405  fRuleFitParams.MakeGDPath();
406 }
407 
408 ////////////////////////////////////////////////////////////////////////////////
409 /// calculates the importance of each rule
410 
412 {
413  Log() << kVERBOSE << "Calculating importance" << Endl;
414  fRuleEnsemble.CalcImportance();
415  fRuleEnsemble.CleanupRules();
416  fRuleEnsemble.CleanupLinear();
417  fRuleEnsemble.CalcVarImportance();
418  Log() << kVERBOSE << "Filling rule statistics" << Endl;
419  fRuleEnsemble.RuleResponseStats();
420 }
421 
422 ////////////////////////////////////////////////////////////////////////////////
423 /// evaluate single event
424 
426 {
427  return fRuleEnsemble.EvalEvent( e );
428 }
429 
430 ////////////////////////////////////////////////////////////////////////////////
431 /// set the training events randomly
432 
433 void TMVA::RuleFit::SetTrainingEvents( const std::vector<const Event *>& el )
434 {
435  if (fMethodRuleFit==0) Log() << kFATAL << "RuleFit::SetTrainingEvents - MethodRuleFit not initialized" << Endl;
436  UInt_t neve = el.size();
437  if (neve==0) Log() << kWARNING << "An empty sample of training events was given" << Endl;
438 
439  // copy vector
440  fTrainingEvents.clear();
441  fTrainingEventsRndm.clear();
442  for (UInt_t i=0; i<neve; i++) {
443  fTrainingEvents.push_back(static_cast< const Event *>(el[i]));
444  fTrainingEventsRndm.push_back(static_cast< const Event *>(el[i]));
445  }
446 
447  // Re-shuffle the vector, ie, recreate it in a random order
448  std::random_shuffle( fTrainingEventsRndm.begin(), fTrainingEventsRndm.end() );
449 
450  // fraction events per tree
451  fNTreeSample = static_cast<UInt_t>(neve*fMethodRuleFit->GetTreeEveFrac());
452  Log() << kDEBUG << "Number of events per tree : " << fNTreeSample
453  << " ( N(events) = " << neve << " )"
454  << " randomly drawn without replacement" << Endl;
455 }
456 
457 ////////////////////////////////////////////////////////////////////////////////
458 /// draw a random subsample of the training events without replacement
459 
460 void TMVA::RuleFit::GetRndmSampleEvents(std::vector< const Event * > & evevec, UInt_t nevents)
461 {
462  ReshuffleEvents();
463  if ((nevents<fTrainingEventsRndm.size()) && (nevents>0)) {
464  evevec.resize(nevents);
465  for (UInt_t ie=0; ie<nevents; ie++) {
466  evevec[ie] = fTrainingEventsRndm[ie];
467  }
468  }
469  else {
470  Log() << kWARNING << "GetRndmSampleEvents() : requested sub sample size larger than total size (BUG!).";
471  }
472 }
473 ////////////////////////////////////////////////////////////////////////////////
474 /// normalize rule importance hists
475 ///
476 /// if all weights are positive, the scale will be 1/maxweight
477 /// if minimum weight < 0, then the scale will be 1/max(maxweight,abs(minweight))
478 ///
479 
480 void TMVA::RuleFit::NormVisHists(std::vector<TH2F *> & hlist)
481 {
482  if (hlist.empty()) return;
483  //
484  Double_t wmin=0;
485  Double_t wmax=0;
486  Double_t w,wm;
487  Double_t awmin;
488  Double_t scale;
489  for (UInt_t i=0; i<hlist.size(); i++) {
490  TH2F *hs = hlist[i];
491  w = hs->GetMaximum();
492  wm = hs->GetMinimum();
493  if (i==0) {
494  wmin=wm;
495  wmax=w;
496  }
497  else {
498  if (w>wmax) wmax=w;
499  if (wm<wmin) wmin=wm;
500  }
501  }
502  awmin = TMath::Abs(wmin);
503  Double_t usemin,usemax;
504  if (awmin>wmax) {
505  scale = 1.0/awmin;
506  usemin = -1.0;
507  usemax = scale*wmax;
508  }
509  else {
510  scale = 1.0/wmax;
511  usemin = scale*wmin;
512  usemax = 1.0;
513  }
514 
515  //
516  for (UInt_t i=0; i<hlist.size(); i++) {
517  TH2F *hs = hlist[i];
518  hs->Scale(scale);
519  hs->SetMinimum(usemin);
520  hs->SetMaximum(usemax);
521  }
522 }
523 
524 ////////////////////////////////////////////////////////////////////////////////
525 /// Fill cut
526 
void TMVA::RuleFit::FillCut(TH2F* h2, const Rule *rule, Int_t vind)
{
   // Fill histogram h2 with this rule's weight over the x-interval selected
   // by the rule's cut on variable vind; the first and last bins of the
   // range are filled with only the fraction of the bin the cut covers.
   // Assumes h2 has a single y-bin, filled at y=0.5 -- see MakeVisHists().
   if (rule==0) return;
   if (h2==0) return;
   //
   Double_t rmin, rmax;    // cut interval on variable vind
   Bool_t dormin,dormax;   // is a lower / upper cut actually set?
   Bool_t ruleHasVar = rule->GetRuleCut()->GetCutRange(vind,rmin,rmax,dormin,dormax);
   if (!ruleHasVar) return;
   //
   Int_t firstbin = h2->GetBin(1,1,1);
   if(firstbin<0) firstbin=0;
   Int_t lastbin = h2->GetBin(h2->GetNbinsX(),1,1);
   // bin range covered by the cut; an open end defaults to the axis edge
   Int_t binmin=(dormin ? h2->FindBin(rmin,0.5):firstbin);
   Int_t binmax=(dormax ? h2->FindBin(rmax,0.5):lastbin);
   Int_t fbin;
   Double_t xbinw = h2->GetXaxis()->GetBinWidth(firstbin);
   Double_t fbmin = h2->GetXaxis()->GetBinLowEdge(binmin-firstbin+1);
   Double_t lbmax = h2->GetXaxis()->GetBinLowEdge(binmax-firstbin+1)+xbinw;
   // fractional coverage of the first and last bins in the range
   Double_t fbfrac = (dormin ? ((fbmin+xbinw-rmin)/xbinw):1.0);
   Double_t lbfrac = (dormax ? ((rmax-lbmax+xbinw)/xbinw):1.0);
   Double_t f;
   Double_t xc;
   Double_t val;

   for (Int_t bin = binmin; bin<binmax+1; bin++) {
      fbin = bin-firstbin+1;
      if (bin==binmin) {
         f = fbfrac;
      }
      else if (bin==binmax) {
         f = lbfrac;
      }
      else {
         f = 1.0;
      }
      xc = h2->GetXaxis()->GetBinCenter(fbin);
      //
      // weight: rule importance, or coefficient*support when importance
      // weighting is disabled
      if (fVisHistsUseImp) {
         val = rule->GetImportance();
      }
      else {
         val = rule->GetCoefficient()*rule->GetSupport();
      }
      h2->Fill(xc,0.5,val*f);
   }
}
574 
575 ////////////////////////////////////////////////////////////////////////////////
576 /// fill lin
577 
579 {
580  if (h2==0) return;
581  if (!fRuleEnsemble.DoLinear()) return;
582  //
583  Int_t firstbin = 1;
584  Int_t lastbin = h2->GetNbinsX();
585  Double_t xc;
586  Double_t val;
587  if (fVisHistsUseImp) {
588  val = fRuleEnsemble.GetLinImportance(vind);
589  }
590  else {
591  val = fRuleEnsemble.GetLinCoefficients(vind);
592  }
593  for (Int_t bin = firstbin; bin<lastbin+1; bin++) {
594  xc = h2->GetXaxis()->GetBinCenter(bin);
595  h2->Fill(xc,0.5,val);
596  }
597 }
598 
599 ////////////////////////////////////////////////////////////////////////////////
600 /// fill rule correlation between vx and vy, weighted with either the importance or the coefficient
601 
602 void TMVA::RuleFit::FillCorr(TH2F* h2,const Rule *rule,Int_t vx, Int_t vy)
603 {
604  if (rule==0) return;
605  if (h2==0) return;
606  Double_t val;
607  if (fVisHistsUseImp) {
608  val = rule->GetImportance();
609  }
610  else {
611  val = rule->GetCoefficient()*rule->GetSupport();
612  }
613  //
614  Double_t rxmin, rxmax, rymin, rymax;
615  Bool_t dorxmin, dorxmax, dorymin, dorymax;
616  //
617  // Get range in rule for X and Y
618  //
619  Bool_t ruleHasVarX = rule->GetRuleCut()->GetCutRange(vx,rxmin,rxmax,dorxmin,dorxmax);
620  Bool_t ruleHasVarY = rule->GetRuleCut()->GetCutRange(vy,rymin,rymax,dorymin,dorymax);
621  if (!(ruleHasVarX || ruleHasVarY)) return;
622  // min max of varX and varY in hist
623  Double_t vxmin = (dorxmin ? rxmin:h2->GetXaxis()->GetXmin());
624  Double_t vxmax = (dorxmax ? rxmax:h2->GetXaxis()->GetXmax());
625  Double_t vymin = (dorymin ? rymin:h2->GetYaxis()->GetXmin());
626  Double_t vymax = (dorymax ? rymax:h2->GetYaxis()->GetXmax());
627  // min max bin in X and Y
628  Int_t binxmin = h2->GetXaxis()->FindBin(vxmin);
629  Int_t binxmax = h2->GetXaxis()->FindBin(vxmax);
630  Int_t binymin = h2->GetYaxis()->FindBin(vymin);
631  Int_t binymax = h2->GetYaxis()->FindBin(vymax);
632  // bin widths
633  Double_t xbinw = h2->GetXaxis()->GetBinWidth(binxmin);
634  Double_t ybinw = h2->GetYaxis()->GetBinWidth(binxmin);
635  Double_t xbinmin = h2->GetXaxis()->GetBinLowEdge(binxmin);
636  Double_t xbinmax = h2->GetXaxis()->GetBinLowEdge(binxmax)+xbinw;
637  Double_t ybinmin = h2->GetYaxis()->GetBinLowEdge(binymin);
638  Double_t ybinmax = h2->GetYaxis()->GetBinLowEdge(binymax)+ybinw;
639  // fraction of edges
640  Double_t fxbinmin = (dorxmin ? ((xbinmin+xbinw-vxmin)/xbinw):1.0);
641  Double_t fxbinmax = (dorxmax ? ((vxmax-xbinmax+xbinw)/xbinw):1.0);
642  Double_t fybinmin = (dorymin ? ((ybinmin+ybinw-vymin)/ybinw):1.0);
643  Double_t fybinmax = (dorymax ? ((vymax-ybinmax+ybinw)/ybinw):1.0);
644  //
645  Double_t fx,fy;
646  Double_t xc,yc;
647  // fill histo
648  for (Int_t binx = binxmin; binx<binxmax+1; binx++) {
649  if (binx==binxmin) {
650  fx = fxbinmin;
651  }
652  else if (binx==binxmax) {
653  fx = fxbinmax;
654  }
655  else {
656  fx = 1.0;
657  }
658  xc = h2->GetXaxis()->GetBinCenter(binx);
659  for (Int_t biny = binymin; biny<binymax+1; biny++) {
660  if (biny==binymin) {
661  fy = fybinmin;
662  }
663  else if (biny==binymax) {
664  fy = fybinmax;
665  }
666  else {
667  fy = 1.0;
668  }
669  yc = h2->GetYaxis()->GetBinCenter(biny);
670  h2->Fill(xc,yc,val*fx*fy);
671  }
672  }
673 }
674 
675 ////////////////////////////////////////////////////////////////////////////////
676 /// help routine to MakeVisHists() - fills for all variables
677 
void TMVA::RuleFit::FillVisHistCut(const Rule* rule, std::vector<TH2F *> & hlist)
{
   // Helper for MakeVisHists(): for each input variable fill its 1D
   // visualization histogram -- from the rule's cut when rule!=0, or from
   // the linear term when rule==0.
   Int_t nhists = hlist.size();
   Int_t nvar = fMethodBase->GetNvar();
   if (nhists!=nvar) Log() << kFATAL << "BUG TRAP: number of hists is not equal the number of variables!" << Endl;
   //
   // Map each histogram to its variable index by matching histogram titles
   // against the method's input-variable titles.
   std::vector<Int_t> vindex;
   TString hstr;
   // not a nice way to do a check...
   for (Int_t ih=0; ih<nhists; ih++) {
      hstr = hlist[ih]->GetTitle();
      for (Int_t iv=0; iv<nvar; iv++) {
         if (fMethodBase->GetInputTitle(iv) == hstr)
            vindex.push_back(iv);
      }
   }
   // NOTE(review): if any title fails to match, vindex ends up shorter than
   // nvar and vindex[iv] below reads out of bounds -- confirm titles always
   // match one variable each.
   //
   for (Int_t iv=0; iv<nvar; iv++) {
      if (rule) {
         if (rule->ContainsVariable(vindex[iv])) {
            FillCut(hlist[iv],rule,vindex[iv]);
         }
      }
      else {
         FillLin(hlist[iv],vindex[iv]);
      }
   }
}
706 ////////////////////////////////////////////////////////////////////////////////
707 /// help routine to MakeVisHists() - fills for all correlation plots
708 
void TMVA::RuleFit::FillVisHistCorr(const Rule * rule, std::vector<TH2F *> & hlist)
{
   // Helper for MakeVisHists(): fill every 2D correlation histogram whose
   // variable pair the rule cuts on. Rules with non-positive importance or
   // importance below the ensemble's importance cut are skipped.
   if (rule==0) return;
   Double_t ruleimp = rule->GetImportance();
   if (!(ruleimp>0)) return;
   if (ruleimp<fRuleEnsemble.GetImportanceCut()) return;
   //
   Int_t nhists = hlist.size();
   Int_t nvar = fMethodBase->GetNvar();
   Int_t ncorr = (nvar*(nvar+1)/2)-nvar; // number of distinct variable pairs
   if (nhists!=ncorr) Log() << kERROR << "BUG TRAP: number of corr hists is not correct! ncorr = "
                            << ncorr << " nvar = " << nvar << " nhists = " << nhists << Endl;
   //
   // Decode each histogram name ("scat_<var1>_vs_<var2>...", parsed by
   // GetCorrVars) into an (X,Y) pair of variable indices.
   std::vector< std::pair<Int_t,Int_t> > vindex;
   TString hstr, var1, var2;
   Int_t iv1=0,iv2=0;
   // not a nice way to do a check...
   for (Int_t ih=0; ih<nhists; ih++) {
      hstr = hlist[ih]->GetName();
      if (GetCorrVars( hstr, var1, var2 )) {
         iv1 = fMethodBase->DataInfo().FindVarIndex( var1 );
         iv2 = fMethodBase->DataInfo().FindVarIndex( var2 );
         vindex.push_back( std::pair<Int_t,Int_t>(iv2,iv1) ); // pair X, Y
      }
      else {
         // NOTE(review): nothing is pushed on failure, so vindex[ih] below
         // can read out of bounds -- confirm names always parse.
         Log() << kERROR << "BUG TRAP: should not be here - failed getting var1 and var2" << Endl;
      }
   }
   //
   for (Int_t ih=0; ih<nhists; ih++) {
      if ( (rule->ContainsVariable(vindex[ih].first)) ||
           (rule->ContainsVariable(vindex[ih].second)) ) {
         FillCorr(hlist[ih],rule,vindex[ih].first,vindex[ih].second);
      }
   }
}
745 ////////////////////////////////////////////////////////////////////////////////
746 /// get first and second variables from title
747 
749 {
750  var1="";
751  var2="";
752  if(!title.BeginsWith("scat_")) return kFALSE;
753 
754  TString titleCopy = title(5,title.Length());
755  if(titleCopy.Index("_RF2D")>=0) titleCopy.Remove(titleCopy.Index("_RF2D"));
756 
757  Int_t splitPos = titleCopy.Index("_vs_");
758  if(splitPos>=0) { // there is a _vs_ in the string
759  var1 = titleCopy(0,splitPos);
760  var2 = titleCopy(splitPos+4, titleCopy.Length());
761  return kTRUE;
762  }
763  else {
764  var1 = titleCopy;
765  return kFALSE;
766  }
767 }
768 ////////////////////////////////////////////////////////////////////////////////
769 /// this will create histograms visualizing the rule ensemble
770 
772 {
773  const TString directories[5] = { "InputVariables_Id",
774  "InputVariables_Deco",
775  "InputVariables_PCA",
776  "InputVariables_Gauss",
777  "InputVariables_Gauss_Deco" };
778 
779  const TString corrDirName = "CorrelationPlots";
780 
781  TDirectory* rootDir = Factory::RootBaseDir();
782  TDirectory* varDir = 0;
783  TDirectory* corrDir = 0;
784 
785  TDirectory* methodDir = fMethodBase->BaseDir();
786  TString varDirName;
787  //
788  Bool_t done=(rootDir==0);
789  Int_t type=0;
790  if (done) {
791  Log() << kWARNING << "No basedir - BUG??" << Endl;
792  return;
793  }
794  while (!done) {
795  varDir = (TDirectory*)rootDir->Get( directories[type] );
796  type++;
797  done = ((varDir!=0) || (type>4));
798  }
799  if (varDir==0) {
800  Log() << kWARNING << "No input variable directory found - BUG?" << Endl;
801  return;
802  }
803  corrDir = (TDirectory*)varDir->Get( corrDirName );
804  if (corrDir==0) {
805  Log() << kWARNING << "No correlation directory found" << Endl;
806  Log() << kWARNING << "Check for other warnings related to correlation histograms" << Endl;
807  return;
808  }
809  if (methodDir==0) {
810  Log() << kWARNING << "No rulefit method directory found - BUG?" << Endl;
811  return;
812  }
813 
814  varDirName = varDir->GetName();
815  varDir->cd();
816  //
817  // get correlation plot directory
818  corrDir = (TDirectory *)varDir->Get(corrDirName);
819  if (corrDir==0) {
820  Log() << kWARNING << "No correlation directory found : " << corrDirName << Endl;
821  return;
822  }
823 
824  // how many plots are in the var directory?
825  Int_t noPlots = ((varDir->GetListOfKeys())->GetEntries()) / 2;
826  Log() << kDEBUG << "Got number of plots = " << noPlots << Endl;
827 
828  // loop over all objects in directory
829  std::vector<TH2F *> h1Vector;
830  std::vector<TH2F *> h2CorrVector;
831  TIter next(varDir->GetListOfKeys());
832  TKey *key;
833  while ((key = (TKey*)next())) {
834  // make sure, that we only look at histograms
835  TClass *cl = gROOT->GetClass(key->GetClassName());
836  if (!cl->InheritsFrom(TH1F::Class())) continue;
837  TH1F *sig = (TH1F*)key->ReadObj();
838  TString hname= sig->GetName();
839  Log() << kDEBUG << "Got histogram : " << hname << Endl;
840 
841  // check for all signal histograms
842  if (hname.Contains("__S")){ // found a new signal plot
843  TString htitle = sig->GetTitle();
844  htitle.ReplaceAll("signal","");
845  TString newname = hname;
846  newname.ReplaceAll("__Signal","__RF");
847  newname.ReplaceAll("__S","__RF");
848 
849  methodDir->cd();
850  TH2F *newhist = new TH2F(newname,htitle,sig->GetNbinsX(),sig->GetXaxis()->GetXmin(),sig->GetXaxis()->GetXmax(),
851  1,sig->GetYaxis()->GetXmin(),sig->GetYaxis()->GetXmax());
852  varDir->cd();
853  h1Vector.push_back( newhist );
854  }
855  }
856  //
857  corrDir->cd();
858  TString var1,var2;
859  TIter nextCorr(corrDir->GetListOfKeys());
860  while ((key = (TKey*)nextCorr())) {
861  // make sure, that we only look at histograms
862  TClass *cl = gROOT->GetClass(key->GetClassName());
863  if (!cl->InheritsFrom(TH2F::Class())) continue;
864  TH2F *sig = (TH2F*)key->ReadObj();
865  TString hname= sig->GetName();
866 
867  // check for all signal histograms
868  if ((hname.Contains("scat_")) && (hname.Contains("_Signal"))) {
869  Log() << kDEBUG << "Got histogram (2D) : " << hname << Endl;
870  TString htitle = sig->GetTitle();
871  htitle.ReplaceAll("(Signal)","");
872  TString newname = hname;
873  newname.ReplaceAll("_Signal","_RF2D");
874 
875  methodDir->cd();
876  const Int_t rebin=2;
877  TH2F *newhist = new TH2F(newname,htitle,
878  sig->GetNbinsX()/rebin,sig->GetXaxis()->GetXmin(),sig->GetXaxis()->GetXmax(),
879  sig->GetNbinsY()/rebin,sig->GetYaxis()->GetXmin(),sig->GetYaxis()->GetXmax());
880  if (GetCorrVars( newname, var1, var2 )) {
881  Int_t iv1 = fMethodBase->DataInfo().FindVarIndex(var1);
882  Int_t iv2 = fMethodBase->DataInfo().FindVarIndex(var2);
883  if (iv1<0) {
884  sig->GetYaxis()->SetTitle(var1);
885  }
886  else {
887  sig->GetYaxis()->SetTitle(fMethodBase->GetInputTitle(iv1));
888  }
889  if (iv2<0) {
890  sig->GetXaxis()->SetTitle(var2);
891  }
892  else {
893  sig->GetXaxis()->SetTitle(fMethodBase->GetInputTitle(iv2));
894  }
895  }
896  corrDir->cd();
897  h2CorrVector.push_back( newhist );
898  }
899  }
900 
901 
902  varDir->cd();
903  // fill rules
904  UInt_t nrules = fRuleEnsemble.GetNRules();
905  const Rule *rule;
906  for (UInt_t i=0; i<nrules; i++) {
907  rule = fRuleEnsemble.GetRulesConst(i);
908  FillVisHistCut(rule, h1Vector);
909  }
910  // fill linear terms and normalise hists
911  FillVisHistCut(0, h1Vector);
912  NormVisHists(h1Vector);
913 
914  //
915  corrDir->cd();
916  // fill rules
917  for (UInt_t i=0; i<nrules; i++) {
918  rule = fRuleEnsemble.GetRulesConst(i);
919  FillVisHistCorr(rule, h2CorrVector);
920  }
921  NormVisHists(h2CorrVector);
922 
923  // write histograms to file
924  methodDir->cd();
925  for (UInt_t i=0; i<h1Vector.size(); i++) h1Vector[i]->Write();
926  for (UInt_t i=0; i<h2CorrVector.size(); i++) h2CorrVector[i]->Write();
927 }
928 
929 ////////////////////////////////////////////////////////////////////////////////
930 /// this will create a histograms intended rather for debugging or for the curious user
931 
933 {
934  TDirectory* methodDir = fMethodBase->BaseDir();
935  if (methodDir==0) {
936  Log() << kWARNING << "<MakeDebugHists> No rulefit method directory found - bug?" << Endl;
937  return;
938  }
939  //
940  methodDir->cd();
941  std::vector<Double_t> distances;
942  std::vector<Double_t> fncuts;
943  std::vector<Double_t> fnvars;
944  const Rule *ruleA;
945  const Rule *ruleB;
946  Double_t dABmin=1000000.0;
947  Double_t dABmax=-1.0;
948  UInt_t nrules = fRuleEnsemble.GetNRules();
949  for (UInt_t i=0; i<nrules; i++) {
950  ruleA = fRuleEnsemble.GetRulesConst(i);
951  for (UInt_t j=i+1; j<nrules; j++) {
952  ruleB = fRuleEnsemble.GetRulesConst(j);
953  Double_t dAB = ruleA->RuleDist( *ruleB, kTRUE );
954  if (dAB>-0.5) {
955  UInt_t nc = ruleA->GetNcuts();
956  UInt_t nv = ruleA->GetNumVarsUsed();
957  distances.push_back(dAB);
958  fncuts.push_back(static_cast<Double_t>(nc));
959  fnvars.push_back(static_cast<Double_t>(nv));
960  if (dAB<dABmin) dABmin=dAB;
961  if (dAB>dABmax) dABmax=dAB;
962  }
963  }
964  }
965  //
966  TH1F *histDist = new TH1F("RuleDist","Rule distances",100,dABmin,dABmax);
967  TTree *distNtuple = new TTree("RuleDistNtuple","RuleDist ntuple");
968  Double_t ntDist;
969  Double_t ntNcuts;
970  Double_t ntNvars;
971  distNtuple->Branch("dist", &ntDist, "dist/D");
972  distNtuple->Branch("ncuts",&ntNcuts, "ncuts/D");
973  distNtuple->Branch("nvars",&ntNvars, "nvars/D");
974  //
975  for (UInt_t i=0; i<distances.size(); i++) {
976  histDist->Fill(distances[i]);
977  ntDist = distances[i];
978  ntNcuts = fncuts[i];
979  ntNvars = fnvars[i];
980  distNtuple->Fill();
981  }
982  distNtuple->Write();
983 }
void ForestStatistics()
summary of statistics of all trees
Definition: RuleFit.cxx:379
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:52
void SetPruneMethod(EPruneMethod m=kCostComplexityPruning)
Definition: DecisionTree.h:148
void MakeForest()
make a forest of decisiontrees
Definition: RuleFit.cxx:214
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Definition: TH1.cxx:3478
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:6174
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3159
static TDirectory * RootBaseDir()
Definition: Factory.h:228
Random number generator class based on M.
Definition: TRandom3.h:29
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
long long Long64_t
Definition: RtypesCore.h:69
virtual void SetMaximum(Double_t maximum=-1111)
Definition: TH1.h:394
const std::vector< const TMVA::Event * > & GetTrainingEvents() const
Definition: RuleFit.h:141
void CalcImportance()
calculates the importance of each rule
Definition: RuleFit.cxx:411
virtual TList * GetListOfKeys() const
Definition: TDirectory.h:158
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
Definition: TDirectory.cxx:727
ClassImp(TSeqCollection) Int_t TSeqCollection TIter next(this)
Return index of object in collection.
Ssiz_t Length() const
Definition: TString.h:390
void FillVisHistCorr(const Rule *rule, std::vector< TH2F * > &hlist)
help routine to MakeVisHists() - fills for all correlation plots
Definition: RuleFit.cxx:709
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4306
void SetMsgType(EMsgType t)
set the current message type to that of mlog for this class and all other subtools ...
Definition: RuleFit.cxx:183
Bool_t GetCorrVars(TString &title, TString &var1, TString &var2)
get first and second variables from title
Definition: RuleFit.cxx:748
virtual Double_t GetBinLowEdge(Int_t bin) const
Return low edge of bin.
Definition: TAxis.cxx:489
void InitNEveEff()
init effective number of events (using event weights)
Definition: RuleFit.cxx:90
virtual void SetMinimum(Double_t minimum=-1111)
Definition: TH1.h:395
#define gROOT
Definition: TROOT.h:344
void FitCoefficients()
Fit the coefficients for the rule ensemble.
Definition: RuleFit.cxx:402
Basic string class.
Definition: TString.h:137
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:570
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
virtual Double_t GetBinWidth(Int_t bin) const
Return bin width.
Definition: TAxis.cxx:511
virtual Int_t GetNbinsX() const
Definition: TH1.h:296
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:558
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
ClassImp(TMVA::RuleFit) TMVA
constructor
Definition: RuleFit.cxx:52
Double_t RuleDist(const Rule &other, Bool_t useCutValue) const
Returns: -1.0 : rules are NOT equal, i.e, variables and/or cut directions are wrong >=0: rules are eq...
Definition: Rule.cxx:187
TFile * f
UInt_t GetNcuts() const
Definition: Rule.h:139
Tools & gTools()
Definition: Tools.cxx:79
RuleFit(void)
default constructor
Definition: RuleFit.cxx:68
TStopwatch timer
Definition: pirndm.C:37
void BuildTree(TMVA::DecisionTree *dt)
build the decision tree using fNTreeSample events from fTrainingEventsRndm
Definition: RuleFit.cxx:193
const MethodBase * GetMethodBase() const
Definition: RuleFit.h:153
void Class()
Definition: Class.C:29
void SetTrainingEvents(const std::vector< const TMVA::Event * > &el)
set the training events randomly
Definition: RuleFit.cxx:433
void GetRndmSampleEvents(std::vector< const TMVA::Event * > &evevec, UInt_t nevents)
draw a random subsample of the training events without replacement
Definition: RuleFit.cxx:460
TH2D * h2
Definition: fit2dHist.C:45
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:30
virtual ~RuleFit(void)
destructor
Definition: RuleFit.cxx:82
ROOT::Math::KDTree< _DataPoint > * BuildTree(const std::vector< const _DataPoint * > &vDataPoints, const unsigned int iBucketSize)
Double_t GetXmin() const
Definition: TAxis.h:137
const RuleCut * GetRuleCut() const
Definition: Rule.h:145
void SetMethodBase(const MethodBase *rfbase)
set MethodBase
Definition: RuleFit.cxx:143
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
const std::vector< const TMVA::DecisionTree * > & GetForest() const
Definition: RuleFit.h:147
TThread * t[5]
Definition: threadsh1.C:13
void SetNVars(Int_t n)
Definition: DecisionTree.h:202
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:8780
void RestoreEventWeights()
restore event weights that were saved before making the forest
Definition: RuleFit.cxx:314
void FillVisHistCut(const Rule *rule, std::vector< TH2F * > &hlist)
help routine to MakeVisHists() - fills for all variables
Definition: RuleFit.cxx:678
void Copy(const RuleFit &other)
copy method
Definition: RuleFit.cxx:152
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
Bool_t ContainsVariable(UInt_t iv) const
check if variable in node
Definition: Rule.cxx:135
TPaveLabel title(3, 27.1, 15, 28.7,"ROOT Environment and Tools")
void FillCorr(TH2F *h2, const TMVA::Rule *rule, Int_t v1, Int_t v2)
fill rule correlation between vx and vy, weighted with either the importance or the coefficient ...
Definition: RuleFit.cxx:602
void MakeDebugHists()
this will create a histograms intended rather for debugging or for the curious user ...
Definition: RuleFit.cxx:932
static const Int_t randSEED
Definition: RuleFit.h:179
2-D histogram with a float per channel (see TH1 documentation)}
Definition: TH2.h:256
Double_t GetImportance() const
Definition: Rule.h:151
const RuleEnsemble & GetRuleEnsemble() const
Definition: RuleFit.h:148
EMsgType
Definition: Types.h:61
void SetPruneStrength(Double_t p)
Definition: DecisionTree.h:154
unsigned int UInt_t
Definition: RtypesCore.h:42
bool first
Definition: line3Dfit.C:48
tuple w
Definition: qtexample.py:51
void FillLin(TH2F *h2, Int_t vind)
fill lin
Definition: RuleFit.cxx:578
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
Double_t GetWeight(Double_t x) const
The ROOT global object gROOT contains a list of all defined classes.
Definition: TClass.h:81
TAxis * GetYaxis()
Definition: TH1.h:320
Bool_t GetCutRange(Int_t sel, Double_t &rmin, Double_t &rmax, Bool_t &dormin, Bool_t &dormax) const
get cut range for a given selector
Definition: RuleCut.cxx:170
const MethodRuleFit * GetMethodRuleFit() const
Definition: RuleFit.h:152
void Boost(TMVA::DecisionTree *dt)
Boost the events.
Definition: RuleFit.cxx:332
virtual Int_t GetBin(Int_t binx, Int_t biny, Int_t binz=0) const
Return Global bin number corresponding to binx,y,z.
Definition: TH2.cxx:961
virtual Int_t FindBin(Double_t x)
Find bin number corresponding to abscissa x.
Definition: TAxis.cxx:264
TString & Remove(Ssiz_t pos)
Definition: TString.h:616
void SaveEventWeights()
save event weights - must be done before making the forest
Definition: RuleFit.cxx:302
tuple tree
Definition: tree.py:24
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:44
int type
Definition: TGX11.cxx:120
void MakeVisHists()
this will create histograms visualizing the rule ensemble
Definition: RuleFit.cxx:771
UInt_t GetNNodes() const
Definition: BinaryTree.h:92
Double_t GetXmax() const
Definition: TAxis.h:138
UInt_t GetNumVarsUsed() const
Definition: Rule.h:136
virtual Double_t Uniform(Double_t x1=1)
Returns a uniform deviate on the interval (0, x1).
Definition: TRandom.cxx:606
void InitPtrs(const TMVA::MethodBase *rfbase)
initialize pointers
Definition: RuleFit.cxx:102
Double_t GetCoefficient() const
Definition: Rule.h:147
Double_t CalcWeightSum(const std::vector< const TMVA::Event * > *events, UInt_t neve=0)
calculate the sum of weights
Definition: RuleFit.cxx:168
void NormVisHists(std::vector< TH2F * > &hlist)
normalize rule importance hists
Definition: RuleFit.cxx:480
virtual Double_t GetBinCenter(Int_t bin) const
Return center of bin.
Definition: TAxis.cxx:449
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1623
virtual Int_t GetNbinsY() const
Definition: TH1.h:297
Double_t PruneTree(const EventConstList *validationSample=NULL)
prune (get rid of internal nodes) the Decision tree to avoid overtraining serveral different pruning ...
void FillCut(TH2F *h2, const TMVA::Rule *rule, Int_t vind)
Fill cut.
Definition: RuleFit.cxx:527
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:433
Double_t GetSupport() const
Definition: Rule.h:148
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:567
UInt_t BuildTree(const EventConstList &eventSample, DecisionTreeNode *node=NULL)
building the decision tree by recursively calling the splitting of one (root-) node into two daughter...
Double_t EvalEvent(const Event &e)
evaluate single event
Definition: RuleFit.cxx:425
A TTree object has a header with a name and a title.
Definition: TTree.h:98
Bool_t InheritsFrom(const char *cl) const
Return kTRUE if this class inherits from a class with name "classname".
Definition: TClass.cxx:4498
void Initialize(const TMVA::MethodBase *rfbase)
initialize the parameters of the RuleFit method and make rules
Definition: RuleFit.cxx:112
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:582
virtual Double_t GetMaximum(Double_t maxval=FLT_MAX) const
Return maximum value smaller than maxval of bins in the range, unless the value has been overridden b...
Definition: TH1.cxx:7921
const Bool_t kTRUE
Definition: Rtypes.h:91
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:287
virtual void SetTitle(const char *title="")
Change (i.e. set) the title of the TNamed.
Definition: TNamed.cxx:152
virtual Double_t GetMinimum(Double_t minval=-FLT_MAX) const
Return minimum value larger than minval of bins in the range, unless the value has been overridden by...
Definition: TH1.cxx:8006
Definition: math.cpp:60
TAxis * GetXaxis()
Definition: TH1.h:319