Logo ROOT   6.10/09
Reference Guide
RuleFitAPI.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : RuleFitAPI *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header file for description) *
12  * *
13  * Authors (alphabetical): *
14  * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * Iowa State U. *
19  * MPI-KP Heidelberg, Germany *
20  * *
21  * Redistribution and use in source and binary forms, with or without *
22  * modification, are permitted according to the terms listed in LICENSE *
23  * (http://tmva.sourceforge.net/LICENSE) *
24  **********************************************************************************/
25 
26 /*! \class TMVA::RuleFitAPI
27 \ingroup TMVA
28 J Friedman's RuleFit method
29 */
30 
31 #include "TMVA/RuleFitAPI.h"
32 
33 #include "TMVA/DataSet.h"
34 #include "TMVA/DataSetInfo.h"
35 #include "TMVA/MethodRuleFit.h"
36 #include "TMVA/RuleFit.h"
37 #include "TMVA/Timer.h"
38 #include "TMVA/Tools.h"
39 #include "TMVA/Types.h"
40 #include "TMVA/VariableInfo.h"
41 
42 #include "TROOT.h"
43 #include "TSystem.h"
44 #include "TMath.h"
45 
46 #include <algorithm>
47 
49 
51  RuleFit *rulefit,
52  EMsgType minType = kINFO ) :
53 fMethodRuleFit(rfbase),
54  fRuleFit(rulefit),
55  fRFProgram(kRfTrain),
56  fLogger("RuleFitAPI",minType)
57 {
58  // standard constructor
59  if (rfbase) {
60  SetRFWorkDir(rfbase->GetRFWorkDir());
61  } else {
62  SetRFWorkDir("./rulefit");
63  }
64  InitRuleFit();
65 }
66 
67 
68 ////////////////////////////////////////////////////////////////////////////////
69 /// destructor
70 
72 {
73 }
74 
75 ////////////////////////////////////////////////////////////////////////////////
76 /// welcome message
77 
79 {
80  fLogger << kINFO
81  << "\n"
82  << "---------------------------------------------------------------------------\n"
83  << "- You are running the interface to Jerome Friedmans RuleFit(tm) code. -\n"
84  << "- For a full manual see the following web page: -\n"
85  << "- -\n"
86  << "- http://www-stat.stanford.edu/~jhf/R-RuleFit.html -\n"
87  << "- -\n"
88  << "---------------------------------------------------------------------------"
89  << Endl;
90 }
91 ////////////////////////////////////////////////////////////////////////////////
92 /// howto message
93 
95 {
96  fLogger << kINFO
97  << "\n"
98  << "------------------------ RULEFIT-JF INTERFACE SETUP -----------------------\n"
99  << "\n"
100  << "1. Create a rulefit directory in your current work directory:\n"
101  << " mkdir " << fRFWorkDir << "\n\n"
102  << " the directory may be set using the option RuleFitDir\n"
103  << "\n"
104  << "2. Copy (or make a link) the file rf_go.exe into this directory\n"
105  << "\n"
106  << "The file can be obtained from Jerome Friedmans homepage (linux):\n"
107  << " wget http://www-stat.stanford.edu/~jhf/r-rulefit/linux/rf_go.exe\n"
108  << "\n"
109  << "Don't forget to do:\n"
110  << " chmod +x rf_go.exe\n"
111  << "\n"
112  << "For Windows download:\n"
113  << " http://www-stat.stanford.edu/~jhf/r-rulefit/windows/rf_go.exe\n"
114  << "\n"
115  << "NOTE: other platforms are not supported (see Friedmans homepage)\n"
116  << "\n"
117  << "---------------------------------------------------------------------------\n"
118  << Endl;
119 }
120 ////////////////////////////////////////////////////////////////////////////////
121 /// default initialisation
122 /// SetRFWorkDir("./rulefit");
123 
125 {
126  CheckRFWorkDir();
127  FillIntParmsDef();
129 }
130 
131 ////////////////////////////////////////////////////////////////////////////////
132 /// import setup from MethodRuleFit
133 
135 {
140  //
145  //
148  else if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyRules() )
150  else
152 }
153 
154 ////////////////////////////////////////////////////////////////////////////////
155 /// set the directory containing rf_go.exe.
156 
157 void TMVA::RuleFitAPI::SetRFWorkDir(const char * wdir)
158 {
159  fRFWorkDir = wdir;
160 }
161 
162 ////////////////////////////////////////////////////////////////////////////////
163 /// check if the rulefit work dir is properly setup.
164 /// it aborts (kFATAL) if not.
165 ///
166 /// Check existence of directory
167 
169 {
170  TString oldDir = gSystem->pwd();
171  if (!gSystem->cd(fRFWorkDir)) {
172  fLogger << kWARNING << "Must create a rulefit directory named : " << fRFWorkDir << Endl;
173  HowtoSetupRF();
174  fLogger << kFATAL << "Setup failed - aborting!" << Endl;
175  }
176  // check rf_go.exe
177  FILE *f = fopen("rf_go.exe","r");
178  if (f==0) {
179  fLogger << kWARNING << "No rf_go.exe file in directory : " << fRFWorkDir << Endl;
180  HowtoSetupRF();
181  fLogger << kFATAL << "Setup failed - aborting!" << Endl;
182  }
183  fclose(f);
184  gSystem->cd(oldDir.Data());
185 }
186 
187 ////////////////////////////////////////////////////////////////////////////////
188 /// set the training parameters
189 
191 {
192  ImportSetup();
193  //
195  // Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
196  fRFIntParms.n = n; // number of data points in tree
198 }
199 
200 ////////////////////////////////////////////////////////////////////////////////
201 /// set the test params
202 
204 {
205  ImportSetup();
207  // Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
208  fRFIntParms.n = n; // number of data points in tree
210 }
211 
212 ////////////////////////////////////////////////////////////////////////////////
213 /// set default real params
214 
216 {
217  fRFRealParms.xmiss = 9.0e30;
218  fRFRealParms.trim_qntl = 0.025;
219  fRFRealParms.huber = 0.8;
220  fRFRealParms.inter_supp = 3.0;
221  fRFRealParms.memory_par = 0.01;
222  fRFRealParms.samp_fract = 0.5; // calculated later
223  fRFRealParms.path_inc = 0.01;
224  fRFRealParms.conv_fac = 1.1;
225 }
226 
227 ////////////////////////////////////////////////////////////////////////////////
228 /// set default int params
229 
231 {
232  fRFIntParms.mode = (int)kRfClass;
233  fRFIntParms.lmode = (int)kRfBoth;
234  // fRFIntParms.n;
235  // fRFIntParms.p;
236  fRFIntParms.max_rules = 2000;
240  fRFIntParms.path_steps = 50000;
242  fRFIntParms.tree_store = 10000000;
243  fRFIntParms.cat_store = 1000000;
244 
245 }
246 
247 ////////////////////////////////////////////////////////////////////////////////
248 /// write all files read by rf_go.exe
249 
251 {
252  WriteIntParms();
253  WriteRealParms();
254  WriteLx();
255  WriteProgram();
256  WriteVarNames();
260  return kTRUE;
261 }
262 
263 ////////////////////////////////////////////////////////////////////////////////
264 /// write int params file
265 
267 {
268  std::ofstream f;
269  if (!OpenRFile("intparms",f)) return kFALSE;
270  WriteInt(f,&fRFIntParms.mode,sizeof(fRFIntParms)/sizeof(Int_t));
271  return kTRUE;
272 }
273 
274 ////////////////////////////////////////////////////////////////////////////////
275 /// write int params file
276 
278 {
279  std::ofstream f;
280  if (!OpenRFile("realparms",f)) return kFALSE;
281  WriteFloat(f,&fRFRealParms.xmiss,sizeof(fRFRealParms)/sizeof(Float_t));
282  return kTRUE;
283 }
284 
285 ////////////////////////////////////////////////////////////////////////////////
286 /// Save input variable mask
287 ///
288 /// If the lx vector size is not the same as inputVars,
289 /// resize it and fill it with 1
290 /// NOTE: Always set all to 1
291 /// if (fRFLx.size() != m_inputVars->size()) {
292 
294 {
295  fRFLx.clear();
297  // }
298  std::ofstream f;
299  if (!OpenRFile("lx",f)) return kFALSE;
300  WriteInt(f,&fRFLx[0],fRFLx.size());
301  return kTRUE;
302 }
303 
304 ////////////////////////////////////////////////////////////////////////////////
305 /// write command to rf_go.exe
306 
308 {
309  std::ofstream f;
310  if (!OpenRFile("program",f)) return kFALSE;
311  TString program;
312  switch (fRFProgram) {
313  case kRfTrain:
314  program = "rulefit";
315  break;
316  case kRfPredict:
317  program = "rulefit_pred";
318  break;
319  // calculate variable importance
320  case kRfVarimp:
321  program = "varimp";
322  break;
323  default:
325  program="rulefit";
326  break;
327  }
328  f << program;
329  return kTRUE;
330 }
331 
332 ////////////////////////////////////////////////////////////////////////////////
333 /// write the minimum importance to be considered
334 
336 {
337  std::ofstream f;
338  if (!OpenRFile("realvarimp",f)) return kFALSE;
339  Float_t rvp[2];
340  rvp[0] = 0.0; // Mode: see varimp() in rulefit.r
341  rvp[1] = 0.0; // Minimum importance considered (1 is max)
342  WriteFloat(f,&rvp[0],2);
343  return kTRUE;
344 }
345 
346 ////////////////////////////////////////////////////////////////////////////////
347 /// written by rf_go.exe; write rulefit output (rfout)
348 
350 {
351  fLogger << kWARNING << "WriteRfOut is not yet implemented" << Endl;
352  return kTRUE;
353 }
354 
355 ////////////////////////////////////////////////////////////////////////////////
356 /// written by rf_go.exe; write rulefit status
357 
359 {
360  fLogger << kWARNING << "WriteRfStatus is not yet implemented" << Endl;
361  return kTRUE;
362 }
363 
364 ////////////////////////////////////////////////////////////////////////////////
365 /// written by rf_go.exe (NOTE:Format unknown!)
366 
368 {
369  fLogger << kWARNING << "WriteRuleFitMod is not yet implemented" << Endl;
370  return kTRUE;
371 }
372 
373 ////////////////////////////////////////////////////////////////////////////////
374 /// written by rf_go.exe (NOTE: format unknown!)
375 
377 {
378  fLogger << kWARNING << "WriteRuleFitSum is not yet implemented" << Endl;
379  return kTRUE;
380 }
381 
382 ////////////////////////////////////////////////////////////////////////////////
383 /// write training data, column wise
384 
386 {
387  std::ofstream fx;
388  std::ofstream fy;
389  std::ofstream fw;
390  //
391  if (!OpenRFile("train.x",fx)) return kFALSE;
392  if (!OpenRFile("train.y",fy)) return kFALSE;
393  if (!OpenRFile("train.w",fw)) return kFALSE;
394  //
395  Float_t x,y,w;
396  //
397  // The loop order cannot be changed.
398  // The data is stored <var1(eve1), var1(eve2), ...var1(eveN), var2(eve1),....
399  //
400  for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
401  for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNTrainingEvents(); ievt++) {
402  const Event * ev = fMethodRuleFit->GetTrainingEvent(ievt);
403  x = ev->GetValue(ivar);
404  WriteFloat(fx,&x,1);
405  if (ivar==0) {
406  w = ev->GetWeight();
407  y = fMethodRuleFit->DataInfo().IsSignal(ev)? 1.0 : -1.0;
408  WriteFloat(fy,&y,1);
409  WriteFloat(fw,&w,1);
410  }
411  }
412  }
413  fLogger << kINFO << "Number of training data written: " << fMethodRuleFit->Data()->GetNTrainingEvents() << Endl;
414  return kTRUE;
415 }
416 
417 ////////////////////////////////////////////////////////////////////////////////
418 /// Write test data
419 
421 {
423 
424  std::ofstream f;
425  //
426  if (!OpenRFile("test.x",f)) return kFALSE;
427  //
428  Float_t vf;
429  Float_t neve;
430  //
431  neve = static_cast<Float_t>(fMethodRuleFit->Data()->GetNEvents());
432  WriteFloat(f,&neve,1);
433  // Test data is saved as:
434  // 0 : <N> num of events, type float, 4 bytes
435  // 1-N : First variable for all events
436  // N+1-2N : Second variable...
437  // ...
438  for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
439  for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNEvents(); ievt++) {
440  vf = fMethodRuleFit->GetEvent(ievt)->GetValue(ivar);
441  WriteFloat(f,&vf,1);
442  }
443  }
444  fLogger << kINFO << "Number of test data written: " << fMethodRuleFit->Data()->GetNEvents() << Endl;
445  //
446  return kTRUE;
447 }
448 
449 ////////////////////////////////////////////////////////////////////////////////
450 /// write variable names, ascii
451 
453 {
454  std::ofstream f;
455  if (!OpenRFile("varnames",f)) return kFALSE;
456  for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
457  f << fMethodRuleFit->DataInfo().GetVariableInfo(ivar).GetExpression() << '\n';
458  }
459  return kTRUE;
460 }
461 
462 ////////////////////////////////////////////////////////////////////////////////
463 
465 
466 {
467  // written by rf_go.exe
468  fLogger << kWARNING << "WriteVarImp is not yet implemented" << Endl;
469  return kTRUE;
470 }
471 
472 ////////////////////////////////////////////////////////////////////////////////
473 /// written by rf_go.exe
474 
476 {
477  fLogger << kWARNING << "WriteYhat is not yet implemented" << Endl;
478  return kTRUE;
479 }
480 
481 ////////////////////////////////////////////////////////////////////////////////
482 /// read the score
483 
485 {
486  fRFYhat.clear();
487  //
488  std::ifstream f;
489  if (!OpenRFile("yhat",f)) return kFALSE;
490  Int_t neve;
491  Float_t xval;
492  ReadFloat(f,&xval,1);
493  neve = static_cast<Int_t>(xval);
494  if (neve!=fMethodRuleFit->Data()->GetNTestEvents()) {
495  fLogger << kWARNING << "Inconsistent size of yhat file and test tree!" << Endl;
496  fLogger << kWARNING << "neve = " << neve << " , tree = " << fMethodRuleFit->Data()->GetNTestEvents() << Endl;
497  return kFALSE;
498  }
499  for (Int_t ievt=0; ievt<fMethodRuleFit->Data()->GetNTestEvents(); ievt++) {
500  ReadFloat(f,&xval,1);
501  fRFYhat.push_back(xval);
502  }
503  return kTRUE;
504 }
505 
506 ////////////////////////////////////////////////////////////////////////////////
507 /// read variable importance
508 
510 {
511  fRFVarImp.clear();
512  //
513  std::ifstream f;
514  if (!OpenRFile("varimp",f)) return kFALSE;
515  UInt_t nvars;
516  Float_t xval;
517  Float_t xmax=1.0;
519  //
520  // First read all importances
521  //
522  for (UInt_t ivar=0; ivar<nvars; ivar++) {
523  ReadFloat(f,&xval,1);
524  if (ivar==0) {
525  xmax=xval;
526  } else {
527  if (xval>xmax) xmax=xval;
528  }
529  fRFVarImp.push_back(xval);
530  }
531  //
532  // Read the indices.
533  // They are saved as float (!) by rf_go.exe.
534  //
535  for (UInt_t ivar=0; ivar<nvars; ivar++) {
536  fRFVarImp[ivar] = fRFVarImp[ivar]/xmax;
537  ReadFloat(f,&xval,1);
538  fRFVarImpInd.push_back(Int_t(xval)-1);
539  }
540  return kTRUE;
541 }
542 
543 ////////////////////////////////////////////////////////////////////////////////
544 /// read model from rulefit.sum
545 
547 {
548  fRFVarImp.clear();
549  //
550  fLogger << kVERBOSE << "Reading RuleFit summary file" << Endl;
551  std::ifstream f;
552  if (!OpenRFile("rulefit.sum",f)) return kFALSE;
553  Int_t lines=0;
554  Int_t nrules=0;
555  Int_t nvars=0;
556  Int_t nvarsOpt=0;
557  Int_t dumI;
558  Float_t dumF;
559  Float_t offset;
560  Double_t impref=-1.0;
561  Double_t imp;
562 
563  fRuleFit->GetRuleEnsemblePtr()->SetAverageRuleSigma(0.4); // value used by Friedmans RuleFit
564  //
565  //--------------------------------------------
566  // first read rulefit.sum header
567  //--------------------------------------------
568  // line type val descr
569  // 0 <int> 86 N(rules)x2
570  // 1 <int> 155 ???
571  // 2 <int> 1 ???
572  // 3 <int> 1916 ???
573  // 4 <int> 2 N(vars) ?
574  // 5 <int> 2 N(vars) ?
575  // 6 <float> 9e+30 xmiss
576  // 7 <float> 1.1e-1 a0 (model offset)
577  //--------------------------------------------
578  //
579  // NOTE: a model without any rules, will look like
580  // for the first four lines:
581  //
582  // 0 1
583  // 1 1
584  // 2 1
585  // 3 0
586  //
587  // There will later be one block of dummy data for one rule.
588  // In order to catch this situation, some special checks are made below.
589  //
590  Bool_t norules;
591  lines += ReadInt(f,&nrules);
592  norules = (nrules==1);
593  lines += ReadInt(f,&dumI);
594  norules = norules && (dumI==1);
595  lines += ReadInt(f,&dumI);
596  norules = norules && (dumI==1);
597  lines += ReadInt(f,&dumI);
598  norules = norules && (dumI==0);
599  if (nrules==0) norules=kTRUE; // this ugly construction is needed:(
600  if (norules) nrules = 0;
601  //
602  lines += ReadInt(f,&nvars);
603  lines += ReadInt(f,&nvarsOpt);
604  lines += ReadFloat(f,&dumF);
605  lines += ReadFloat(f,&offset);
606  fLogger << kDEBUG << "N(rules) = " << nrules << Endl;
607  fLogger << kDEBUG << "N(vars) = " << nvars << Endl;
608  fLogger << kDEBUG << "N(varsO) = " << nvarsOpt << Endl;
609  fLogger << kDEBUG << "xmiss = " << dumF << Endl;
610  fLogger << kDEBUG << "offset = " << offset << Endl;
611  if (nvars!=nvarsOpt) {
612  fLogger << kWARNING << "Format of rulefit.sum is ... weird?? Continuing but who knows how it will end...?" << Endl;
613  }
614  std::vector<Double_t> rfSupp;
615  std::vector<Double_t> rfCoef;
616  std::vector<Int_t> rfNcut;
617  std::vector<Rule *> rfRules;
618  if (norules) {
619  // if no rules, read 8 blocks of data
620  // this corresponds to one dummy rule
621  for (Int_t t=0; t<8; t++) {
622  lines += ReadFloat(f,&dumF);
623  }
624  }
625  //
626  //--------------------------------------------
627  // read first part of rule info
628  //--------------------------------------------
629  //
630  // 8 <int> 10 ???
631  // 9 <float> 0.185 support
632  // 10 <float> 0.051 coefficient
633  // 11 <float> 2 num of cuts in rule
634  // 12 <float> 1 ??? not used by this interface
635  //
636  for (Int_t r=0; r<nrules; r++) {
637  lines += ReadFloat(f,&dumF);
638  lines += ReadFloat(f,&dumF);
639  rfSupp.push_back(dumF);
640  lines += ReadFloat(f,&dumF);
641  rfCoef.push_back(dumF);
642  lines += ReadFloat(f,&dumF);
643  rfNcut.push_back(static_cast<int>(dumF+0.5));
644  lines += ReadFloat(f,&dumF);
645  //
646  }
647  //--------------------------------------------
648  // read second part of rule info
649  //--------------------------------------------
650  //
651  // Per range (cut):
652  // 0 <float> 1 varind
653  // 1 <float> -1.0 low
654  // 2 <float> 1.56 high
655  //
656 
657  for (Int_t r=0; r<nrules; r++) {
658  Int_t varind;
659  Double_t xmin;
660  Double_t xmax;
661  Rule *rule = new Rule(fRuleFit->GetRuleEnsemblePtr());
662  rfRules.push_back( rule );
663  RuleCut *rfcut = new RuleCut();
664  rfcut->SetNvars(rfNcut[r]);
665  rule->SetRuleCut( rfcut );
666  // the below are set to default values since no info is
667  // available in rulefit.sum
668  rule->SetNorm(1.0);
669  rule->SetSupport(0);
670  rule->SetSSB(0.0);
671  rule->SetSSBNeve(0.0);
672  rule->SetImportanceRef(1.0);
673  rule->SetSSB(0.0);
674  rule->SetSSBNeve(0.0);
675  // set support etc
676  rule->SetSupport(rfSupp[r]);
677  rule->SetCoefficient(rfCoef[r]);
678  rule->CalcImportance();
679  imp = rule->GetImportance();
680  if (imp>impref) impref = imp; // find max importance
681  //
682  fLogger << kDEBUG << "Rule #" << r << " : " << nvars << Endl;
683  fLogger << kDEBUG << " support = " << rfSupp[r] << Endl;
684  fLogger << kDEBUG << " sigma = " << rule->GetSigma() << Endl;
685  fLogger << kDEBUG << " coeff = " << rfCoef[r] << Endl;
686  fLogger << kDEBUG << " N(cut) = " << rfNcut[r] << Endl;
687 
688  for (Int_t c=0; c<rfNcut[r]; c++) {
689  lines += ReadFloat(f,&dumF);
690  varind = static_cast<Int_t>(dumF+0.5)-1;
691  lines += ReadFloat(f,&dumF);
692  xmin = static_cast<Double_t>(dumF);
693  lines += ReadFloat(f,&dumF);
694  xmax = static_cast<Double_t>(dumF);
695  // create Rule HERE!
696  rfcut->SetSelector(c,varind);
697  rfcut->SetCutMin(c,xmin);
698  rfcut->SetCutMax(c,xmax);
699  // the following is not nice - this is however defined
700  // by the rulefit.sum format.
701  rfcut->SetCutDoMin(c,(xmin<-8.99e35 ? kFALSE:kTRUE));
702  rfcut->SetCutDoMax(c,(xmax> 8.99e35 ? kFALSE:kTRUE));
703  //
704  }
705  }
706  fRuleFit->GetRuleEnsemblePtr()->SetRules( rfRules );
707  fRuleFit->GetRuleEnsemblePtr()->SetOffset( offset );
708  //--------------------------------------------
709  // read second part of rule info
710  //--------------------------------------------
711  //
712  // Per linear term:
713  // 73 1 var index
714  // 74 -1.99594 min
715  // 75 1.99403 max
716  // 76 -0.000741858 ??? average ???
717  // 77 0.970935 std
718  // 78 0 coeff
719  //
720  std::vector<Int_t> varind;
721  std::vector<Double_t> xmin;
722  std::vector<Double_t> xmax;
723  std::vector<Double_t> average;
724  std::vector<Double_t> stdev;
725  std::vector<Double_t> norm;
726  std::vector<Double_t> coeff;
727  //
728  for (Int_t c=0; c<nvars; c++) {
729  lines += ReadFloat(f,&dumF);
730  varind.push_back(static_cast<Int_t>(dumF+0.5)-1);
731  lines += ReadFloat(f,&dumF);
732  xmin.push_back(static_cast<Double_t>(dumF));
733  lines += ReadFloat(f,&dumF);
734  xmax.push_back(static_cast<Double_t>(dumF));
735  lines += ReadFloat(f,&dumF);
736  average.push_back(static_cast<Double_t>(dumF));
737  lines += ReadFloat(f,&dumF);
738  stdev.push_back(static_cast<Double_t>(dumF));
739  Double_t nv = fRuleFit->GetRuleEnsemblePtr()->CalcLinNorm(stdev.back());
740  norm.push_back(nv);
741  lines += ReadFloat(f,&dumF);
742  coeff.push_back(dumF/nv); // save coefficient for normalised var
743  //
744  fLogger << kDEBUG << "Linear #" << c << Endl;
745  fLogger << kDEBUG << " varind = " << varind.back() << Endl;
746  fLogger << kDEBUG << " xmin = " << xmin.back() << Endl;
747  fLogger << kDEBUG << " xmax = " << xmax.back() << Endl;
748  fLogger << kDEBUG << " average = " << average.back() << Endl;
749  fLogger << kDEBUG << " stdev = " << stdev.back() << Endl;
750  fLogger << kDEBUG << " coeff = " << coeff.back() << Endl;
751  }
752  if (xmin.size()>0) {
757  }
758  // fRuleFit->GetRuleEnsemblePtr()->CalcImportance();
760  if (imp>impref) impref=imp;
762  fRuleFit->GetRuleEnsemblePtr()->CleanupLinear(); // to fill fLinTermOK vector
763 
765  // fRuleFit->GetRuleEnsemblePtr()->CalcRuleSupport();
766 
767  fLogger << kDEBUG << "Reading model done" << Endl;
768  return kTRUE;
769 }
770 
771 ////////////////////////////////////////////////////////////////////////////////
772 /// execute rf_go.exe
773 
775 {
776  TString oldDir = gSystem->pwd();
777  TString cmd = "./rf_go.exe";
779  int rval = gSystem->Exec(cmd.Data());
780  gSystem->cd(oldDir.Data());
781  return rval;
782 }
Bool_t WriteLx()
Save input variable mask.
Definition: RuleFitAPI.cxx:293
Bool_t ReadVarImp()
read variable importance
Definition: RuleFitAPI.cxx:509
void WelcomeMessage()
welcome message
Definition: RuleFitAPI.cxx:78
UInt_t GetNVariables() const
Definition: DataSetInfo.h:110
void SetCoefficient(Double_t v)
Definition: Rule.h:82
J Friedman's RuleFit method.
Definition: MethodRuleFit.h:47
Double_t GetTreeEveFrac() const
Definition: MethodRuleFit.h:93
float xmin
Definition: THbookFile.cxx:93
void HowtoSetupRF()
howto message
Definition: RuleFitAPI.cxx:94
void SetLinDP(const std::vector< Double_t > &xmax)
Definition: RuleEnsemble.h:117
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
void SetSSBNeve(Double_t v)
Definition: Rule.h:91
RuleFit * fRuleFit
Definition: RuleFitAPI.h:180
void SetRuleCut(RuleCut *rc)
Definition: Rule.h:76
void SetCutMax(Int_t i, Double_t v)
Definition: RuleCut.h:65
Double_t GetGDErrScale() const
J Friedman's RuleFit method.
Definition: RuleFitAPI.h:50
Double_t GetSigma() const
Definition: Rule.h:141
void SetLinDM(const std::vector< Double_t > &xmin)
Definition: RuleEnsemble.h:116
float Float_t
Definition: RtypesCore.h:53
A class implementing various fits of rule ensembles.
Definition: RuleFit.h:44
ERFProgram fRFProgram
Definition: RuleFitAPI.h:189
virtual ~RuleFitAPI()
destructor
Definition: RuleFitAPI.cxx:71
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
Definition: RuleFitAPI.cxx:358
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
Definition: RuleFitAPI.cxx:157
Double_t CalcLinNorm(Double_t stdev)
Definition: RuleEnsemble.h:120
Bool_t WriteAll()
write all files read by rf_go.exe
Definition: RuleFitAPI.cxx:250
void FillIntParmsDef()
set default int params
Definition: RuleFitAPI.cxx:230
Bool_t cd(const char *path)
Definition: TSystem.h:404
Basic string class.
Definition: TString.h:129
Bool_t WriteTrain()
write training data, column wise
Definition: RuleFitAPI.cxx:385
const MethodRuleFit * fMethodRuleFit
Definition: RuleFitAPI.h:179
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
Definition: RuleFitAPI.cxx:349
Bool_t WriteIntParms()
write int params file
Definition: RuleFitAPI.cxx:266
Bool_t WriteRealVarImp()
write the minimum importance to be considered
Definition: RuleFitAPI.cxx:335
IntParms fRFIntParms
Definition: RuleFitAPI.h:186
void CleanupLinear()
cleanup linear model
void ImportSetup()
import setup from MethodRuleFit
Definition: RuleFitAPI.cxx:134
void CheckRFWorkDir()
check if the rulefit work dir is properly setup.
Definition: RuleFitAPI.cxx:168
void SetNvars(UInt_t nc)
Definition: RuleCut.h:141
const TString & GetExpression() const
Definition: VariableInfo.h:57
Implementation of a rule.
Definition: Rule.h:48
void SetAverageRuleSigma(Double_t v)
Definition: RuleEnsemble.h:137
Bool_t DoOnlyLinear() const
Definition: RuleEnsemble.h:260
void SetLinCoefficients(const std::vector< Double_t > &v)
Definition: RuleEnsemble.h:114
void SetImportanceRef(Double_t impref)
set reference importance
Double_t GetGDPathStep() const
void SetCutMin(Int_t i, Double_t v)
Definition: RuleCut.h:64
std::vector< int > fRFLx
Definition: RuleFitAPI.h:188
Double_t x[n]
Definition: legend1.C:17
std::vector< Float_t > fRFYhat
Definition: RuleFitAPI.h:182
Bool_t WriteProgram()
write command to rf_go.exe
Definition: RuleFitAPI.cxx:307
void FillRealParmsDef()
set default real params
Definition: RuleFitAPI.cxx:215
const Event * GetEvent() const
Definition: MethodBase.h:733
DataSet * Data() const
Definition: MethodBase.h:393
void SetSSB(Double_t v)
Definition: Rule.h:88
void SetCutDoMin(Int_t i, Bool_t v)
Definition: RuleCut.h:66
void SetRules(const std::vector< TMVA::Rule *> &rules)
set rules
DataSetInfo & DataInfo() const
Definition: MethodBase.h:394
Bool_t DoOnlyRules() const
Definition: RuleEnsemble.h:259
Bool_t ReadModelSum()
read model from rulefit.sum
Definition: RuleFitAPI.cxx:546
void SetSelector(Int_t i, UInt_t s)
Definition: RuleCut.h:63
RuleEnsemble * GetRuleEnsemblePtr()
Definition: RuleFit.h:141
Int_t ReadInt(std::ifstream &f, Int_t *v, Int_t n=1) const
Definition: RuleFitAPI.h:281
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: MethodBase.h:753
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE:Format unknown!)
Definition: RuleFitAPI.cxx:367
const char * pwd()
Definition: TSystem.h:405
void SetTrainParms()
set the training parameters
Definition: RuleFitAPI.cxx:190
void CalcVarImportance()
Calculates variable importance using eq (35) in RuleFit paper by Friedman et.al.
Int_t GetRFNrules() const
TRandom2 r(17)
R__EXTERN TSystem * gSystem
Definition: TSystem.h:539
A class describing a 'rule cut'.
Definition: RuleCut.h:34
Double_t GetLinQuantile() const
Bool_t WriteInt(std::ofstream &f, const Int_t *v, Int_t n=1)
Definition: RuleFitAPI.h:265
void SetOffset(Double_t v=0.0)
Definition: RuleEnsemble.h:112
void CalcImportance()
Definition: Rule.h:97
unsigned int UInt_t
Definition: RtypesCore.h:42
Int_t ReadFloat(std::ifstream &f, Float_t *v, Int_t n=1) const
Definition: RuleFitAPI.h:290
virtual Int_t Exec(const char *shellcmd)
Execute a command.
Definition: TSystem.cxx:660
float xmax
Definition: THbookFile.cxx:93
void SetTestParms()
set the test params
Definition: RuleFitAPI.cxx:203
Bool_t WriteFloat(std::ofstream &f, const Float_t *v, Int_t n=1)
Definition: RuleFitAPI.h:273
void SetImportanceRef(Double_t v)
Definition: Rule.h:94
Int_t GetRFNendnodes() const
Long64_t GetNTestEvents() const
Definition: DataSet.h:80
const Bool_t kFALSE
Definition: RtypesCore.h:92
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
void SetSupport(Double_t v)
Definition: Rule.h:85
void SetNorm(Double_t norm)
Definition: Rule.h:79
std::vector< Float_t > fRFVarImp
Definition: RuleFitAPI.h:183
void SetCutDoMax(Int_t i, Bool_t v)
Definition: RuleCut.h:67
#define ClassImp(name)
Definition: Rtypes.h:336
double f(double x)
double Double_t
Definition: RtypesCore.h:55
MsgLogger fLogger
Definition: RuleFitAPI.h:192
Int_t GetGDNPathSteps() const
Double_t y[n]
Definition: legend1.C:17
Bool_t WriteVarNames()
write variable names, ascii
Definition: RuleFitAPI.cxx:452
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
Bool_t WriteTest()
Write test data.
Definition: RuleFitAPI.cxx:420
TString fRFWorkDir
Definition: RuleFitAPI.h:185
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:96
Bool_t OpenRFile(TString name, std::ofstream &f)
Definition: RuleFitAPI.h:237
Abstract ClassifierFactory template that handles arbitrary types.
RealParms fRFRealParms
Definition: RuleFitAPI.h:187
Double_t GetImportance() const
Definition: Rule.h:143
Bool_t WriteYhat()
written by rf_go.exe
Definition: RuleFitAPI.cxx:475
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
Definition: RuleFitAPI.cxx:376
Bool_t ReadYhat()
read the score
Definition: RuleFitAPI.cxx:484
Int_t RunRuleFit()
execute rf_go.exe
Definition: RuleFitAPI.cxx:774
void SetLinNorm(const std::vector< Double_t > &norm)
Definition: RuleEnsemble.h:118
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:215
Bool_t WriteRealParms()
write int params file
Definition: RuleFitAPI.cxx:277
Bool_t WriteVarImp()
Definition: RuleFitAPI.cxx:464
Bool_t IsSignal(const Event *ev) const
Double_t CalcLinImportance()
calculate the linear importance for each rule
std::vector< Int_t > fRFVarImpInd
Definition: RuleFitAPI.h:184
const Bool_t kTRUE
Definition: RtypesCore.h:91
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
const Int_t n
Definition: legend1.C:16
void InitRuleFit()
default initialisation SetRFWorkDir("./rulefit");
Definition: RuleFitAPI.cxx:124
const char * Data() const
Definition: TString.h:347