ROOT  6.06/09
Reference Guide
RuleFitAPI.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : RuleFitAPI *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header file for description) *
12  * *
13  * Authors (alphabetical): *
14  * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * Iowa State U. *
19  * MPI-KP Heidelberg, Germany *
20  * *
21  * Redistribution and use in source and binary forms, with or without *
22  * modification, are permitted according to the terms listed in LICENSE *
23  * (http://tmva.sourceforge.net/LICENSE) *
24  **********************************************************************************/
25 
26 //_______________________________________________________________________
27 //
28 // J Friedman's RuleFit method
29 //_______________________________________________________________________
30 
31 #include <algorithm>
32 
33 #include "TROOT.h"
34 #include "TSystem.h"
35 #include "TMath.h"
36 
37 #include "TMVA/MethodRuleFit.h"
38 #include "TMVA/RuleFitAPI.h"
39 #include "TMVA/RuleFit.h"
40 #include "TMVA/Tools.h"
41 #include "TMVA/Timer.h"
42 
44 
45 TMVA::RuleFitAPI::RuleFitAPI( const MethodRuleFit *rfbase,
46  RuleFit *rulefit,
47  EMsgType minType = kINFO ) :
48  fMethodRuleFit(rfbase),
49  fRuleFit(rulefit),
50  fRFProgram(kRfTrain),
51  fLogger("RuleFitAPI",minType)
52 {
53  // standard constructor
54  if (rfbase) {
55  SetRFWorkDir(rfbase->GetRFWorkDir());
56  } else {
57  SetRFWorkDir("./rulefit");
58  }
59  InitRuleFit();
60 }
61 
62 
63 ////////////////////////////////////////////////////////////////////////////////
64 /// destructor
65 
67 {
68 }
69 
70 ////////////////////////////////////////////////////////////////////////////////
71 /// welcome message
72 
74 {
75  fLogger << kINFO
76  << "\n"
77  << "---------------------------------------------------------------------------\n"
78  << "- You are running the interface to Jerome Friedmans RuleFit(tm) code. -\n"
79  << "- For a full manual see the following web page: -\n"
80  << "- -\n"
81  << "- http://www-stat.stanford.edu/~jhf/R-RuleFit.html -\n"
82  << "- -\n"
83  << "---------------------------------------------------------------------------"
84  << Endl;
85 }
86 ////////////////////////////////////////////////////////////////////////////////
87 /// howto message
88 
90 {
91  fLogger << kINFO
92  << "\n"
93  << "------------------------ RULEFIT-JF INTERFACE SETUP -----------------------\n"
94  << "\n"
95  << "1. Create a rulefit directory in your current work directory:\n"
96  << " mkdir " << fRFWorkDir << "\n\n"
97  << " the directory may be set using the option RuleFitDir\n"
98  << "\n"
99  << "2. Copy (or make a link) the file rf_go.exe into this directory\n"
100  << "\n"
101  << "The file can be obtained from Jerome Friedmans homepage (linux):\n"
102  << " wget http://www-stat.stanford.edu/~jhf/r-rulefit/linux/rf_go.exe\n"
103  << "\n"
104  << "Don't forget to do:\n"
105  << " chmod +x rf_go.exe\n"
106  << "\n"
107  << "For Windows download:\n"
108  << " http://www-stat.stanford.edu/~jhf/r-rulefit/windows/rf_go.exe\n"
109  << "\n"
110  << "NOTE: other platforms are not supported (see Friedmans homepage)\n"
111  << "\n"
112  << "---------------------------------------------------------------------------\n"
113  << Endl;
114 }
115 ////////////////////////////////////////////////////////////////////////////////
116 /// default initialisation
117 /// SetRFWorkDir("./rulefit");
118 
120 {
121  CheckRFWorkDir();
122  FillIntParmsDef();
123  FillRealParmsDef();
124 }
125 
126 ////////////////////////////////////////////////////////////////////////////////
127 /// import setup from MethodRuleFit
128 
130 {
131  fRFIntParms.p = fMethodRuleFit->DataInfo().GetNVariables();
132  fRFIntParms.max_rules = fMethodRuleFit->GetRFNrules();
133  fRFIntParms.tree_size = fMethodRuleFit->GetRFNendnodes();
134  fRFIntParms.path_steps = fMethodRuleFit->GetGDNPathSteps();
135  //
136  fRFRealParms.path_inc = fMethodRuleFit->GetGDPathStep();
137  fRFRealParms.samp_fract = fMethodRuleFit->GetTreeEveFrac();
138  fRFRealParms.trim_qntl = fMethodRuleFit->GetLinQuantile();
139  fRFRealParms.conv_fac = fMethodRuleFit->GetGDErrScale();
140  //
141  if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyLinear() )
142  fRFIntParms.lmode = kRfLinear;
143  else if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyRules() )
144  fRFIntParms.lmode = kRfRules;
145  else
146  fRFIntParms.lmode = kRfBoth;
147 }
148 
149 ////////////////////////////////////////////////////////////////////////////////
150 /// set the directory containing rf_go.exe.
151 
152 void TMVA::RuleFitAPI::SetRFWorkDir(const char * wdir)
153 {
154  fRFWorkDir = wdir;
155 }
156 
157 ////////////////////////////////////////////////////////////////////////////////
158 /// check if the rulefit work dir is properly setup.
159 /// it aborts (kFATAL) if not.
160 ///
161 /// Check existance of directory
162 
164 {
165  TString oldDir = gSystem->pwd();
166  if (!gSystem->cd(fRFWorkDir)) {
167  fLogger << kWARNING << "Must create a rulefit directory named : " << fRFWorkDir << Endl;
168  HowtoSetupRF();
169  fLogger << kFATAL << "Setup failed - aborting!" << Endl;
170  }
171  // check rf_go.exe
172  FILE *f = fopen("rf_go.exe","r");
173  if (f==0) {
174  fLogger << kWARNING << "No rf_go.exe file in directory : " << fRFWorkDir << Endl;
175  HowtoSetupRF();
176  fLogger << kFATAL << "Setup failed - aborting!" << Endl;
177  }
178  fclose(f);
179  gSystem->cd(oldDir.Data());
180 }
181 
182 ////////////////////////////////////////////////////////////////////////////////
183 /// set the training parameters
184 
186 {
187  ImportSetup();
188  //
189  Int_t n = fMethodRuleFit->Data()->GetNTrainingEvents();
190  // Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
191  fRFIntParms.n = n; // number of data points in tree
192  fRFProgram = kRfTrain;
193 }
194 
195 ////////////////////////////////////////////////////////////////////////////////
196 /// set the test params
197 
199 {
200  ImportSetup();
201  Int_t n = fMethodRuleFit->Data()->GetNTestEvents();
202  // Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
203  fRFIntParms.n = n; // number of data points in tree
204  fRFProgram = kRfPredict;
205 }
206 
207 ////////////////////////////////////////////////////////////////////////////////
208 /// set default real params
209 
211 {
212  fRFRealParms.xmiss = 9.0e30;
213  fRFRealParms.trim_qntl = 0.025;
214  fRFRealParms.huber = 0.8;
215  fRFRealParms.inter_supp = 3.0;
216  fRFRealParms.memory_par = 0.01;
217  fRFRealParms.samp_fract = 0.5; // calculated later
218  fRFRealParms.path_inc = 0.01;
219  fRFRealParms.conv_fac = 1.1;
220 }
221 
222 ////////////////////////////////////////////////////////////////////////////////
223 /// set default int params
224 
226 {
227  fRFIntParms.mode = (int)kRfClass;
228  fRFIntParms.lmode = (int)kRfBoth;
229  // fRFIntParms.n;
230  // fRFIntParms.p;
231  fRFIntParms.max_rules = 2000;
232  fRFIntParms.tree_size = 4;
233  fRFIntParms.path_speed = 2;
234  fRFIntParms.path_xval = 3;
235  fRFIntParms.path_steps = 50000;
236  fRFIntParms.path_testfreq = 100;
237  fRFIntParms.tree_store = 10000000;
238  fRFIntParms.cat_store = 1000000;
239 
240 }
241 
242 ////////////////////////////////////////////////////////////////////////////////
243 /// write all files read by rf_go.exe
244 
246 {
247  WriteIntParms();
248  WriteRealParms();
249  WriteLx();
250  WriteProgram();
251  WriteVarNames();
252  if (fRFProgram==kRfTrain) WriteTrain();
253  if (fRFProgram==kRfPredict) WriteTest();
254  if (fRFProgram==kRfVarimp) WriteRealVarImp();
255  return kTRUE;
256 }
257 
258 ////////////////////////////////////////////////////////////////////////////////
259 /// write int params file
260 
262 {
263  std::ofstream f;
264  if (!OpenRFile("intparms",f)) return kFALSE;
265  WriteInt(f,&fRFIntParms.mode,sizeof(fRFIntParms)/sizeof(Int_t));
266  return kTRUE;
267 }
268 
269 ////////////////////////////////////////////////////////////////////////////////
270 /// write int params file
271 
273 {
274  std::ofstream f;
275  if (!OpenRFile("realparms",f)) return kFALSE;
276  WriteFloat(f,&fRFRealParms.xmiss,sizeof(fRFRealParms)/sizeof(Float_t));
277  return kTRUE;
278 }
279 
280 ////////////////////////////////////////////////////////////////////////////////
281 /// Save input variable mask
282 ///
283 /// If the lx vector size is not the same as inputVars,
284 /// resize it and fill it with 1
285 /// NOTE: Always set all to 1
286 /// if (fRFLx.size() != m_inputVars->size()) {
287 
289 {
290  fRFLx.clear();
291  fRFLx.resize(fMethodRuleFit->DataInfo().GetNVariables(),1);
292  // }
293  std::ofstream f;
294  if (!OpenRFile("lx",f)) return kFALSE;
295  WriteInt(f,&fRFLx[0],fRFLx.size());
296  return kTRUE;
297 }
298 
299 ////////////////////////////////////////////////////////////////////////////////
300 /// write command to rf_go.exe
301 
303 {
304  std::ofstream f;
305  if (!OpenRFile("program",f)) return kFALSE;
306  TString program;
307  switch (fRFProgram) {
308  case kRfTrain:
309  program = "rulefit";
310  break;
311  case kRfPredict:
312  program = "rulefit_pred";
313  break;
314  // calculate variable importance
315  case kRfVarimp:
316  program = "varimp";
317  break;
318  default:
319  fRFProgram = kRfTrain;
320  program="rulefit";
321  break;
322  }
323  f << program;
324  return kTRUE;
325 }
326 
327 ////////////////////////////////////////////////////////////////////////////////
328 /// write the minimum importance to be considered
329 
331 {
332  std::ofstream f;
333  if (!OpenRFile("realvarimp",f)) return kFALSE;
334  Float_t rvp[2];
335  rvp[0] = 0.0; // Mode: see varimp() in rulefit.r
336  rvp[1] = 0.0; // Minimum importance considered (1 is max)
337  WriteFloat(f,&rvp[0],2);
338  return kTRUE;
339 }
340 
341 ////////////////////////////////////////////////////////////////////////////////
342 /// written by rf_go.exe; write rulefit output (rfout)
343 
345 {
346  fLogger << kWARNING << "WriteRfOut is not yet implemented" << Endl;
347  return kTRUE;
348 }
349 
350 ////////////////////////////////////////////////////////////////////////////////
351 /// written by rf_go.exe; write rulefit status
352 
354 {
355  fLogger << kWARNING << "WriteRfStatus is not yet implemented" << Endl;
356  return kTRUE;
357 }
358 
359 ////////////////////////////////////////////////////////////////////////////////
360 /// written by rf_go.exe (NOTE:Format unknown!)
361 
363 {
364  fLogger << kWARNING << "WriteRuleFitMod is not yet implemented" << Endl;
365  return kTRUE;
366 }
367 
368 ////////////////////////////////////////////////////////////////////////////////
369 /// written by rf_go.exe (NOTE: format unknown!)
370 
372 {
373  fLogger << kWARNING << "WriteRuleFitSum is not yet implemented" << Endl;
374  return kTRUE;
375 }
376 
377 ////////////////////////////////////////////////////////////////////////////////
378 /// write training data, columnwise
379 
381 {
382  std::ofstream fx;
383  std::ofstream fy;
384  std::ofstream fw;
385  //
386  if (!OpenRFile("train.x",fx)) return kFALSE;
387  if (!OpenRFile("train.y",fy)) return kFALSE;
388  if (!OpenRFile("train.w",fw)) return kFALSE;
389  //
390  Float_t x,y,w;
391  //
392  // The loop order cannot be changed.
393  // The data is stored <var1(eve1), var1(eve2), ...var1(eveN), var2(eve1),....
394  //
395  for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
396  for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNTrainingEvents(); ievt++) {
397  const Event * ev = fMethodRuleFit->GetTrainingEvent(ievt);
398  x = ev->GetValue(ivar);
399  WriteFloat(fx,&x,1);
400  if (ivar==0) {
401  w = ev->GetWeight();
402  y = fMethodRuleFit->DataInfo().IsSignal(ev)? 1.0 : -1.0;
403  WriteFloat(fy,&y,1);
404  WriteFloat(fw,&w,1);
405  }
406  }
407  }
408  fLogger << kINFO << "Number of training data written: " << fMethodRuleFit->Data()->GetNTrainingEvents() << Endl;
409  return kTRUE;
410 }
411 
412 ////////////////////////////////////////////////////////////////////////////////
413 /// Write test data
414 
416 {
417  fMethodRuleFit->Data()->SetCurrentType(Types::kTesting);
418 
419  std::ofstream f;
420  //
421  if (!OpenRFile("test.x",f)) return kFALSE;
422  //
423  Float_t vf;
424  Float_t neve;
425  //
426  neve = static_cast<Float_t>(fMethodRuleFit->Data()->GetNEvents());
427  WriteFloat(f,&neve,1);
428  // Test data is saved as:
429  // 0 : <N> num of events, type float, 4 bytes
430  // 1-N : First variable for all events
431  // N+1-2N : Second variable...
432  // ...
433  for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
434  for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNEvents(); ievt++) {
435  vf = fMethodRuleFit->GetEvent(ievt)->GetValue(ivar);
436  WriteFloat(f,&vf,1);
437  }
438  }
439  fLogger << kINFO << "Number of test data written: " << fMethodRuleFit->Data()->GetNEvents() << Endl;
440  //
441  return kTRUE;
442 }
443 
444 ////////////////////////////////////////////////////////////////////////////////
445 /// write variable names, ascii
446 
448 {
449  std::ofstream f;
450  if (!OpenRFile("varnames",f)) return kFALSE;
451  for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
452  f << fMethodRuleFit->DataInfo().GetVariableInfo(ivar).GetExpression() << '\n';
453  }
454  return kTRUE;
455 }
456 
457 ////////////////////////////////////////////////////////////////////////////////
458 
460 
461 {
462  // written by rf_go.exe
463  fLogger << kWARNING << "WriteVarImp is not yet implemented" << Endl;
464  return kTRUE;
465 }
466 
467 ////////////////////////////////////////////////////////////////////////////////
468 /// written by rf_go.exe
469 
471 {
472  fLogger << kWARNING << "WriteYhat is not yet implemented" << Endl;
473  return kTRUE;
474 }
475 
476 ////////////////////////////////////////////////////////////////////////////////
477 /// read the score
478 
480 {
481  fRFYhat.clear();
482  //
483  std::ifstream f;
484  if (!OpenRFile("yhat",f)) return kFALSE;
485  Int_t neve;
486  Float_t xval;
487  ReadFloat(f,&xval,1);
488  neve = static_cast<Int_t>(xval);
489  if (neve!=fMethodRuleFit->Data()->GetNTestEvents()) {
490  fLogger << kWARNING << "Inconsistent size of yhat file and test tree!" << Endl;
491  fLogger << kWARNING << "neve = " << neve << " , tree = " << fMethodRuleFit->Data()->GetNTestEvents() << Endl;
492  return kFALSE;
493  }
494  for (Int_t ievt=0; ievt<fMethodRuleFit->Data()->GetNTestEvents(); ievt++) {
495  ReadFloat(f,&xval,1);
496  fRFYhat.push_back(xval);
497  }
498  return kTRUE;
499 }
500 
501 ////////////////////////////////////////////////////////////////////////////////
502 /// read variable importance
503 
505 {
506  fRFVarImp.clear();
507  //
508  std::ifstream f;
509  if (!OpenRFile("varimp",f)) return kFALSE;
510  UInt_t nvars;
511  Float_t xval;
512  Float_t xmax=1.0;
513  nvars=fMethodRuleFit->DataInfo().GetNVariables();
514  //
515  // First read all importances
516  //
517  for (UInt_t ivar=0; ivar<nvars; ivar++) {
518  ReadFloat(f,&xval,1);
519  if (ivar==0) {
520  xmax=xval;
521  } else {
522  if (xval>xmax) xmax=xval;
523  }
524  fRFVarImp.push_back(xval);
525  }
526  //
527  // Read the indices.
528  // They are saved as float (!) by rf_go.exe.
529  //
530  for (UInt_t ivar=0; ivar<nvars; ivar++) {
531  fRFVarImp[ivar] = fRFVarImp[ivar]/xmax;
532  ReadFloat(f,&xval,1);
533  fRFVarImpInd.push_back(Int_t(xval)-1);
534  }
535  return kTRUE;
536 }
537 
538 ////////////////////////////////////////////////////////////////////////////////
539 /// read model from rulefit.sum
540 
542 {
543  fRFVarImp.clear();
544  //
545  fLogger << kVERBOSE << "Reading RuleFit summary file" << Endl;
546  std::ifstream f;
547  if (!OpenRFile("rulefit.sum",f)) return kFALSE;
548  Int_t lines=0;
549  Int_t nrules=0;
550  Int_t nvars=0;
551  Int_t nvarsOpt=0;
552  Int_t dumI;
553  Float_t dumF;
554  Float_t offset;
555  Double_t impref=-1.0;
556  Double_t imp;
557 
558  fRuleFit->GetRuleEnsemblePtr()->SetAverageRuleSigma(0.4); // value used by Friedmans RuleFit
559  //
560  //--------------------------------------------
561  // first read rulefit.sum header
562  //--------------------------------------------
563  // line type val descr
564  // 0 <int> 86 N(rules)x2
565  // 1 <int> 155 ???
566  // 2 <int> 1 ???
567  // 3 <int> 1916 ???
568  // 4 <int> 2 N(vars) ?
569  // 5 <int> 2 N(vars) ?
570  // 6 <float> 9e+30 xmiss
571  // 7 <float> 1.1e-1 a0 (model offset)
572  //--------------------------------------------
573  //
574  // NOTE: a model without any rules, will look like
575  // for the first four lines:
576  //
577  // 0 1
578  // 1 1
579  // 2 1
580  // 3 0
581  //
582  // There will later be one block of dummy data for one rule.
583  // In order to catch this situation, some special checks are made below.
584  //
585  Bool_t norules;
586  lines += ReadInt(f,&nrules);
587  norules = (nrules==1);
588  lines += ReadInt(f,&dumI);
589  norules = norules && (dumI==1);
590  lines += ReadInt(f,&dumI);
591  norules = norules && (dumI==1);
592  lines += ReadInt(f,&dumI);
593  norules = norules && (dumI==0);
594  if (nrules==0) norules=kTRUE; // this ugly construction is needed:(
595  if (norules) nrules = 0;
596  //
597  lines += ReadInt(f,&nvars);
598  lines += ReadInt(f,&nvarsOpt);
599  lines += ReadFloat(f,&dumF);
600  lines += ReadFloat(f,&offset);
601  fLogger << kDEBUG << "N(rules) = " << nrules << Endl;
602  fLogger << kDEBUG << "N(vars) = " << nvars << Endl;
603  fLogger << kDEBUG << "N(varsO) = " << nvarsOpt << Endl;
604  fLogger << kDEBUG << "xmiss = " << dumF << Endl;
605  fLogger << kDEBUG << "offset = " << offset << Endl;
606  if (nvars!=nvarsOpt) {
607  fLogger << kWARNING << "Format of rulefit.sum is ... weird?? Continuing but who knows how it will end...?" << Endl;
608  }
609  std::vector<Double_t> rfSupp;
610  std::vector<Double_t> rfCoef;
611  std::vector<Int_t> rfNcut;
612  std::vector<Rule *> rfRules;
613  if (norules) {
614  // if no rules, read 8 blocks of data
615  // this corresponds to one dummy rule
616  for (Int_t t=0; t<8; t++) {
617  lines += ReadFloat(f,&dumF);
618  }
619  }
620  //
621  //--------------------------------------------
622  // read first part of rule info
623  //--------------------------------------------
624  //
625  // 8 <int> 10 ???
626  // 9 <float> 0.185 support
627  // 10 <float> 0.051 coefficient
628  // 11 <float> 2 num of cuts in rule
629  // 12 <float> 1 ??? not used by this interface
630  //
631  for (Int_t r=0; r<nrules; r++) {
632  lines += ReadFloat(f,&dumF);
633  lines += ReadFloat(f,&dumF);
634  rfSupp.push_back(dumF);
635  lines += ReadFloat(f,&dumF);
636  rfCoef.push_back(dumF);
637  lines += ReadFloat(f,&dumF);
638  rfNcut.push_back(static_cast<int>(dumF+0.5));
639  lines += ReadFloat(f,&dumF);
640  //
641  }
642  //--------------------------------------------
643  // read second part of rule info
644  //--------------------------------------------
645  //
646  // Per range (cut):
647  // 0 <float> 1 varind
648  // 1 <float> -1.0 low
649  // 2 <float> 1.56 high
650  //
651 
652  for (Int_t r=0; r<nrules; r++) {
653  Int_t varind;
654  Double_t xmin;
655  Double_t xmax;
656  Rule *rule = new Rule(fRuleFit->GetRuleEnsemblePtr());
657  rfRules.push_back( rule );
658  RuleCut *rfcut = new RuleCut();
659  rfcut->SetNvars(rfNcut[r]);
660  rule->SetRuleCut( rfcut );
661  // the below are set to default values since no info is
662  // available in rulefit.sum
663  rule->SetNorm(1.0);
664  rule->SetSupport(0);
665  rule->SetSSB(0.0);
666  rule->SetSSBNeve(0.0);
667  rule->SetImportanceRef(1.0);
668  rule->SetSSB(0.0);
669  rule->SetSSBNeve(0.0);
670  // set support etc
671  rule->SetSupport(rfSupp[r]);
672  rule->SetCoefficient(rfCoef[r]);
673  rule->CalcImportance();
674  imp = rule->GetImportance();
675  if (imp>impref) impref = imp; // find max importance
676  //
677  fLogger << kDEBUG << "Rule #" << r << " : " << nvars << Endl;
678  fLogger << kDEBUG << " support = " << rfSupp[r] << Endl;
679  fLogger << kDEBUG << " sigma = " << rule->GetSigma() << Endl;
680  fLogger << kDEBUG << " coeff = " << rfCoef[r] << Endl;
681  fLogger << kDEBUG << " N(cut) = " << rfNcut[r] << Endl;
682 
683  for (Int_t c=0; c<rfNcut[r]; c++) {
684  lines += ReadFloat(f,&dumF);
685  varind = static_cast<Int_t>(dumF+0.5)-1;
686  lines += ReadFloat(f,&dumF);
687  xmin = static_cast<Double_t>(dumF);
688  lines += ReadFloat(f,&dumF);
689  xmax = static_cast<Double_t>(dumF);
690  // create Rule HERE!
691  rfcut->SetSelector(c,varind);
692  rfcut->SetCutMin(c,xmin);
693  rfcut->SetCutMax(c,xmax);
694  // the following is not nice - this is however defined
695  // by the rulefit.sum format.
696  rfcut->SetCutDoMin(c,(xmin<-8.99e35 ? kFALSE:kTRUE));
697  rfcut->SetCutDoMax(c,(xmax> 8.99e35 ? kFALSE:kTRUE));
698  //
699  }
700  }
701  fRuleFit->GetRuleEnsemblePtr()->SetRules( rfRules );
702  fRuleFit->GetRuleEnsemblePtr()->SetOffset( offset );
703  //--------------------------------------------
704  // read second part of rule info
705  //--------------------------------------------
706  //
707  // Per linear term:
708  // 73 1 var index
709  // 74 -1.99594 min
710  // 75 1.99403 max
711  // 76 -0.000741858 ??? average ???
712  // 77 0.970935 std
713  // 78 0 coeff
714  //
715  std::vector<Int_t> varind;
716  std::vector<Double_t> xmin;
717  std::vector<Double_t> xmax;
718  std::vector<Double_t> average;
719  std::vector<Double_t> stdev;
720  std::vector<Double_t> norm;
721  std::vector<Double_t> coeff;
722  //
723  for (Int_t c=0; c<nvars; c++) {
724  lines += ReadFloat(f,&dumF);
725  varind.push_back(static_cast<Int_t>(dumF+0.5)-1);
726  lines += ReadFloat(f,&dumF);
727  xmin.push_back(static_cast<Double_t>(dumF));
728  lines += ReadFloat(f,&dumF);
729  xmax.push_back(static_cast<Double_t>(dumF));
730  lines += ReadFloat(f,&dumF);
731  average.push_back(static_cast<Double_t>(dumF));
732  lines += ReadFloat(f,&dumF);
733  stdev.push_back(static_cast<Double_t>(dumF));
734  Double_t nv = fRuleFit->GetRuleEnsemblePtr()->CalcLinNorm(stdev.back());
735  norm.push_back(nv);
736  lines += ReadFloat(f,&dumF);
737  coeff.push_back(dumF/nv); // save coefficient for normalised var
738  //
739  fLogger << kDEBUG << "Linear #" << c << Endl;
740  fLogger << kDEBUG << " varind = " << varind.back() << Endl;
741  fLogger << kDEBUG << " xmin = " << xmin.back() << Endl;
742  fLogger << kDEBUG << " xmax = " << xmax.back() << Endl;
743  fLogger << kDEBUG << " average = " << average.back() << Endl;
744  fLogger << kDEBUG << " stdev = " << stdev.back() << Endl;
745  fLogger << kDEBUG << " coeff = " << coeff.back() << Endl;
746  }
747  if (xmin.size()>0) {
748  fRuleFit->GetRuleEnsemblePtr()->SetLinCoefficients(coeff);
749  fRuleFit->GetRuleEnsemblePtr()->SetLinDM(xmin);
750  fRuleFit->GetRuleEnsemblePtr()->SetLinDP(xmax);
751  fRuleFit->GetRuleEnsemblePtr()->SetLinNorm(norm);
752  }
753  // fRuleFit->GetRuleEnsemblePtr()->CalcImportance();
754  imp = fRuleFit->GetRuleEnsemblePtr()->CalcLinImportance();
755  if (imp>impref) impref=imp;
756  fRuleFit->GetRuleEnsemblePtr()->SetImportanceRef(impref);
757  fRuleFit->GetRuleEnsemblePtr()->CleanupLinear(); // to fill fLinTermOK vector
758 
759  fRuleFit->GetRuleEnsemblePtr()->CalcVarImportance();
760  // fRuleFit->GetRuleEnsemblePtr()->CalcRuleSupport();
761 
762  fLogger << kDEBUG << "Reading model done" << Endl;
763  return kTRUE;
764 }
765 
766 ////////////////////////////////////////////////////////////////////////////////
767 /// execute rf_go.exe
768 
770 {
771  TString oldDir = gSystem->pwd();
772  TString cmd = "./rf_go.exe";
773  gSystem->cd(fRFWorkDir.Data());
774  int rval = gSystem->Exec(cmd.Data());
775  gSystem->cd(oldDir.Data());
776  return rval;
777 }
Bool_t WriteLx()
Save input variable mask.
Definition: RuleFitAPI.cxx:288
Bool_t ReadVarImp()
read variable importance
Definition: RuleFitAPI.cxx:504
void WelcomeMessage()
welcome message
Definition: RuleFitAPI.cxx:73
void SetCoefficient(Double_t v)
Definition: Rule.h:90
float xmin
Definition: THbookFile.cxx:93
void HowtoSetupRF()
howto message
Definition: RuleFitAPI.cxx:89
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void SetSSBNeve(Double_t v)
Definition: Rule.h:99
ClassImp(TMVA::RuleFitAPI) TMVA
Definition: RuleFitAPI.cxx:43
void SetRuleCut(RuleCut *rc)
Definition: Rule.h:84
void SetCutMax(Int_t i, Double_t v)
Definition: RuleCut.h:67
float Float_t
Definition: RtypesCore.h:53
virtual ~RuleFitAPI()
destructor
Definition: RuleFitAPI.cxx:66
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
Definition: RuleFitAPI.cxx:353
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
Definition: RuleFitAPI.cxx:152
Bool_t WriteAll()
write all files read by rf_go.exe
Definition: RuleFitAPI.cxx:245
void FillIntParmsDef()
set default int params
Definition: RuleFitAPI.cxx:225
Bool_t cd(const char *path)
Definition: TSystem.h:414
Basic string class.
Definition: TString.h:137
Bool_t WriteTrain()
write training data, columnwise
Definition: RuleFitAPI.cxx:380
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
Definition: RuleFitAPI.cxx:344
const Bool_t kFALSE
Definition: Rtypes.h:92
Bool_t WriteIntParms()
write int params file
Definition: RuleFitAPI.cxx:261
Bool_t WriteRealVarImp()
write the minimum importance to be considered
Definition: RuleFitAPI.cxx:330
void ImportSetup()
import setup from MethodRuleFit
Definition: RuleFitAPI.cxx:129
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:376
void CheckRFWorkDir()
check if the rulefit work dir is properly setup.
Definition: RuleFitAPI.cxx:163
void SetNvars(UInt_t nc)
Definition: RuleCut.h:143
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:231
const char * Data() const
Definition: TString.h:349
void SetCutMin(Int_t i, Double_t v)
Definition: RuleCut.h:66
Double_t x[n]
Definition: legend1.C:17
Bool_t WriteProgram()
write command to rf_go.exe
Definition: RuleFitAPI.cxx:302
void FillRealParmsDef()
set default real params
Definition: RuleFitAPI.cxx:210
void SetSSB(Double_t v)
Definition: Rule.h:96
void SetCutDoMin(Int_t i, Bool_t v)
Definition: RuleCut.h:68
if(pyself &&pyself!=Py_None)
Bool_t ReadModelSum()
read model from rulefit.sum
Definition: RuleFitAPI.cxx:541
void SetSelector(Int_t i, UInt_t s)
Definition: RuleCut.h:65
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE:Format unknown!)
Definition: RuleFitAPI.cxx:362
const char * pwd()
Definition: TSystem.h:415
void SetTrainParms()
set the training parameters
Definition: RuleFitAPI.cxx:185
ROOT::R::TRInterface & r
Definition: Object.C:4
R__EXTERN TSystem * gSystem
Definition: TSystem.h:549
Double_t GetImportance() const
Definition: Rule.h:151
EMsgType
Definition: Types.h:61
void CalcImportance()
Definition: Rule.h:105
Double_t GetSigma() const
Definition: Rule.h:149
unsigned int UInt_t
Definition: RtypesCore.h:42
virtual Int_t Exec(const char *shellcmd)
Execute a command.
Definition: TSystem.cxx:657
float xmax
Definition: THbookFile.cxx:93
void SetTestParms()
set the test params
Definition: RuleFitAPI.cxx:198
void SetImportanceRef(Double_t v)
Definition: Rule.h:102
void SetSupport(Double_t v)
Definition: Rule.h:93
void SetNorm(Double_t norm)
Definition: Rule.h:87
void SetCutDoMax(Int_t i, Bool_t v)
Definition: RuleCut.h:69
double f(double x)
double Double_t
Definition: RtypesCore.h:55
Double_t y[n]
Definition: legend1.C:17
Bool_t WriteVarNames()
write variable names, ascii
Definition: RuleFitAPI.cxx:447
Bool_t WriteTest()
Write test data.
Definition: RuleFitAPI.cxx:415
Abstract ClassifierFactory template that handles arbitrary types.
Bool_t WriteYhat()
written by rf_go.exe
Definition: RuleFitAPI.cxx:470
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
Definition: RuleFitAPI.cxx:371
Bool_t ReadYhat()
read the score
Definition: RuleFitAPI.cxx:479
Int_t RunRuleFit()
execute rf_go.exe
Definition: RuleFitAPI.cxx:769
Bool_t WriteRealParms()
write int params file
Definition: RuleFitAPI.cxx:272
Bool_t WriteVarImp()
Definition: RuleFitAPI.cxx:459
const Bool_t kTRUE
Definition: Rtypes.h:91
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
const Int_t n
Definition: legend1.C:16
void InitRuleFit()
default initialisation SetRFWorkDir("./rulefit");
Definition: RuleFitAPI.cxx:119