Logo ROOT   6.18/05
Reference Guide
RuleFitAPI.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RuleFitAPI *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header file for description) *
12 * *
13 * Authors (alphabetical): *
14 * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
15 * *
16 * Copyright (c) 2005: *
17 * CERN, Switzerland *
18 * Iowa State U. *
19 * MPI-KP Heidelberg, Germany *
20 * *
21 * Redistribution and use in source and binary forms, with or without *
22 * modification, are permitted according to the terms listed in LICENSE *
23 * (http://tmva.sourceforge.net/LICENSE) *
24 **********************************************************************************/
25
26/*! \class TMVA::RuleFitAPI
27\ingroup TMVA
28J Friedman's RuleFit method
29*/
30
31#include "TMVA/RuleFitAPI.h"
32
33#include "TMVA/DataSet.h"
34#include "TMVA/DataSetInfo.h"
35#include "TMVA/MethodRuleFit.h"
36#include "TMVA/RuleFit.h"
37#include "TMVA/Timer.h"
38#include "TMVA/Tools.h"
39#include "TMVA/Types.h"
40#include "TMVA/VariableInfo.h"
41
42#include "TROOT.h"
43#include "TSystem.h"
44#include "TMath.h"
45
46#include <algorithm>
47
49
51 RuleFit *rulefit,
52 EMsgType minType = kINFO ) :
53fMethodRuleFit(rfbase),
54 fRuleFit(rulefit),
55 fRFProgram(kRfTrain),
56 fLogger("RuleFitAPI",minType)
57{
58 // standard constructor
59 if (rfbase) {
60 SetRFWorkDir(rfbase->GetRFWorkDir());
61 } else {
62 SetRFWorkDir("./rulefit");
63 }
65}
66
67
68////////////////////////////////////////////////////////////////////////////////
69/// destructor
70
72{
73}
74
75////////////////////////////////////////////////////////////////////////////////
76/// welcome message
77
79{
80 fLogger << kINFO
81 << "\n"
82 << "---------------------------------------------------------------------------\n"
83 << "- You are running the interface to Jerome Friedmans RuleFit(tm) code. -\n"
84 << "- For a full manual see the following web page: -\n"
85 << "- -\n"
86 << "- http://www-stat.stanford.edu/~jhf/R-RuleFit.html -\n"
87 << "- -\n"
88 << "---------------------------------------------------------------------------"
89 << Endl;
90}
91////////////////////////////////////////////////////////////////////////////////
92/// howto message
93
95{
96 fLogger << kINFO
97 << "\n"
98 << "------------------------ RULEFIT-JF INTERFACE SETUP -----------------------\n"
99 << "\n"
100 << "1. Create a rulefit directory in your current work directory:\n"
101 << " mkdir " << fRFWorkDir << "\n\n"
102 << " the directory may be set using the option RuleFitDir\n"
103 << "\n"
104 << "2. Copy (or make a link) the file rf_go.exe into this directory\n"
105 << "\n"
106 << "The file can be obtained from Jerome Friedmans homepage (linux):\n"
107 << " wget http://www-stat.stanford.edu/~jhf/r-rulefit/linux/rf_go.exe\n"
108 << "\n"
109 << "Don't forget to do:\n"
110 << " chmod +x rf_go.exe\n"
111 << "\n"
112 << "For Windows download:\n"
113 << " http://www-stat.stanford.edu/~jhf/r-rulefit/windows/rf_go.exe\n"
114 << "\n"
115 << "NOTE: other platforms are not supported (see Friedmans homepage)\n"
116 << "\n"
117 << "---------------------------------------------------------------------------\n"
118 << Endl;
119}
120////////////////////////////////////////////////////////////////////////////////
121/// default initialisation
122/// SetRFWorkDir("./rulefit");
123
125{
126 CheckRFWorkDir();
127 FillIntParmsDef();
128 FillRealParmsDef();
129}
130
131////////////////////////////////////////////////////////////////////////////////
132/// import setup from MethodRuleFit
133
135{
136 fRFIntParms.p = fMethodRuleFit->DataInfo().GetNVariables();
137 fRFIntParms.max_rules = fMethodRuleFit->GetRFNrules();
138 fRFIntParms.tree_size = fMethodRuleFit->GetRFNendnodes();
139 fRFIntParms.path_steps = fMethodRuleFit->GetGDNPathSteps();
140 //
141 fRFRealParms.path_inc = fMethodRuleFit->GetGDPathStep();
142 fRFRealParms.samp_fract = fMethodRuleFit->GetTreeEveFrac();
143 fRFRealParms.trim_qntl = fMethodRuleFit->GetLinQuantile();
144 fRFRealParms.conv_fac = fMethodRuleFit->GetGDErrScale();
145 //
146 if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyLinear() )
147 fRFIntParms.lmode = kRfLinear;
148 else if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyRules() )
149 fRFIntParms.lmode = kRfRules;
150 else
151 fRFIntParms.lmode = kRfBoth;
152}
153
154////////////////////////////////////////////////////////////////////////////////
155/// set the directory containing rf_go.exe.
156
157void TMVA::RuleFitAPI::SetRFWorkDir(const char * wdir)
158{
159 fRFWorkDir = wdir;
160}
161
162////////////////////////////////////////////////////////////////////////////////
163/// check if the rulefit work dir is properly setup.
164/// it aborts (kFATAL) if not.
165///
166/// Check existence of directory
167
169{
170 TString oldDir = gSystem->pwd();
171 if (!gSystem->cd(fRFWorkDir)) {
172 fLogger << kWARNING << "Must create a rulefit directory named : " << fRFWorkDir << Endl;
173 HowtoSetupRF();
174 fLogger << kFATAL << "Setup failed - aborting!" << Endl;
175 }
176 // check rf_go.exe
177 FILE *f = fopen("rf_go.exe","r");
178 if (f==0) {
179 fLogger << kWARNING << "No rf_go.exe file in directory : " << fRFWorkDir << Endl;
180 HowtoSetupRF();
181 fLogger << kFATAL << "Setup failed - aborting!" << Endl;
182 }
183 fclose(f);
184 gSystem->cd(oldDir.Data());
185}
186
187////////////////////////////////////////////////////////////////////////////////
188/// set the training parameters
189
191{
192 ImportSetup();
193 //
194 Int_t n = fMethodRuleFit->Data()->GetNTrainingEvents();
195 // Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
196 fRFIntParms.n = n; // number of data points in tree
197 fRFProgram = kRfTrain;
198}
199
200////////////////////////////////////////////////////////////////////////////////
201/// set the test params
202
204{
205 ImportSetup();
206 Int_t n = fMethodRuleFit->Data()->GetNTestEvents();
207 // Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
208 fRFIntParms.n = n; // number of data points in tree
209 fRFProgram = kRfPredict;
210}
211
212////////////////////////////////////////////////////////////////////////////////
213/// set default real params
214
216{
217 fRFRealParms.xmiss = 9.0e30;
218 fRFRealParms.trim_qntl = 0.025;
219 fRFRealParms.huber = 0.8;
220 fRFRealParms.inter_supp = 3.0;
221 fRFRealParms.memory_par = 0.01;
222 fRFRealParms.samp_fract = 0.5; // calculated later
223 fRFRealParms.path_inc = 0.01;
224 fRFRealParms.conv_fac = 1.1;
225}
226
227////////////////////////////////////////////////////////////////////////////////
228/// set default int params
229
231{
232 fRFIntParms.mode = (int)kRfClass;
233 fRFIntParms.lmode = (int)kRfBoth;
234 // fRFIntParms.n;
235 // fRFIntParms.p;
236 fRFIntParms.max_rules = 2000;
237 fRFIntParms.tree_size = 4;
238 fRFIntParms.path_speed = 2;
239 fRFIntParms.path_xval = 3;
240 fRFIntParms.path_steps = 50000;
241 fRFIntParms.path_testfreq = 100;
242 fRFIntParms.tree_store = 10000000;
243 fRFIntParms.cat_store = 1000000;
244
245}
246
247////////////////////////////////////////////////////////////////////////////////
248/// write all files read by rf_go.exe
249
251{
252 WriteIntParms();
253 WriteRealParms();
254 WriteLx();
255 WriteProgram();
256 WriteVarNames();
257 if (fRFProgram==kRfTrain) WriteTrain();
258 if (fRFProgram==kRfPredict) WriteTest();
259 if (fRFProgram==kRfVarimp) WriteRealVarImp();
260 return kTRUE;
261}
262
263////////////////////////////////////////////////////////////////////////////////
264/// write int params file
265
267{
268 std::ofstream f;
269 if (!OpenRFile("intparms",f)) return kFALSE;
270 WriteInt(f,&fRFIntParms.mode,sizeof(fRFIntParms)/sizeof(Int_t));
271 return kTRUE;
272}
273
274////////////////////////////////////////////////////////////////////////////////
275/// write int params file
276
278{
279 std::ofstream f;
280 if (!OpenRFile("realparms",f)) return kFALSE;
281 WriteFloat(f,&fRFRealParms.xmiss,sizeof(fRFRealParms)/sizeof(Float_t));
282 return kTRUE;
283}
284
285////////////////////////////////////////////////////////////////////////////////
286/// Save input variable mask
287///
288/// If the lx vector size is not the same as inputVars,
289/// resize it and fill it with 1
290/// NOTE: Always set all to 1
291/// if (fRFLx.size() != m_inputVars->size()) {
292
294{
295 fRFLx.clear();
296 fRFLx.resize(fMethodRuleFit->DataInfo().GetNVariables(),1);
297 // }
298 std::ofstream f;
299 if (!OpenRFile("lx",f)) return kFALSE;
300 WriteInt(f,&fRFLx[0],fRFLx.size());
301 return kTRUE;
302}
303
304////////////////////////////////////////////////////////////////////////////////
305/// write command to rf_go.exe
306
308{
309 std::ofstream f;
310 if (!OpenRFile("program",f)) return kFALSE;
311 TString program;
312 switch (fRFProgram) {
313 case kRfTrain:
314 program = "rulefit";
315 break;
316 case kRfPredict:
317 program = "rulefit_pred";
318 break;
319 // calculate variable importance
320 case kRfVarimp:
321 program = "varimp";
322 break;
323 default:
324 fRFProgram = kRfTrain;
325 program="rulefit";
326 break;
327 }
328 f << program;
329 return kTRUE;
330}
331
332////////////////////////////////////////////////////////////////////////////////
333/// write the minimum importance to be considered
334
336{
337 std::ofstream f;
338 if (!OpenRFile("realvarimp",f)) return kFALSE;
339 Float_t rvp[2];
340 rvp[0] = 0.0; // Mode: see varimp() in rulefit.r
341 rvp[1] = 0.0; // Minimum importance considered (1 is max)
342 WriteFloat(f,&rvp[0],2);
343 return kTRUE;
344}
345
346////////////////////////////////////////////////////////////////////////////////
347/// written by rf_go.exe; write rulefit output (rfout)
348
350{
351 fLogger << kWARNING << "WriteRfOut is not yet implemented" << Endl;
352 return kTRUE;
353}
354
355////////////////////////////////////////////////////////////////////////////////
356/// written by rf_go.exe; write rulefit status
357
359{
360 fLogger << kWARNING << "WriteRfStatus is not yet implemented" << Endl;
361 return kTRUE;
362}
363
364////////////////////////////////////////////////////////////////////////////////
365/// written by rf_go.exe (NOTE:Format unknown!)
366
368{
369 fLogger << kWARNING << "WriteRuleFitMod is not yet implemented" << Endl;
370 return kTRUE;
371}
372
373////////////////////////////////////////////////////////////////////////////////
374/// written by rf_go.exe (NOTE: format unknown!)
375
377{
378 fLogger << kWARNING << "WriteRuleFitSum is not yet implemented" << Endl;
379 return kTRUE;
380}
381
382////////////////////////////////////////////////////////////////////////////////
383/// write training data, column wise
384
386{
387 std::ofstream fx;
388 std::ofstream fy;
389 std::ofstream fw;
390 //
391 if (!OpenRFile("train.x",fx)) return kFALSE;
392 if (!OpenRFile("train.y",fy)) return kFALSE;
393 if (!OpenRFile("train.w",fw)) return kFALSE;
394 //
395 Float_t x,y,w;
396 //
397 // The loop order cannot be changed.
398 // The data is stored <var1(eve1), var1(eve2), ...var1(eveN), var2(eve1),....
399 //
400 for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
401 for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNTrainingEvents(); ievt++) {
402 const Event * ev = fMethodRuleFit->GetTrainingEvent(ievt);
403 x = ev->GetValue(ivar);
404 WriteFloat(fx,&x,1);
405 if (ivar==0) {
406 w = ev->GetWeight();
407 y = fMethodRuleFit->DataInfo().IsSignal(ev)? 1.0 : -1.0;
408 WriteFloat(fy,&y,1);
409 WriteFloat(fw,&w,1);
410 }
411 }
412 }
413 fLogger << kINFO << "Number of training data written: " << fMethodRuleFit->Data()->GetNTrainingEvents() << Endl;
414 return kTRUE;
415}
416
417////////////////////////////////////////////////////////////////////////////////
418/// Write test data
419
421{
422 fMethodRuleFit->Data()->SetCurrentType(Types::kTesting);
423
424 std::ofstream f;
425 //
426 if (!OpenRFile("test.x",f)) return kFALSE;
427 //
428 Float_t vf;
429 Float_t neve;
430 //
431 neve = static_cast<Float_t>(fMethodRuleFit->Data()->GetNEvents());
432 WriteFloat(f,&neve,1);
433 // Test data is saved as:
434 // 0 : <N> num of events, type float, 4 bytes
435 // 1-N : First variable for all events
436 // N+1-2N : Second variable...
437 // ...
438 for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
439 for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNEvents(); ievt++) {
440 vf = fMethodRuleFit->GetEvent(ievt)->GetValue(ivar);
441 WriteFloat(f,&vf,1);
442 }
443 }
444 fLogger << kINFO << "Number of test data written: " << fMethodRuleFit->Data()->GetNEvents() << Endl;
445 //
446 return kTRUE;
447}
448
449////////////////////////////////////////////////////////////////////////////////
450/// write variable names, ascii
451
453{
454 std::ofstream f;
455 if (!OpenRFile("varnames",f)) return kFALSE;
456 for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
457 f << fMethodRuleFit->DataInfo().GetVariableInfo(ivar).GetExpression() << '\n';
458 }
459 return kTRUE;
460}
461
462////////////////////////////////////////////////////////////////////////////////
463
465
466{
467 // written by rf_go.exe
468 fLogger << kWARNING << "WriteVarImp is not yet implemented" << Endl;
469 return kTRUE;
470}
471
472////////////////////////////////////////////////////////////////////////////////
473/// written by rf_go.exe
474
476{
477 fLogger << kWARNING << "WriteYhat is not yet implemented" << Endl;
478 return kTRUE;
479}
480
481////////////////////////////////////////////////////////////////////////////////
482/// read the score
483
485{
486 fRFYhat.clear();
487 //
488 std::ifstream f;
489 if (!OpenRFile("yhat",f)) return kFALSE;
490 Int_t neve;
491 Float_t xval;
492 ReadFloat(f,&xval,1);
493 neve = static_cast<Int_t>(xval);
494 if (neve!=fMethodRuleFit->Data()->GetNTestEvents()) {
495 fLogger << kWARNING << "Inconsistent size of yhat file and test tree!" << Endl;
496 fLogger << kWARNING << "neve = " << neve << " , tree = " << fMethodRuleFit->Data()->GetNTestEvents() << Endl;
497 return kFALSE;
498 }
499 for (Int_t ievt=0; ievt<fMethodRuleFit->Data()->GetNTestEvents(); ievt++) {
500 ReadFloat(f,&xval,1);
501 fRFYhat.push_back(xval);
502 }
503 return kTRUE;
504}
505
506////////////////////////////////////////////////////////////////////////////////
507/// read variable importance
508
510{
511 fRFVarImp.clear();
512 //
513 std::ifstream f;
514 if (!OpenRFile("varimp",f)) return kFALSE;
515 UInt_t nvars;
516 Float_t xval;
517 Float_t xmax=1.0;
518 nvars=fMethodRuleFit->DataInfo().GetNVariables();
519 //
520 // First read all importances
521 //
522 for (UInt_t ivar=0; ivar<nvars; ivar++) {
523 ReadFloat(f,&xval,1);
524 if (ivar==0) {
525 xmax=xval;
526 } else {
527 if (xval>xmax) xmax=xval;
528 }
529 fRFVarImp.push_back(xval);
530 }
531 //
532 // Read the indices.
533 // They are saved as float (!) by rf_go.exe.
534 //
535 for (UInt_t ivar=0; ivar<nvars; ivar++) {
536 fRFVarImp[ivar] = fRFVarImp[ivar]/xmax;
537 ReadFloat(f,&xval,1);
538 fRFVarImpInd.push_back(Int_t(xval)-1);
539 }
540 return kTRUE;
541}
542
543////////////////////////////////////////////////////////////////////////////////
544/// read model from rulefit.sum
545
547{
548 fRFVarImp.clear();
549 //
550 fLogger << kVERBOSE << "Reading RuleFit summary file" << Endl;
551 std::ifstream f;
552 if (!OpenRFile("rulefit.sum",f)) return kFALSE;
553 Int_t lines=0;
554 Int_t nrules=0;
555 Int_t nvars=0;
556 Int_t nvarsOpt=0;
557 Int_t dumI;
558 Float_t dumF;
559 Float_t offset;
560 Double_t impref=-1.0;
561 Double_t imp;
562
563 fRuleFit->GetRuleEnsemblePtr()->SetAverageRuleSigma(0.4); // value used by Friedmans RuleFit
564 //
565 //--------------------------------------------
566 // first read rulefit.sum header
567 //--------------------------------------------
568 // line type val descr
569 // 0 <int> 86 N(rules)x2
570 // 1 <int> 155 ???
571 // 2 <int> 1 ???
572 // 3 <int> 1916 ???
573 // 4 <int> 2 N(vars) ?
574 // 5 <int> 2 N(vars) ?
575 // 6 <float> 9e+30 xmiss
576 // 7 <float> 1.1e-1 a0 (model offset)
577 //--------------------------------------------
578 //
579 // NOTE: a model without any rules, will look like
580 // for the first four lines:
581 //
582 // 0 1
583 // 1 1
584 // 2 1
585 // 3 0
586 //
587 // There will later be one block of dummy data for one rule.
588 // In order to catch this situation, some special checks are made below.
589 //
590 Bool_t norules;
591 lines += ReadInt(f,&nrules);
592 norules = (nrules==1);
593 lines += ReadInt(f,&dumI);
594 norules = norules && (dumI==1);
595 lines += ReadInt(f,&dumI);
596 norules = norules && (dumI==1);
597 lines += ReadInt(f,&dumI);
598 norules = norules && (dumI==0);
599 if (nrules==0) norules=kTRUE; // this ugly construction is needed:(
600 if (norules) nrules = 0;
601 //
602 lines += ReadInt(f,&nvars);
603 lines += ReadInt(f,&nvarsOpt);
604 lines += ReadFloat(f,&dumF);
605 lines += ReadFloat(f,&offset);
606 fLogger << kDEBUG << "N(rules) = " << nrules << Endl;
607 fLogger << kDEBUG << "N(vars) = " << nvars << Endl;
608 fLogger << kDEBUG << "N(varsO) = " << nvarsOpt << Endl;
609 fLogger << kDEBUG << "xmiss = " << dumF << Endl;
610 fLogger << kDEBUG << "offset = " << offset << Endl;
611 if (nvars!=nvarsOpt) {
612 fLogger << kWARNING << "Format of rulefit.sum is ... weird?? Continuing but who knows how it will end...?" << Endl;
613 }
614 std::vector<Double_t> rfSupp;
615 std::vector<Double_t> rfCoef;
616 std::vector<Int_t> rfNcut;
617 std::vector<Rule *> rfRules;
618 if (norules) {
619 // if no rules, read 8 blocks of data
620 // this corresponds to one dummy rule
621 for (Int_t t=0; t<8; t++) {
622 lines += ReadFloat(f,&dumF);
623 }
624 }
625 //
626 //--------------------------------------------
627 // read first part of rule info
628 //--------------------------------------------
629 //
630 // 8 <int> 10 ???
631 // 9 <float> 0.185 support
632 // 10 <float> 0.051 coefficient
633 // 11 <float> 2 num of cuts in rule
634 // 12 <float> 1 ??? not used by this interface
635 //
636 for (Int_t r=0; r<nrules; r++) {
637 lines += ReadFloat(f,&dumF);
638 lines += ReadFloat(f,&dumF);
639 rfSupp.push_back(dumF);
640 lines += ReadFloat(f,&dumF);
641 rfCoef.push_back(dumF);
642 lines += ReadFloat(f,&dumF);
643 rfNcut.push_back(static_cast<int>(dumF+0.5));
644 lines += ReadFloat(f,&dumF);
645 //
646 }
647 //--------------------------------------------
648 // read second part of rule info
649 //--------------------------------------------
650 //
651 // Per range (cut):
652 // 0 <float> 1 varind
653 // 1 <float> -1.0 low
654 // 2 <float> 1.56 high
655 //
656
657 for (Int_t r=0; r<nrules; r++) {
658 Int_t varind;
661 Rule *rule = new Rule(fRuleFit->GetRuleEnsemblePtr());
662 rfRules.push_back( rule );
663 RuleCut *rfcut = new RuleCut();
664 rfcut->SetNvars(rfNcut[r]);
665 rule->SetRuleCut( rfcut );
666 // the below are set to default values since no info is
667 // available in rulefit.sum
668 rule->SetNorm(1.0);
669 rule->SetSupport(0);
670 rule->SetSSB(0.0);
671 rule->SetSSBNeve(0.0);
672 rule->SetImportanceRef(1.0);
673 rule->SetSSB(0.0);
674 rule->SetSSBNeve(0.0);
675 // set support etc
676 rule->SetSupport(rfSupp[r]);
677 rule->SetCoefficient(rfCoef[r]);
678 rule->CalcImportance();
679 imp = rule->GetImportance();
680 if (imp>impref) impref = imp; // find max importance
681 //
682 fLogger << kDEBUG << "Rule #" << r << " : " << nvars << Endl;
683 fLogger << kDEBUG << " support = " << rfSupp[r] << Endl;
684 fLogger << kDEBUG << " sigma = " << rule->GetSigma() << Endl;
685 fLogger << kDEBUG << " coeff = " << rfCoef[r] << Endl;
686 fLogger << kDEBUG << " N(cut) = " << rfNcut[r] << Endl;
687
688 for (Int_t c=0; c<rfNcut[r]; c++) {
689 lines += ReadFloat(f,&dumF);
690 varind = static_cast<Int_t>(dumF+0.5)-1;
691 lines += ReadFloat(f,&dumF);
692 xmin = static_cast<Double_t>(dumF);
693 lines += ReadFloat(f,&dumF);
694 xmax = static_cast<Double_t>(dumF);
695 // create Rule HERE!
696 rfcut->SetSelector(c,varind);
697 rfcut->SetCutMin(c,xmin);
698 rfcut->SetCutMax(c,xmax);
699 // the following is not nice - this is however defined
700 // by the rulefit.sum format.
701 rfcut->SetCutDoMin(c,(xmin<-8.99e35 ? kFALSE:kTRUE));
702 rfcut->SetCutDoMax(c,(xmax> 8.99e35 ? kFALSE:kTRUE));
703 //
704 }
705 }
706 fRuleFit->GetRuleEnsemblePtr()->SetRules( rfRules );
707 fRuleFit->GetRuleEnsemblePtr()->SetOffset( offset );
708 //--------------------------------------------
709 // read second part of rule info
710 //--------------------------------------------
711 //
712 // Per linear term:
713 // 73 1 var index
714 // 74 -1.99594 min
715 // 75 1.99403 max
716 // 76 -0.000741858 ??? average ???
717 // 77 0.970935 std
718 // 78 0 coeff
719 //
720 std::vector<Int_t> varind;
721 std::vector<Double_t> xmin;
722 std::vector<Double_t> xmax;
723 std::vector<Double_t> average;
724 std::vector<Double_t> stdev;
725 std::vector<Double_t> norm;
726 std::vector<Double_t> coeff;
727 //
728 for (Int_t c=0; c<nvars; c++) {
729 lines += ReadFloat(f,&dumF);
730 varind.push_back(static_cast<Int_t>(dumF+0.5)-1);
731 lines += ReadFloat(f,&dumF);
732 xmin.push_back(static_cast<Double_t>(dumF));
733 lines += ReadFloat(f,&dumF);
734 xmax.push_back(static_cast<Double_t>(dumF));
735 lines += ReadFloat(f,&dumF);
736 average.push_back(static_cast<Double_t>(dumF));
737 lines += ReadFloat(f,&dumF);
738 stdev.push_back(static_cast<Double_t>(dumF));
739 Double_t nv = fRuleFit->GetRuleEnsemblePtr()->CalcLinNorm(stdev.back());
740 norm.push_back(nv);
741 lines += ReadFloat(f,&dumF);
742 coeff.push_back(dumF/nv); // save coefficient for normalised var
743 //
744 fLogger << kDEBUG << "Linear #" << c << Endl;
745 fLogger << kDEBUG << " varind = " << varind.back() << Endl;
746 fLogger << kDEBUG << " xmin = " << xmin.back() << Endl;
747 fLogger << kDEBUG << " xmax = " << xmax.back() << Endl;
748 fLogger << kDEBUG << " average = " << average.back() << Endl;
749 fLogger << kDEBUG << " stdev = " << stdev.back() << Endl;
750 fLogger << kDEBUG << " coeff = " << coeff.back() << Endl;
751 }
752 if (xmin.size()>0) {
753 fRuleFit->GetRuleEnsemblePtr()->SetLinCoefficients(coeff);
754 fRuleFit->GetRuleEnsemblePtr()->SetLinDM(xmin);
755 fRuleFit->GetRuleEnsemblePtr()->SetLinDP(xmax);
756 fRuleFit->GetRuleEnsemblePtr()->SetLinNorm(norm);
757 }
758 // fRuleFit->GetRuleEnsemblePtr()->CalcImportance();
759 imp = fRuleFit->GetRuleEnsemblePtr()->CalcLinImportance();
760 if (imp>impref) impref=imp;
761 fRuleFit->GetRuleEnsemblePtr()->SetImportanceRef(impref);
762 fRuleFit->GetRuleEnsemblePtr()->CleanupLinear(); // to fill fLinTermOK vector
763
764 fRuleFit->GetRuleEnsemblePtr()->CalcVarImportance();
765 // fRuleFit->GetRuleEnsemblePtr()->CalcRuleSupport();
766
767 fLogger << kDEBUG << "Reading model done" << Endl;
768 return kTRUE;
769}
770
771////////////////////////////////////////////////////////////////////////////////
772/// execute rf_go.exe
773
775{
776 TString oldDir = gSystem->pwd();
777 TString cmd = "./rf_go.exe";
778 gSystem->cd(fRFWorkDir.Data());
779 int rval = gSystem->Exec(cmd.Data());
780 gSystem->cd(oldDir.Data());
781 return rval;
782}
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
float Float_t
Definition: RtypesCore.h:53
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:365
float xmin
Definition: THbookFile.cxx:93
float xmax
Definition: THbookFile.cxx:93
R__EXTERN TSystem * gSystem
Definition: TSystem.h:560
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition: Event.cxx:382
J Friedman's RuleFit method.
Definition: MethodRuleFit.h:47
const TString GetRFWorkDir() const
A class describing a 'rule cut'.
Definition: RuleCut.h:34
void SetSelector(Int_t i, UInt_t s)
Definition: RuleCut.h:63
void SetCutDoMin(Int_t i, Bool_t v)
Definition: RuleCut.h:66
void SetCutMin(Int_t i, Double_t v)
Definition: RuleCut.h:64
void SetCutMax(Int_t i, Double_t v)
Definition: RuleCut.h:65
void SetNvars(UInt_t nc)
Definition: RuleCut.h:141
void SetCutDoMax(Int_t i, Bool_t v)
Definition: RuleCut.h:67
J Friedman's RuleFit method.
Definition: RuleFitAPI.h:50
void SetTestParms()
set the test params
Definition: RuleFitAPI.cxx:203
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
Definition: RuleFitAPI.cxx:376
Bool_t WriteYhat()
written by rf_go.exe
Definition: RuleFitAPI.cxx:475
Bool_t WriteAll()
write all files read by rf_go.exe
Definition: RuleFitAPI.cxx:250
void ImportSetup()
import setup from MethodRuleFit
Definition: RuleFitAPI.cxx:134
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
Definition: RuleFitAPI.cxx:358
Bool_t WriteIntParms()
write int params file
Definition: RuleFitAPI.cxx:266
void CheckRFWorkDir()
check if the rulefit work dir is properly setup.
Definition: RuleFitAPI.cxx:168
Bool_t WriteProgram()
write command to rf_go.exe
Definition: RuleFitAPI.cxx:307
Bool_t ReadModelSum()
read model from rulefit.sum
Definition: RuleFitAPI.cxx:546
Bool_t WriteVarImp()
Definition: RuleFitAPI.cxx:464
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
Definition: RuleFitAPI.cxx:157
Bool_t ReadVarImp()
read variable importance
Definition: RuleFitAPI.cxx:509
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE:Format unknown!)
Definition: RuleFitAPI.cxx:367
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
Definition: RuleFitAPI.cxx:349
void InitRuleFit()
default initialisation SetRFWorkDir("./rulefit");
Definition: RuleFitAPI.cxx:124
void FillRealParmsDef()
set default real params
Definition: RuleFitAPI.cxx:215
Bool_t WriteVarNames()
write variable names, ascii
Definition: RuleFitAPI.cxx:452
Bool_t WriteRealVarImp()
write the minimum importance to be considered
Definition: RuleFitAPI.cxx:335
void FillIntParmsDef()
set default int params
Definition: RuleFitAPI.cxx:230
void WelcomeMessage()
welcome message
Definition: RuleFitAPI.cxx:78
Bool_t WriteTrain()
write training data, column wise
Definition: RuleFitAPI.cxx:385
virtual ~RuleFitAPI()
destructor
Definition: RuleFitAPI.cxx:71
Bool_t WriteRealParms()
write int params file
Definition: RuleFitAPI.cxx:277
Bool_t WriteLx()
Save input variable mask.
Definition: RuleFitAPI.cxx:293
Bool_t ReadYhat()
read the score
Definition: RuleFitAPI.cxx:484
void HowtoSetupRF()
howto message
Definition: RuleFitAPI.cxx:94
Bool_t WriteTest()
Write test data.
Definition: RuleFitAPI.cxx:420
void SetTrainParms()
set the training parameters
Definition: RuleFitAPI.cxx:190
Int_t RunRuleFit()
execute rf_go.exe
Definition: RuleFitAPI.cxx:774
A class implementing various fits of rule ensembles.
Definition: RuleFit.h:45
Implementation of a rule.
Definition: Rule.h:48
void SetImportanceRef(Double_t v)
Definition: Rule.h:94
void SetCoefficient(Double_t v)
Definition: Rule.h:82
void SetNorm(Double_t norm)
Definition: Rule.h:79
Double_t GetImportance() const
Definition: Rule.h:143
Double_t GetSigma() const
Definition: Rule.h:141
void SetSSBNeve(Double_t v)
Definition: Rule.h:91
void SetRuleCut(RuleCut *rc)
Definition: Rule.h:76
void CalcImportance()
Definition: Rule.h:97
void SetSupport(Double_t v)
Definition: Rule.h:85
void SetSSB(Double_t v)
Definition: Rule.h:88
@ kTesting
Definition: Types.h:145
Basic string class.
Definition: TString.h:131
const char * Data() const
Definition: TString.h:364
Bool_t cd(const char *path)
Definition: TSystem.h:424
const char * pwd()
Definition: TSystem.h:425
virtual Int_t Exec(const char *shellcmd)
Execute a command.
Definition: TSystem.cxx:662
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158