Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RuleFitAPI.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RuleFitAPI *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header file for description) *
12 * *
13 * Authors (alphabetical): *
14 * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
15 * *
16 * Copyright (c) 2005: *
17 * CERN, Switzerland *
18 * Iowa State U. *
19 * MPI-KP Heidelberg, Germany *
20 * *
21 * Redistribution and use in source and binary forms, with or without *
22 * modification, are permitted according to the terms listed in LICENSE *
23 * (see tmva/doc/LICENSE) *
24 **********************************************************************************/
25
26/*! \class TMVA::RuleFitAPI
27\ingroup TMVA
28J Friedman's RuleFit method
29*/
30
31#include "TMVA/RuleFitAPI.h"
32
33#include "TMVA/DataSet.h"
34#include "TMVA/DataSetInfo.h"
35#include "TMVA/MethodRuleFit.h"
36#include "TMVA/RuleFit.h"
37#include "TMVA/Timer.h"
38#include "TMVA/Tools.h"
39#include "TMVA/Types.h"
40#include "TMVA/VariableInfo.h"
41
42#include "TSystem.h"
43
44#include <algorithm>
45
46
49 EMsgType minType = kINFO ) :
50fMethodRuleFit(rfbase),
51 fRuleFit(rulefit),
52 fRFProgram(kRfTrain),
53 fLogger("RuleFitAPI",minType)
54{
55 // standard constructor
56 if (rfbase) {
57 SetRFWorkDir(rfbase->GetRFWorkDir());
58 } else {
59 SetRFWorkDir("./rulefit");
60 }
62}
63
64
65////////////////////////////////////////////////////////////////////////////////
66/// destructor
67
71
72////////////////////////////////////////////////////////////////////////////////
73/// welcome message
74
76{
77 fLogger << kINFO
78 << "\n"
79 << "---------------------------------------------------------------------------\n"
80 << "- You are running the interface to Jerome Friedmans RuleFit(tm) code. -\n"
81 << "- For a full manual see the following web page: -\n"
82 << "- -\n"
83 << "- http://www-stat.stanford.edu/~jhf/R-RuleFit.html -\n"
84 << "- -\n"
85 << "---------------------------------------------------------------------------"
86 << Endl;
87}
88////////////////////////////////////////////////////////////////////////////////
89/// howto message
90
92{
93 fLogger << kINFO
94 << "\n"
95 << "------------------------ RULEFIT-JF INTERFACE SETUP -----------------------\n"
96 << "\n"
97 << "1. Create a rulefit directory in your current work directory:\n"
98 << " mkdir " << fRFWorkDir << "\n\n"
99 << " the directory may be set using the option RuleFitDir\n"
100 << "\n"
101 << "2. Copy (or make a link) the file rf_go.exe into this directory\n"
102 << "\n"
103 << "The file can be obtained from Jerome Friedmans homepage (linux):\n"
104 << " wget http://www-stat.stanford.edu/~jhf/r-rulefit/linux/rf_go.exe\n"
105 << "\n"
106 << "Don't forget to do:\n"
107 << " chmod +x rf_go.exe\n"
108 << "\n"
109 << "For Windows download:\n"
110 << " http://www-stat.stanford.edu/~jhf/r-rulefit/windows/rf_go.exe\n"
111 << "\n"
112 << "NOTE: other platforms are not supported (see Friedmans homepage)\n"
113 << "\n"
114 << "---------------------------------------------------------------------------\n"
115 << Endl;
116}
117////////////////////////////////////////////////////////////////////////////////
118/// default initialisation: checks the work directory and fills the default
119/// int and real parameters (the work directory itself is set via SetRFWorkDir)
120
122{
123 CheckRFWorkDir();
124 FillIntParmsDef();
125 FillRealParmsDef();
126}
127
128////////////////////////////////////////////////////////////////////////////////
129/// import setup from MethodRuleFit
130
132{
133 fRFIntParms.p = fMethodRuleFit->DataInfo().GetNVariables();
134 fRFIntParms.max_rules = fMethodRuleFit->GetRFNrules();
135 fRFIntParms.tree_size = fMethodRuleFit->GetRFNendnodes();
136 fRFIntParms.path_steps = fMethodRuleFit->GetGDNPathSteps();
137 //
138 fRFRealParms.path_inc = fMethodRuleFit->GetGDPathStep();
139 fRFRealParms.samp_fract = fMethodRuleFit->GetTreeEveFrac();
140 fRFRealParms.trim_qntl = fMethodRuleFit->GetLinQuantile();
141 fRFRealParms.conv_fac = fMethodRuleFit->GetGDErrScale();
142 //
143 if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyLinear() )
144 fRFIntParms.lmode = kRfLinear;
145 else if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyRules() )
146 fRFIntParms.lmode = kRfRules;
147 else
148 fRFIntParms.lmode = kRfBoth;
149}
150
151////////////////////////////////////////////////////////////////////////////////
152/// set the directory containing rf_go.exe.
153
155{
156 fRFWorkDir = wdir;
157}
158
159////////////////////////////////////////////////////////////////////////////////
160/// check if the rulefit work dir is properly set up.
161/// it aborts (kFATAL) if not.
162///
163/// Check existence of directory
164
166{
168 if (!gSystem->cd(fRFWorkDir)) {
169 fLogger << kWARNING << "Must create a rulefit directory named : " << fRFWorkDir << Endl;
170 HowtoSetupRF();
171 fLogger << kFATAL << "Setup failed - aborting!" << Endl;
172 }
173 // check rf_go.exe
174 FILE *f = fopen("rf_go.exe","r");
175 if (f==0) {
176 fLogger << kWARNING << "No rf_go.exe file in directory : " << fRFWorkDir << Endl;
177 HowtoSetupRF();
178 fLogger << kFATAL << "Setup failed - aborting!" << Endl;
179 }
180 fclose(f);
181 gSystem->cd(oldDir.Data());
182}
183
184////////////////////////////////////////////////////////////////////////////////
185/// set the training parameters
186
188{
189 ImportSetup();
190 //
191 Int_t n = fMethodRuleFit->Data()->GetNTrainingEvents();
192 // Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
193 fRFIntParms.n = n; // number of data points in tree
194 fRFProgram = kRfTrain;
195}
196
197////////////////////////////////////////////////////////////////////////////////
198/// set the test params
199
201{
202 ImportSetup();
203 Int_t n = fMethodRuleFit->Data()->GetNTestEvents();
204 // Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
205 fRFIntParms.n = n; // number of data points in tree
206 fRFProgram = kRfPredict;
207}
208
209////////////////////////////////////////////////////////////////////////////////
210/// set default real params
211
213{
214 fRFRealParms.xmiss = 9.0e30;
215 fRFRealParms.trim_qntl = 0.025;
216 fRFRealParms.huber = 0.8;
217 fRFRealParms.inter_supp = 3.0;
218 fRFRealParms.memory_par = 0.01;
219 fRFRealParms.samp_fract = 0.5; // calculated later
220 fRFRealParms.path_inc = 0.01;
221 fRFRealParms.conv_fac = 1.1;
222}
223
224////////////////////////////////////////////////////////////////////////////////
225/// set default int params
226
228{
229 fRFIntParms.mode = (int)kRfClass;
230 fRFIntParms.lmode = (int)kRfBoth;
231 // fRFIntParms.n;
232 // fRFIntParms.p;
233 fRFIntParms.max_rules = 2000;
234 fRFIntParms.tree_size = 4;
235 fRFIntParms.path_speed = 2;
236 fRFIntParms.path_xval = 3;
237 fRFIntParms.path_steps = 50000;
238 fRFIntParms.path_testfreq = 100;
239 fRFIntParms.tree_store = 10000000;
240 fRFIntParms.cat_store = 1000000;
241
242}
243
244////////////////////////////////////////////////////////////////////////////////
245/// write all files read by rf_go.exe
246
248{
249 WriteIntParms();
250 WriteRealParms();
251 WriteLx();
252 WriteProgram();
253 WriteVarNames();
254 if (fRFProgram==kRfTrain) WriteTrain();
255 if (fRFProgram==kRfPredict) WriteTest();
256 if (fRFProgram==kRfVarimp) WriteRealVarImp();
257 return kTRUE;
258}
259
260////////////////////////////////////////////////////////////////////////////////
261/// write int params file
262
264{
265 std::ofstream f;
266 if (!OpenRFile("intparms",f)) return kFALSE;
267 WriteInt(f,&fRFIntParms.mode,sizeof(fRFIntParms)/sizeof(Int_t));
268 return kTRUE;
269}
270
271////////////////////////////////////////////////////////////////////////////////
272/// write real params file
273
275{
276 std::ofstream f;
277 if (!OpenRFile("realparms",f)) return kFALSE;
278 WriteFloat(f,&fRFRealParms.xmiss,sizeof(fRFRealParms)/sizeof(Float_t));
279 return kTRUE;
280}
281
282////////////////////////////////////////////////////////////////////////////////
283/// Save input variable mask
284///
285/// The lx vector is cleared and resized to the number of input
286/// variables, with every entry set to 1.
287/// NOTE: the entries are always all set to 1; the former size check
288/// `if (fRFLx.size() != m_inputVars->size())` is disabled.
289
291{
292 fRFLx.clear();
293 fRFLx.resize(fMethodRuleFit->DataInfo().GetNVariables(),1);
294 // }
295 std::ofstream f;
296 if (!OpenRFile("lx",f)) return kFALSE;
297 WriteInt(f,&fRFLx[0],fRFLx.size());
298 return kTRUE;
299}
300
301////////////////////////////////////////////////////////////////////////////////
302/// write command to rf_go.exe
303
305{
306 std::ofstream f;
307 if (!OpenRFile("program",f)) return kFALSE;
309 switch (fRFProgram) {
310 case kRfTrain:
311 program = "rulefit";
312 break;
313 case kRfPredict:
314 program = "rulefit_pred";
315 break;
316 // calculate variable importance
317 case kRfVarimp:
318 program = "varimp";
319 break;
320 default:
321 fRFProgram = kRfTrain;
322 program="rulefit";
323 break;
324 }
325 f << program;
326 return kTRUE;
327}
328
329////////////////////////////////////////////////////////////////////////////////
330/// write the minimum importance to be considered
331
333{
334 std::ofstream f;
335 if (!OpenRFile("realvarimp",f)) return kFALSE;
336 Float_t rvp[2];
337 rvp[0] = 0.0; // Mode: see varimp() in rulefit.r
338 rvp[1] = 0.0; // Minimum importance considered (1 is max)
339 WriteFloat(f,&rvp[0],2);
340 return kTRUE;
341}
342
343////////////////////////////////////////////////////////////////////////////////
344/// written by rf_go.exe; write rulefit output (rfout)
345
347{
348 fLogger << kWARNING << "WriteRfOut is not yet implemented" << Endl;
349 return kTRUE;
350}
351
352////////////////////////////////////////////////////////////////////////////////
353/// written by rf_go.exe; write rulefit status
354
356{
357 fLogger << kWARNING << "WriteRfStatus is not yet implemented" << Endl;
358 return kTRUE;
359}
360
361////////////////////////////////////////////////////////////////////////////////
362/// written by rf_go.exe (NOTE:Format unknown!)
363
365{
366 fLogger << kWARNING << "WriteRuleFitMod is not yet implemented" << Endl;
367 return kTRUE;
368}
369
370////////////////////////////////////////////////////////////////////////////////
371/// written by rf_go.exe (NOTE: format unknown!)
372
374{
375 fLogger << kWARNING << "WriteRuleFitSum is not yet implemented" << Endl;
376 return kTRUE;
377}
378
379////////////////////////////////////////////////////////////////////////////////
380/// write training data, column wise
381
383{
384 std::ofstream fx;
385 std::ofstream fy;
386 std::ofstream fw;
387 //
388 if (!OpenRFile("train.x",fx)) return kFALSE;
389 if (!OpenRFile("train.y",fy)) return kFALSE;
390 if (!OpenRFile("train.w",fw)) return kFALSE;
391 //
392 Float_t x,y,w;
393 //
394 // The loop order cannot be changed.
395 // The data is stored <var1(eve1), var1(eve2), ...var1(eveN), var2(eve1),....
396 //
397 for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
398 for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNTrainingEvents(); ievt++) {
399 const Event * ev = fMethodRuleFit->GetTrainingEvent(ievt);
400 x = ev->GetValue(ivar);
401 WriteFloat(fx,&x,1);
402 if (ivar==0) {
403 w = ev->GetWeight();
404 y = fMethodRuleFit->DataInfo().IsSignal(ev)? 1.0 : -1.0;
405 WriteFloat(fy,&y,1);
406 WriteFloat(fw,&w,1);
407 }
408 }
409 }
410 fLogger << kINFO << "Number of training data written: " << fMethodRuleFit->Data()->GetNTrainingEvents() << Endl;
411 return kTRUE;
412}
413
414////////////////////////////////////////////////////////////////////////////////
415/// Write test data
416
418{
419 fMethodRuleFit->Data()->SetCurrentType(Types::kTesting);
420
421 std::ofstream f;
422 //
423 if (!OpenRFile("test.x",f)) return kFALSE;
424 //
425 Float_t vf;
427 //
428 neve = static_cast<Float_t>(fMethodRuleFit->Data()->GetNEvents());
429 WriteFloat(f,&neve,1);
430 // Test data is saved as:
431 // 0 : <N> num of events, type float, 4 bytes
432 // 1-N : First variable for all events
433 // N+1-2N : Second variable...
434 // ...
435 for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
436 for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNEvents(); ievt++) {
437 vf = fMethodRuleFit->GetEvent(ievt)->GetValue(ivar);
438 WriteFloat(f,&vf,1);
439 }
440 }
441 fLogger << kINFO << "Number of test data written: " << fMethodRuleFit->Data()->GetNEvents() << Endl;
442 //
443 return kTRUE;
444}
445
446////////////////////////////////////////////////////////////////////////////////
447/// write variable names, ascii
448
450{
451 std::ofstream f;
452 if (!OpenRFile("varnames",f)) return kFALSE;
453 for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
454 f << fMethodRuleFit->DataInfo().GetVariableInfo(ivar).GetExpression() << '\n';
455 }
456 return kTRUE;
457}
458
459////////////////////////////////////////////////////////////////////////////////
460
462
463{
464 // written by rf_go.exe
465 fLogger << kWARNING << "WriteVarImp is not yet implemented" << Endl;
466 return kTRUE;
467}
468
469////////////////////////////////////////////////////////////////////////////////
470/// written by rf_go.exe
471
473{
474 fLogger << kWARNING << "WriteYhat is not yet implemented" << Endl;
475 return kTRUE;
476}
477
478////////////////////////////////////////////////////////////////////////////////
479/// read the score
480
482{
483 fRFYhat.clear();
484 //
485 std::ifstream f;
486 if (!OpenRFile("yhat",f)) return kFALSE;
487 Int_t neve;
489 ReadFloat(f,&xval,1);
490 neve = static_cast<Int_t>(xval);
491 if (neve!=fMethodRuleFit->Data()->GetNTestEvents()) {
492 fLogger << kWARNING << "Inconsistent size of yhat file and test tree!" << Endl;
493 fLogger << kWARNING << "neve = " << neve << " , tree = " << fMethodRuleFit->Data()->GetNTestEvents() << Endl;
494 return kFALSE;
495 }
496 for (Int_t ievt=0; ievt<fMethodRuleFit->Data()->GetNTestEvents(); ievt++) {
497 ReadFloat(f,&xval,1);
498 fRFYhat.push_back(xval);
499 }
500 return kTRUE;
501}
502
503////////////////////////////////////////////////////////////////////////////////
504/// read variable importance
505
507{
508 fRFVarImp.clear();
509 //
510 std::ifstream f;
511 if (!OpenRFile("varimp",f)) return kFALSE;
512 UInt_t nvars;
514 Float_t xmax=1.0;
515 nvars=fMethodRuleFit->DataInfo().GetNVariables();
516 //
517 // First read all importances
518 //
519 for (UInt_t ivar=0; ivar<nvars; ivar++) {
520 ReadFloat(f,&xval,1);
521 if (ivar==0) {
522 xmax=xval;
523 } else {
524 if (xval>xmax) xmax=xval;
525 }
526 fRFVarImp.push_back(xval);
527 }
528 //
529 // Read the indices.
530 // They are saved as float (!) by rf_go.exe.
531 //
532 for (UInt_t ivar=0; ivar<nvars; ivar++) {
533 fRFVarImp[ivar] = fRFVarImp[ivar]/xmax;
534 ReadFloat(f,&xval,1);
535 fRFVarImpInd.push_back(Int_t(xval)-1);
536 }
537 return kTRUE;
538}
539
540////////////////////////////////////////////////////////////////////////////////
541/// read model from rulefit.sum
542
544{
545 fRFVarImp.clear();
546 //
547 fLogger << kVERBOSE << "Reading RuleFit summary file" << Endl;
548 std::ifstream f;
549 if (!OpenRFile("rulefit.sum",f)) return kFALSE;
550 Int_t nrules=0;
551 Int_t nvars=0;
552 Int_t nvarsOpt=0;
553 Int_t dumI;
556 Double_t impref=-1.0;
558
559 fRuleFit->GetRuleEnsemblePtr()->SetAverageRuleSigma(0.4); // value used by Friedmans RuleFit
560 //
561 //--------------------------------------------
562 // first read rulefit.sum header
563 //--------------------------------------------
564 // line type val descr
565 // 0 <int> 86 N(rules)x2
566 // 1 <int> 155 ???
567 // 2 <int> 1 ???
568 // 3 <int> 1916 ???
569 // 4 <int> 2 N(vars) ?
570 // 5 <int> 2 N(vars) ?
571 // 6 <float> 9e+30 xmiss
572 // 7 <float> 1.1e-1 a0 (model offset)
573 //--------------------------------------------
574 //
575 // NOTE: a model without any rules, will look like
576 // for the first four lines:
577 //
578 // 0 1
579 // 1 1
580 // 2 1
581 // 3 0
582 //
583 // There will later be one block of dummy data for one rule.
584 // In order to catch this situation, some special checks are made below.
585 //
587 ReadInt(f,&nrules);
588 norules = (nrules==1);
589 ReadInt(f,&dumI);
590 norules = norules && (dumI==1);
591 ReadInt(f,&dumI);
592 norules = norules && (dumI==1);
593 ReadInt(f,&dumI);
594 norules = norules && (dumI==0);
595 if (nrules==0) norules=kTRUE; // this ugly construction is needed:(
596 if (norules) nrules = 0;
597 //
598 ReadInt(f,&nvars);
599 ReadInt(f,&nvarsOpt);
600 ReadFloat(f,&dumF);
601 ReadFloat(f,&offset);
602 fLogger << kDEBUG << "N(rules) = " << nrules << Endl;
603 fLogger << kDEBUG << "N(vars) = " << nvars << Endl;
604 fLogger << kDEBUG << "N(varsO) = " << nvarsOpt << Endl;
605 fLogger << kDEBUG << "xmiss = " << dumF << Endl;
606 fLogger << kDEBUG << "offset = " << offset << Endl;
607 if (nvars!=nvarsOpt) {
608 fLogger << kWARNING << "Format of rulefit.sum is ... weird?? Continuing but who knows how it will end...?" << Endl;
609 }
610 std::vector<Double_t> rfSupp;
611 std::vector<Double_t> rfCoef;
612 std::vector<Int_t> rfNcut;
613 std::vector<Rule *> rfRules;
614 if (norules) {
615 // if no rules, read 8 blocks of data
616 // this corresponds to one dummy rule
617 for (Int_t t=0; t<8; t++) {
618 ReadFloat(f,&dumF);
619 }
620 }
621 //
622 //--------------------------------------------
623 // read first part of rule info
624 //--------------------------------------------
625 //
626 // 8 <int> 10 ???
627 // 9 <float> 0.185 support
628 // 10 <float> 0.051 coefficient
629 // 11 <float> 2 num of cuts in rule
630 // 12 <float> 1 ??? not used by this interface
631 //
632 for (Int_t r=0; r<nrules; r++) {
633 ReadFloat(f,&dumF);
634 ReadFloat(f,&dumF);
635 rfSupp.push_back(dumF);
636 ReadFloat(f,&dumF);
637 rfCoef.push_back(dumF);
638 ReadFloat(f,&dumF);
639 rfNcut.push_back(static_cast<int>(dumF+0.5));
640 ReadFloat(f,&dumF);
641 //
642 }
643 //--------------------------------------------
644 // read second part of rule info
645 //--------------------------------------------
646 //
647 // Per range (cut):
648 // 0 <float> 1 varind
649 // 1 <float> -1.0 low
650 // 2 <float> 1.56 high
651 //
652
653 for (Int_t r=0; r<nrules; r++) {
657 Rule *rule = new Rule(fRuleFit->GetRuleEnsemblePtr());
658 rfRules.push_back( rule );
659 RuleCut *rfcut = new RuleCut();
660 rfcut->SetNvars(rfNcut[r]);
661 rule->SetRuleCut( rfcut );
662 // the below are set to default values since no info is
663 // available in rulefit.sum
664 rule->SetNorm(1.0);
665 rule->SetSupport(0);
666 rule->SetSSB(0.0);
667 rule->SetSSBNeve(0.0);
668 rule->SetImportanceRef(1.0);
669 rule->SetSSB(0.0);
670 rule->SetSSBNeve(0.0);
671 // set support etc
672 rule->SetSupport(rfSupp[r]);
673 rule->SetCoefficient(rfCoef[r]);
674 rule->CalcImportance();
675 imp = rule->GetImportance();
676 if (imp>impref) impref = imp; // find max importance
677 //
678 fLogger << kDEBUG << "Rule #" << r << " : " << nvars << Endl;
679 fLogger << kDEBUG << " support = " << rfSupp[r] << Endl;
680 fLogger << kDEBUG << " sigma = " << rule->GetSigma() << Endl;
681 fLogger << kDEBUG << " coeff = " << rfCoef[r] << Endl;
682 fLogger << kDEBUG << " N(cut) = " << rfNcut[r] << Endl;
683
684 for (Int_t c=0; c<rfNcut[r]; c++) {
685 ReadFloat(f,&dumF);
686 varind = static_cast<Int_t>(dumF+0.5)-1;
687 ReadFloat(f,&dumF);
688 xmin = static_cast<Double_t>(dumF);
689 ReadFloat(f,&dumF);
690 xmax = static_cast<Double_t>(dumF);
691 // create Rule HERE!
692 rfcut->SetSelector(c,varind);
693 rfcut->SetCutMin(c,xmin);
694 rfcut->SetCutMax(c,xmax);
695 // the following is not nice - this is however defined
696 // by the rulefit.sum format.
697 rfcut->SetCutDoMin(c,(xmin<-8.99e35 ? kFALSE:kTRUE));
698 rfcut->SetCutDoMax(c,(xmax> 8.99e35 ? kFALSE:kTRUE));
699 //
700 }
701 }
702 fRuleFit->GetRuleEnsemblePtr()->SetRules( rfRules );
703 fRuleFit->GetRuleEnsemblePtr()->SetOffset( offset );
704 //--------------------------------------------
705 // read third part of the model: linear terms
706 //--------------------------------------------
707 //
708 // Per linear term:
709 // 73 1 var index
710 // 74 -1.99594 min
711 // 75 1.99403 max
712 // 76 -0.000741858 ??? average ???
713 // 77 0.970935 std
714 // 78 0 coeff
715 //
716 std::vector<Int_t> varind;
717 std::vector<Double_t> xmin;
718 std::vector<Double_t> xmax;
719 std::vector<Double_t> average;
720 std::vector<Double_t> stdev;
721 std::vector<Double_t> norm;
722 std::vector<Double_t> coeff;
723 //
724 for (Int_t c=0; c<nvars; c++) {
725 ReadFloat(f,&dumF);
726 varind.push_back(static_cast<Int_t>(dumF+0.5)-1);
727 ReadFloat(f,&dumF);
728 xmin.push_back(static_cast<Double_t>(dumF));
729 ReadFloat(f,&dumF);
730 xmax.push_back(static_cast<Double_t>(dumF));
731 ReadFloat(f,&dumF);
732 average.push_back(static_cast<Double_t>(dumF));
733 ReadFloat(f,&dumF);
734 stdev.push_back(static_cast<Double_t>(dumF));
735 Double_t nv = fRuleFit->GetRuleEnsemblePtr()->CalcLinNorm(stdev.back());
736 norm.push_back(nv);
737 ReadFloat(f,&dumF);
738 coeff.push_back(dumF/nv); // save coefficient for normalised var
739 //
740 fLogger << kDEBUG << "Linear #" << c << Endl;
741 fLogger << kDEBUG << " varind = " << varind.back() << Endl;
742 fLogger << kDEBUG << " xmin = " << xmin.back() << Endl;
743 fLogger << kDEBUG << " xmax = " << xmax.back() << Endl;
744 fLogger << kDEBUG << " average = " << average.back() << Endl;
745 fLogger << kDEBUG << " stdev = " << stdev.back() << Endl;
746 fLogger << kDEBUG << " coeff = " << coeff.back() << Endl;
747 }
748 if (xmin.size()>0) {
749 fRuleFit->GetRuleEnsemblePtr()->SetLinCoefficients(coeff);
750 fRuleFit->GetRuleEnsemblePtr()->SetLinDM(xmin);
751 fRuleFit->GetRuleEnsemblePtr()->SetLinDP(xmax);
752 fRuleFit->GetRuleEnsemblePtr()->SetLinNorm(norm);
753 }
754 // fRuleFit->GetRuleEnsemblePtr()->CalcImportance();
755 imp = fRuleFit->GetRuleEnsemblePtr()->CalcLinImportance();
756 if (imp>impref) impref=imp;
757 fRuleFit->GetRuleEnsemblePtr()->SetImportanceRef(impref);
758 fRuleFit->GetRuleEnsemblePtr()->CleanupLinear(); // to fill fLinTermOK vector
759
760 fRuleFit->GetRuleEnsemblePtr()->CalcVarImportance();
761 // fRuleFit->GetRuleEnsemblePtr()->CalcRuleSupport();
762
763 fLogger << kDEBUG << "Reading model done" << Endl;
764 return kTRUE;
765}
766
767////////////////////////////////////////////////////////////////////////////////
768/// execute rf_go.exe
769
771{
773 TString cmd = "./rf_go.exe";
774 gSystem->cd(fRFWorkDir.Data());
775 int rval = gSystem->Exec(cmd.Data());
776 gSystem->cd(oldDir.Data());
777 return rval;
778}
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
int Int_t
Signed integer 4 bytes (int)
Definition RtypesCore.h:59
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
float xmin
float xmax
R__EXTERN TSystem * gSystem
Definition TSystem.h:572
J Friedman's RuleFit method.
A class describing a 'rule cut'.
Definition RuleCut.h:36
void SetTestParms()
set the test params
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
Bool_t WriteYhat()
written by rf_go.exe
Bool_t WriteAll()
write all files read by rf_go.exe
void ImportSetup()
import setup from MethodRuleFit
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
Bool_t WriteIntParms()
write int params file
void CheckRFWorkDir()
check if the rulefit work dir is properly set up.
Bool_t WriteProgram()
write command to rf_go.exe
Bool_t ReadModelSum()
read model from rulefit.sum
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
Bool_t ReadVarImp()
read variable importance
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE:Format unknown!)
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
void InitRuleFit()
default initialisation: checks the work directory and fills the default int and real parameters
void FillRealParmsDef()
set default real params
Bool_t WriteVarNames()
write variable names, ascii
Bool_t WriteRealVarImp()
write the minimum importance to be considered
void FillIntParmsDef()
set default int params
void WelcomeMessage()
welcome message
Bool_t WriteTrain()
write training data, column wise
virtual ~RuleFitAPI()
destructor
Bool_t WriteRealParms()
write real params file
Bool_t WriteLx()
Save input variable mask.
Bool_t ReadYhat()
read the score
void HowtoSetupRF()
howto message
Bool_t WriteTest()
Write test data.
void SetTrainParms()
set the training parameters
Int_t RunRuleFit()
execute rf_go.exe
A class implementing various fits of rule ensembles.
Definition RuleFit.h:46
Implementation of a rule.
Definition Rule.h:50
Basic string class.
Definition TString.h:138
Bool_t cd(const char *path)
Definition TSystem.h:433
const char * pwd()
Definition TSystem.h:434
virtual Int_t Exec(const char *shellcmd)
Execute a command.
Definition TSystem.cxx:651
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148