79 <<
"---------------------------------------------------------------------------\n"
80 <<
"- You are running the interface to Jerome Friedmans RuleFit(tm) code. -\n"
81 <<
"- For a full manual see the following web page: -\n"
83 <<
"- http://www-stat.stanford.edu/~jhf/R-RuleFit.html -\n"
85 <<
"---------------------------------------------------------------------------"
95 <<
"------------------------ RULEFIT-JF INTERFACE SETUP -----------------------\n"
97 <<
"1. Create a rulefit directory in your current work directory:\n"
98 <<
" mkdir " << fRFWorkDir <<
"\n\n"
99 <<
" the directory may be set using the option RuleFitDir\n"
101 <<
"2. Copy (or make a link) the file rf_go.exe into this directory\n"
103 <<
"The file can be obtained from Jerome Friedmans homepage (linux):\n"
104 <<
" wget http://www-stat.stanford.edu/~jhf/r-rulefit/linux/rf_go.exe\n"
106 <<
"Don't forget to do:\n"
107 <<
" chmod +x rf_go.exe\n"
109 <<
"For Windows download:\n"
110 <<
" http://www-stat.stanford.edu/~jhf/r-rulefit/windows/rf_go.exe\n"
112 <<
"NOTE: other platforms are not supported (see Friedmans homepage)\n"
114 <<
"---------------------------------------------------------------------------\n"
133 fRFIntParms.p = fMethodRuleFit->DataInfo().GetNVariables();
134 fRFIntParms.max_rules = fMethodRuleFit->GetRFNrules();
135 fRFIntParms.tree_size = fMethodRuleFit->GetRFNendnodes();
136 fRFIntParms.path_steps = fMethodRuleFit->GetGDNPathSteps();
138 fRFRealParms.path_inc = fMethodRuleFit->GetGDPathStep();
139 fRFRealParms.samp_fract = fMethodRuleFit->GetTreeEveFrac();
140 fRFRealParms.trim_qntl = fMethodRuleFit->GetLinQuantile();
141 fRFRealParms.conv_fac = fMethodRuleFit->GetGDErrScale();
143 if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyLinear() )
144 fRFIntParms.lmode = kRfLinear;
145 else if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyRules() )
146 fRFIntParms.lmode = kRfRules;
148 fRFIntParms.lmode = kRfBoth;
169 fLogger << kWARNING <<
"Must create a rulefit directory named : " << fRFWorkDir <<
Endl;
171 fLogger << kFATAL <<
"Setup failed - aborting!" <<
Endl;
176 fLogger << kWARNING <<
"No rf_go.exe file in directory : " << fRFWorkDir <<
Endl;
178 fLogger << kFATAL <<
"Setup failed - aborting!" <<
Endl;
191 Int_t n = fMethodRuleFit->Data()->GetNTrainingEvents();
194 fRFProgram = kRfTrain;
203 Int_t n = fMethodRuleFit->Data()->GetNTestEvents();
206 fRFProgram = kRfPredict;
214 fRFRealParms.xmiss = 9.0e30;
215 fRFRealParms.trim_qntl = 0.025;
216 fRFRealParms.huber = 0.8;
217 fRFRealParms.inter_supp = 3.0;
218 fRFRealParms.memory_par = 0.01;
219 fRFRealParms.samp_fract = 0.5;
220 fRFRealParms.path_inc = 0.01;
221 fRFRealParms.conv_fac = 1.1;
229 fRFIntParms.mode = (
int)kRfClass;
230 fRFIntParms.lmode = (
int)kRfBoth;
233 fRFIntParms.max_rules = 2000;
234 fRFIntParms.tree_size = 4;
235 fRFIntParms.path_speed = 2;
236 fRFIntParms.path_xval = 3;
237 fRFIntParms.path_steps = 50000;
238 fRFIntParms.path_testfreq = 100;
239 fRFIntParms.tree_store = 10000000;
240 fRFIntParms.cat_store = 1000000;
254 if (fRFProgram==kRfTrain) WriteTrain();
255 if (fRFProgram==kRfPredict) WriteTest();
256 if (fRFProgram==kRfVarimp) WriteRealVarImp();
266 if (!OpenRFile(
"intparms",
f))
return kFALSE;
267 WriteInt(
f,&fRFIntParms.mode,
sizeof(fRFIntParms)/
sizeof(
Int_t));
277 if (!OpenRFile(
"realparms",
f))
return kFALSE;
278 WriteFloat(
f,&fRFRealParms.xmiss,
sizeof(fRFRealParms)/
sizeof(
Float_t));
293 fRFLx.resize(fMethodRuleFit->DataInfo().GetNVariables(),1);
296 if (!OpenRFile(
"lx",
f))
return kFALSE;
297 WriteInt(
f,&fRFLx[0],fRFLx.size());
307 if (!OpenRFile(
"program",
f))
return kFALSE;
309 switch (fRFProgram) {
321 fRFProgram = kRfTrain;
335 if (!OpenRFile(
"realvarimp",
f))
return kFALSE;
339 WriteFloat(
f,&
rvp[0],2);
348 fLogger << kWARNING <<
"WriteRfOut is not yet implemented" <<
Endl;
357 fLogger << kWARNING <<
"WriteRfStatus is not yet implemented" <<
Endl;
366 fLogger << kWARNING <<
"WriteRuleFitMod is not yet implemented" <<
Endl;
375 fLogger << kWARNING <<
"WriteRuleFitSum is not yet implemented" <<
Endl;
388 if (!OpenRFile(
"train.x",
fx))
return kFALSE;
389 if (!OpenRFile(
"train.y",
fy))
return kFALSE;
390 if (!OpenRFile(
"train.w",
fw))
return kFALSE;
399 const Event *
ev = fMethodRuleFit->GetTrainingEvent(
ievt);
404 y = fMethodRuleFit->DataInfo().IsSignal(
ev)? 1.0 : -1.0;
410 fLogger << kINFO <<
"Number of training data written: " << fMethodRuleFit->Data()->GetNTrainingEvents() <<
Endl;
423 if (!OpenRFile(
"test.x",
f))
return kFALSE;
428 neve =
static_cast<Float_t>(fMethodRuleFit->Data()->GetNEvents());
429 WriteFloat(
f,&
neve,1);
437 vf = fMethodRuleFit->GetEvent(
ievt)->GetValue(
ivar);
441 fLogger << kINFO <<
"Number of test data written: " << fMethodRuleFit->Data()->GetNEvents() <<
Endl;
452 if (!OpenRFile(
"varnames",
f))
return kFALSE;
454 f << fMethodRuleFit->DataInfo().GetVariableInfo(
ivar).GetExpression() <<
'\n';
465 fLogger << kWARNING <<
"WriteVarImp is not yet implemented" <<
Endl;
474 fLogger << kWARNING <<
"WriteYhat is not yet implemented" <<
Endl;
486 if (!OpenRFile(
"yhat",
f))
return kFALSE;
489 ReadFloat(
f,&
xval,1);
491 if (
neve!=fMethodRuleFit->Data()->GetNTestEvents()) {
492 fLogger << kWARNING <<
"Inconsistent size of yhat file and test tree!" <<
Endl;
493 fLogger << kWARNING <<
"neve = " <<
neve <<
" , tree = " << fMethodRuleFit->Data()->GetNTestEvents() <<
Endl;
497 ReadFloat(
f,&
xval,1);
498 fRFYhat.push_back(
xval);
511 if (!OpenRFile(
"varimp",
f))
return kFALSE;
515 nvars=fMethodRuleFit->DataInfo().GetNVariables();
520 ReadFloat(
f,&
xval,1);
526 fRFVarImp.push_back(
xval);
534 ReadFloat(
f,&
xval,1);
547 fLogger << kVERBOSE <<
"Reading RuleFit summary file" <<
Endl;
549 if (!OpenRFile(
"rulefit.sum",
f))
return kFALSE;
559 fRuleFit->GetRuleEnsemblePtr()->SetAverageRuleSigma(0.4);
602 fLogger << kDEBUG <<
"N(rules) = " <<
nrules <<
Endl;
603 fLogger << kDEBUG <<
"N(vars) = " << nvars <<
Endl;
605 fLogger << kDEBUG <<
"xmiss = " <<
dumF <<
Endl;
606 fLogger << kDEBUG <<
"offset = " <<
offset <<
Endl;
608 fLogger << kWARNING <<
"Format of rulefit.sum is ... weird?? Continuing but who knows how it will end...?" <<
Endl;
610 std::vector<Double_t>
rfSupp;
611 std::vector<Double_t>
rfCoef;
612 std::vector<Int_t>
rfNcut;
617 for (
Int_t t=0; t<8; t++) {
667 rule->SetSSBNeve(0.0);
668 rule->SetImportanceRef(1.0);
670 rule->SetSSBNeve(0.0);
674 rule->CalcImportance();
678 fLogger << kDEBUG <<
"Rule #" <<
r <<
" : " << nvars <<
Endl;
679 fLogger << kDEBUG <<
" support = " <<
rfSupp[
r] <<
Endl;
680 fLogger << kDEBUG <<
" sigma = " <<
rule->GetSigma() <<
Endl;
681 fLogger << kDEBUG <<
" coeff = " <<
rfCoef[
r] <<
Endl;
682 fLogger << kDEBUG <<
" N(cut) = " <<
rfNcut[
r] <<
Endl;
702 fRuleFit->GetRuleEnsemblePtr()->SetRules(
rfRules );
703 fRuleFit->GetRuleEnsemblePtr()->SetOffset(
offset );
716 std::vector<Int_t>
varind;
717 std::vector<Double_t>
xmin;
718 std::vector<Double_t>
xmax;
719 std::vector<Double_t> average;
720 std::vector<Double_t>
stdev;
721 std::vector<Double_t>
norm;
722 std::vector<Double_t>
coeff;
735 Double_t nv = fRuleFit->GetRuleEnsemblePtr()->CalcLinNorm(
stdev.back());
740 fLogger << kDEBUG <<
"Linear #" <<
c <<
Endl;
741 fLogger << kDEBUG <<
" varind = " <<
varind.back() <<
Endl;
742 fLogger << kDEBUG <<
" xmin = " <<
xmin.back() <<
Endl;
743 fLogger << kDEBUG <<
" xmax = " <<
xmax.back() <<
Endl;
744 fLogger << kDEBUG <<
" average = " << average.back() <<
Endl;
745 fLogger << kDEBUG <<
" stdev = " <<
stdev.back() <<
Endl;
746 fLogger << kDEBUG <<
" coeff = " <<
coeff.back() <<
Endl;
749 fRuleFit->GetRuleEnsemblePtr()->SetLinCoefficients(
coeff);
750 fRuleFit->GetRuleEnsemblePtr()->SetLinDM(
xmin);
751 fRuleFit->GetRuleEnsemblePtr()->SetLinDP(
xmax);
752 fRuleFit->GetRuleEnsemblePtr()->SetLinNorm(
norm);
755 imp = fRuleFit->GetRuleEnsemblePtr()->CalcLinImportance();
757 fRuleFit->GetRuleEnsemblePtr()->SetImportanceRef(
impref);
758 fRuleFit->GetRuleEnsemblePtr()->CleanupLinear();
760 fRuleFit->GetRuleEnsemblePtr()->CalcVarImportance();
763 fLogger << kDEBUG <<
"Reading model done" <<
Endl;
int Int_t
Signed integer 4 bytes (int)
float Float_t
Float 4 bytes (float)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
R__EXTERN TSystem * gSystem
J Friedman's RuleFit method.
A class describing a 'rule cut'.
void SetTestParms()
set the test params
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
Bool_t WriteYhat()
written by rf_go.exe
Bool_t WriteAll()
write all files read by rf_go.exe
void ImportSetup()
import setup from MethodRuleFit
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
Bool_t WriteIntParms()
write int params file
void CheckRFWorkDir()
check if the rulefit work dir is properly set up.
Bool_t WriteProgram()
write command to rf_go.exe
Bool_t ReadModelSum()
read model from rulefit.sum
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
Bool_t ReadVarImp()
read variable importance
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE: format unknown!)
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
void InitRuleFit()
default initialisation SetRFWorkDir("./rulefit");
void FillRealParmsDef()
set default real params
Bool_t WriteVarNames()
write variable names, ascii
Bool_t WriteRealVarImp()
write the minimum importance to be considered
void FillIntParmsDef()
set default int params
void WelcomeMessage()
welcome message
Bool_t WriteTrain()
write training data, column wise
virtual ~RuleFitAPI()
destructor
Bool_t WriteRealParms()
write real params file
Bool_t WriteLx()
Save input variable mask.
Bool_t ReadYhat()
read the score
void HowtoSetupRF()
howto message
Bool_t WriteTest()
Write test data.
void SetTrainParms()
set the training parameters
Int_t RunRuleFit()
execute rf_go.exe
A class implementing various fits of rule ensembles.
Implementation of a rule.
Bool_t cd(const char *path)
virtual Int_t Exec(const char *shellcmd)
Execute a command.
MsgLogger & Endl(MsgLogger &ml)