50                              EMsgType minType = kINFO ) :
 
   51fMethodRuleFit(rfbase),
 
   54   fLogger(
"RuleFitAPI",minType)
 
   80           << 
"---------------------------------------------------------------------------\n" 
   81           << 
"-   You are running the interface to Jerome Friedmans RuleFit(tm) code.   -\n" 
   82           << 
"-   For a full manual see the following web page:                         -\n" 
   84           << 
"-        http://www-stat.stanford.edu/~jhf/R-RuleFit.html                 -\n" 
   86           << 
"---------------------------------------------------------------------------" 
   96           << 
"------------------------ RULEFIT-JF INTERFACE SETUP -----------------------\n" 
   98           << 
"1. Create a rulefit directory in your current work directory:\n" 
   99           << 
"       mkdir " << fRFWorkDir << 
"\n\n" 
  100           << 
"   the directory may be set using the option RuleFitDir\n" 
  102           << 
"2. Copy (or make a link) the file rf_go.exe into this directory\n" 
  104           << 
"The file can be obtained from Jerome Friedmans homepage (linux):\n" 
  105           << 
"   wget http://www-stat.stanford.edu/~jhf/r-rulefit/linux/rf_go.exe\n" 
  107           << 
"Don't forget to do:\n" 
  108           << 
"   chmod +x rf_go.exe\n" 
  110           << 
"For Windows download:\n" 
  111           << 
"   http://www-stat.stanford.edu/~jhf/r-rulefit/windows/rf_go.exe\n" 
  113           << 
"NOTE: other platforms are not supported (see Friedmans homepage)\n" 
  115           << 
"---------------------------------------------------------------------------\n" 
  134   fRFIntParms.p            = fMethodRuleFit->DataInfo().GetNVariables();
 
  135   fRFIntParms.max_rules    = fMethodRuleFit->GetRFNrules();
 
  136   fRFIntParms.tree_size    = fMethodRuleFit->GetRFNendnodes();
 
  137   fRFIntParms.path_steps   = fMethodRuleFit->GetGDNPathSteps();
 
  139   fRFRealParms.path_inc    = fMethodRuleFit->GetGDPathStep();
 
  140   fRFRealParms.samp_fract  = fMethodRuleFit->GetTreeEveFrac();
 
  141   fRFRealParms.trim_qntl   = fMethodRuleFit->GetLinQuantile();
 
  142   fRFRealParms.conv_fac    = fMethodRuleFit->GetGDErrScale();
 
  144   if      (fRuleFit->GetRuleEnsemblePtr()->DoOnlyLinear() )
 
  145      fRFIntParms.lmode = kRfLinear;
 
  146   else if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyRules() )
 
  147      fRFIntParms.lmode = kRfRules;
 
  149      fRFIntParms.lmode = kRfBoth;
 
  170      fLogger << kWARNING << 
"Must create a rulefit directory named : " << fRFWorkDir << 
Endl;
 
  172      fLogger << kFATAL << 
"Setup failed - aborting!" << 
Endl;
 
  175   FILE *
f = fopen(
"rf_go.exe",
"r");
 
  177      fLogger << kWARNING << 
"No rf_go.exe file in directory : " << fRFWorkDir << 
Endl;
 
  179      fLogger << kFATAL << 
"Setup failed - aborting!" << 
Endl;
 
  192   Int_t    n    = fMethodRuleFit->Data()->GetNTrainingEvents();
 
  195   fRFProgram    = kRfTrain;
 
  204   Int_t    n    = fMethodRuleFit->Data()->GetNTestEvents();
 
  207   fRFProgram    = kRfPredict;
 
  215   fRFRealParms.xmiss       = 9.0e30;
 
  216   fRFRealParms.trim_qntl   = 0.025;
 
  217   fRFRealParms.huber       = 0.8;
 
  218   fRFRealParms.inter_supp  = 3.0;
 
  219   fRFRealParms.memory_par  = 0.01;
 
  220   fRFRealParms.samp_fract  = 0.5; 
 
  221   fRFRealParms.path_inc    = 0.01;
 
  222   fRFRealParms.conv_fac    = 1.1;
 
  230   fRFIntParms.mode           = (
int)kRfClass;
 
  231   fRFIntParms.lmode          = (
int)kRfBoth;
 
  234   fRFIntParms.max_rules      = 2000;
 
  235   fRFIntParms.tree_size      = 4;
 
  236   fRFIntParms.path_speed     = 2;
 
  237   fRFIntParms.path_xval      = 3;
 
  238   fRFIntParms.path_steps     = 50000;
 
  239   fRFIntParms.path_testfreq  = 100;
 
  240   fRFIntParms.tree_store     = 10000000;
 
  241   fRFIntParms.cat_store      = 1000000;
 
  255   if (fRFProgram==kRfTrain)   WriteTrain();
 
  256   if (fRFProgram==kRfPredict) WriteTest();
 
  257   if (fRFProgram==kRfVarimp)  WriteRealVarImp();
 
  267   if (!OpenRFile(
"intparms",
f)) 
return kFALSE;
 
  268   WriteInt(
f,&fRFIntParms.mode,
sizeof(fRFIntParms)/
sizeof(
Int_t));
 
  278   if (!OpenRFile(
"realparms",
f)) 
return kFALSE;
 
  279   WriteFloat(
f,&fRFRealParms.xmiss,
sizeof(fRFRealParms)/
sizeof(
Float_t));
 
  294   fRFLx.resize(fMethodRuleFit->DataInfo().GetNVariables(),1);
 
  297   if (!OpenRFile(
"lx",
f)) 
return kFALSE;
 
  298   WriteInt(
f,&fRFLx[0],fRFLx.size());
 
  308   if (!OpenRFile(
"program",
f)) 
return kFALSE;
 
  310   switch (fRFProgram) {
 
  315      program = 
"rulefit_pred";
 
  322      fRFProgram = kRfTrain;
 
  336   if (!OpenRFile(
"realvarimp",
f)) 
return kFALSE;
 
  340   WriteFloat(
f,&rvp[0],2);
 
  349   fLogger << kWARNING << 
"WriteRfOut is not yet implemented" << 
Endl;
 
  358   fLogger << kWARNING << 
"WriteRfStatus is not yet implemented" << 
Endl;
 
  367   fLogger << kWARNING << 
"WriteRuleFitMod is not yet implemented" << 
Endl;
 
  376   fLogger << kWARNING << 
"WriteRuleFitSum is not yet implemented" << 
Endl;
 
  389   if (!OpenRFile(
"train.x",fx)) 
return kFALSE;
 
  390   if (!OpenRFile(
"train.y",fy)) 
return kFALSE;
 
  391   if (!OpenRFile(
"train.w",fw)) 
return kFALSE;
 
  398   for (
UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
 
  399      for (
Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNTrainingEvents(); ievt++) {
 
  400         const Event * ev = fMethodRuleFit->GetTrainingEvent(ievt);
 
  405            y = fMethodRuleFit->DataInfo().IsSignal(ev)? 1.0 : -1.0;
 
  411   fLogger << kINFO << 
"Number of training data written: " << fMethodRuleFit->Data()->GetNTrainingEvents() << 
Endl;
 
  424   if (!OpenRFile(
"test.x",
f)) 
return kFALSE;
 
  429   neve = 
static_cast<Float_t>(fMethodRuleFit->Data()->GetNEvents());
 
  430   WriteFloat(
f,&neve,1);
 
  436   for (
UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
 
  437      for (
Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNEvents(); ievt++) {
 
  438         vf =   fMethodRuleFit->GetEvent(ievt)->GetValue(ivar);
 
  442   fLogger << kINFO << 
"Number of test data written: " << fMethodRuleFit->Data()->GetNEvents() << 
Endl;
 
  453   if (!OpenRFile(
"varnames",
f)) 
return kFALSE;
 
  454   for (
UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
 
  455      f << fMethodRuleFit->DataInfo().GetVariableInfo(ivar).GetExpression() << 
'\n';
 
  466   fLogger << kWARNING << 
"WriteVarImp is not yet implemented" << 
Endl;
 
  475   fLogger << kWARNING << 
"WriteYhat is not yet implemented" << 
Endl;
 
  487   if (!OpenRFile(
"yhat",
f)) 
return kFALSE;
 
  490   ReadFloat(
f,&xval,1);
 
  491   neve = 
static_cast<Int_t>(xval);
 
  492   if (neve!=fMethodRuleFit->Data()->GetNTestEvents()) {
 
  493      fLogger << kWARNING << 
"Inconsistent size of yhat file and test tree!" << 
Endl;
 
  494      fLogger << kWARNING << 
"neve = " << neve << 
" , tree = " << fMethodRuleFit->Data()->GetNTestEvents() << 
Endl;
 
  497   for (
Int_t ievt=0; ievt<fMethodRuleFit->Data()->GetNTestEvents(); ievt++) {
 
  498      ReadFloat(
f,&xval,1);
 
  499      fRFYhat.push_back(xval);
 
  512   if (!OpenRFile(
"varimp",
f)) 
return kFALSE;
 
  516   nvars=fMethodRuleFit->DataInfo().GetNVariables();
 
  520   for (
UInt_t ivar=0; ivar<nvars; ivar++) {
 
  521      ReadFloat(
f,&xval,1);
 
  527      fRFVarImp.push_back(xval);
 
  533   for (
UInt_t ivar=0; ivar<nvars; ivar++) {
 
  534      fRFVarImp[ivar] = fRFVarImp[ivar]/
xmax;
 
  535      ReadFloat(
f,&xval,1);
 
  536      fRFVarImpInd.push_back(
Int_t(xval)-1);
 
  548   fLogger << kVERBOSE << 
"Reading RuleFit summary file" << 
Endl;
 
  550   if (!OpenRFile(
"rulefit.sum",
f)) 
return kFALSE;
 
  560   fRuleFit->GetRuleEnsemblePtr()->SetAverageRuleSigma(0.4); 
 
  589   norules = (nrules==1);
 
  591   norules = norules && (dumI==1);
 
  593   norules = norules && (dumI==1);
 
  595   norules = norules && (dumI==0);
 
  596   if (nrules==0) norules=
kTRUE; 
 
  597   if (norules) nrules = 0;
 
  600   ReadInt(
f,&nvarsOpt);
 
  603   fLogger << kDEBUG << 
"N(rules) = " << nrules   << 
Endl;
 
  604   fLogger << kDEBUG << 
"N(vars)  = " << nvars    << 
Endl;
 
  605   fLogger << kDEBUG << 
"N(varsO) = " << nvarsOpt << 
Endl;
 
  606   fLogger << kDEBUG << 
"xmiss    = " << dumF     << 
Endl;
 
  607   fLogger << kDEBUG << 
"offset   = " << 
offset   << 
Endl;
 
  608   if (nvars!=nvarsOpt) {
 
  609      fLogger << kWARNING << 
"Format of rulefit.sum is ... weird?? Continuing but who knows how it will end...?" << 
Endl;
 
  611   std::vector<Double_t> rfSupp;
 
  612   std::vector<Double_t> rfCoef;
 
  613   std::vector<Int_t>    rfNcut;
 
  614   std::vector<Rule *>   rfRules;
 
  618      for (
Int_t t=0; t<8; t++) {
 
  636      rfSupp.push_back(dumF);
 
  638      rfCoef.push_back(dumF);
 
  640      rfNcut.push_back(
static_cast<int>(dumF+0.5));
 
  658      Rule *rule = 
new Rule(fRuleFit->GetRuleEnsemblePtr());
 
  659      rfRules.push_back( rule );
 
  677      if (imp>impref) impref = imp; 
 
  679      fLogger << kDEBUG << 
"Rule #" << 
r << 
" : " << nvars << 
Endl;
 
  680      fLogger << kDEBUG << 
"  support  = " << rfSupp[
r] << 
Endl;
 
  681      fLogger << kDEBUG << 
"  sigma    = " << rule->
GetSigma() << 
Endl;
 
  682      fLogger << kDEBUG << 
"  coeff    = " << rfCoef[
r] << 
Endl;
 
  683      fLogger << kDEBUG << 
"  N(cut)   = " << rfNcut[
r] << 
Endl;
 
  687         varind = 
static_cast<Int_t>(dumF+0.5)-1;
 
  703   fRuleFit->GetRuleEnsemblePtr()->SetRules( rfRules );
 
  704   fRuleFit->GetRuleEnsemblePtr()->SetOffset( 
offset );
 
  717   std::vector<Int_t>    varind;
 
  718   std::vector<Double_t> 
xmin;
 
  719   std::vector<Double_t> 
xmax;
 
  720   std::vector<Double_t> average;
 
  721   std::vector<Double_t> stdev;
 
  722   std::vector<Double_t> norm;
 
  723   std::vector<Double_t> coeff;
 
  727      varind.push_back(
static_cast<Int_t>(dumF+0.5)-1);
 
  733      average.push_back(
static_cast<Double_t>(dumF));
 
  735      stdev.push_back(
static_cast<Double_t>(dumF));
 
  736      Double_t nv = fRuleFit->GetRuleEnsemblePtr()->CalcLinNorm(stdev.back());
 
  739      coeff.push_back(dumF/nv); 
 
  741      fLogger << kDEBUG << 
"Linear #" << 
c << 
Endl;
 
  742      fLogger << kDEBUG << 
"  varind   = " << varind.back()  << 
Endl;
 
  743      fLogger << kDEBUG << 
"  xmin     = " << 
xmin.back()    << 
Endl;
 
  744      fLogger << kDEBUG << 
"  xmax     = " << 
xmax.back()    << 
Endl;
 
  745      fLogger << kDEBUG << 
"  average  = " << average.back() << 
Endl;
 
  746      fLogger << kDEBUG << 
"  stdev    = " << stdev.back()  << 
Endl;
 
  747      fLogger << kDEBUG << 
"  coeff    = " << coeff.back()  << 
Endl;
 
  750      fRuleFit->GetRuleEnsemblePtr()->SetLinCoefficients(coeff);
 
  751      fRuleFit->GetRuleEnsemblePtr()->SetLinDM(
xmin);
 
  752      fRuleFit->GetRuleEnsemblePtr()->SetLinDP(
xmax);
 
  753      fRuleFit->GetRuleEnsemblePtr()->SetLinNorm(norm);
 
  756   imp = fRuleFit->GetRuleEnsemblePtr()->CalcLinImportance();
 
  757   if (imp>impref) impref=imp;
 
  758   fRuleFit->GetRuleEnsemblePtr()->SetImportanceRef(impref);
 
  759   fRuleFit->GetRuleEnsemblePtr()->CleanupLinear(); 
 
  761   fRuleFit->GetRuleEnsemblePtr()->CalcVarImportance();
 
  764   fLogger << kDEBUG << 
"Reading model done" << 
Endl;
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
 
R__EXTERN TSystem * gSystem
 
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
 
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
 
J Friedman's RuleFit method.
 
const TString GetRFWorkDir() const
 
A class describing a 'rule cut'.
 
void SetSelector(Int_t i, UInt_t s)
 
void SetCutDoMin(Int_t i, Bool_t v)
 
void SetCutMin(Int_t i, Double_t v)
 
void SetCutMax(Int_t i, Double_t v)
 
void SetCutDoMax(Int_t i, Bool_t v)
 
J Friedman's RuleFit method.
 
void SetTestParms()
set the test params
 
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
 
Bool_t WriteYhat()
written by rf_go.exe
 
Bool_t WriteAll()
write all files read by rf_go.exe
 
void ImportSetup()
import setup from MethodRuleFit
 
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
 
Bool_t WriteIntParms()
write int params file
 
void CheckRFWorkDir()
check if the rulefit work dir is properly setup.
 
Bool_t WriteProgram()
write command to rf_go.exe
 
Bool_t ReadModelSum()
read model from rulefit.sum
 
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
 
Bool_t ReadVarImp()
read variable importance
 
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE: format unknown!)
 
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
 
void InitRuleFit()
default initialisation SetRFWorkDir("./rulefit");
 
void FillRealParmsDef()
set default real params
 
Bool_t WriteVarNames()
write variable names, ascii
 
Bool_t WriteRealVarImp()
write the minimum importance to be considered
 
void FillIntParmsDef()
set default int params
 
void WelcomeMessage()
welcome message
 
Bool_t WriteTrain()
write training data, column wise
 
virtual ~RuleFitAPI()
destructor
 
Bool_t WriteRealParms()
write real params file
 
Bool_t WriteLx()
Save input variable mask.
 
Bool_t ReadYhat()
read the score
 
void HowtoSetupRF()
howto message
 
Bool_t WriteTest()
Write test data.
 
void SetTrainParms()
set the training parameters
 
Int_t RunRuleFit()
execute rf_go.exe
 
A class implementing various fits of rule ensembles.
 
Implementation of a rule.
 
void SetImportanceRef(Double_t v)
 
void SetCoefficient(Double_t v)
 
void SetNorm(Double_t norm)
 
Double_t GetImportance() const
 
Double_t GetSigma() const
 
void SetSSBNeve(Double_t v)
 
void SetRuleCut(RuleCut *rc)
 
void SetSupport(Double_t v)
 
const char * Data() const
 
Bool_t cd(const char *path)
 
virtual Int_t Exec(const char *shellcmd)
Execute a command.
 
MsgLogger & Endl(MsgLogger &ml)