52 EMsgType minType = kINFO ) :
53 fMethodRuleFit(rfbase),
56 fLogger(
"RuleFitAPI",minType)
82 <<
"---------------------------------------------------------------------------\n" 83 <<
"- You are running the interface to Jerome Friedmans RuleFit(tm) code. -\n" 84 <<
"- For a full manual see the following web page: -\n" 86 <<
"- http://www-stat.stanford.edu/~jhf/R-RuleFit.html -\n" 88 <<
"---------------------------------------------------------------------------" 98 <<
"------------------------ RULEFIT-JF INTERFACE SETUP -----------------------\n" 100 <<
"1. Create a rulefit directory in your current work directory:\n" 102 <<
" the directory may be set using the option RuleFitDir\n" 104 <<
"2. Copy (or make a link) the file rf_go.exe into this directory\n" 106 <<
"The file can be obtained from Jerome Friedmans homepage (linux):\n" 107 <<
" wget http://www-stat.stanford.edu/~jhf/r-rulefit/linux/rf_go.exe\n" 109 <<
"Don't forget to do:\n" 110 <<
" chmod +x rf_go.exe\n" 112 <<
"For Windows download:\n" 113 <<
" http://www-stat.stanford.edu/~jhf/r-rulefit/windows/rf_go.exe\n" 115 <<
"NOTE: other platforms are not supported (see Friedmans homepage)\n" 117 <<
"---------------------------------------------------------------------------\n" 174 fLogger << kFATAL <<
"Setup failed - aborting!" <<
Endl;
177 FILE *
f = fopen(
"rf_go.exe",
"r");
181 fLogger << kFATAL <<
"Setup failed - aborting!" <<
Endl;
317 program =
"rulefit_pred";
351 fLogger << kWARNING <<
"WriteRfOut is not yet implemented" <<
Endl;
360 fLogger << kWARNING <<
"WriteRfStatus is not yet implemented" <<
Endl;
369 fLogger << kWARNING <<
"WriteRuleFitMod is not yet implemented" <<
Endl;
378 fLogger << kWARNING <<
"WriteRuleFitSum is not yet implemented" <<
Endl;
468 fLogger << kWARNING <<
"WriteVarImp is not yet implemented" <<
Endl;
477 fLogger << kWARNING <<
"WriteYhat is not yet implemented" <<
Endl;
493 neve =
static_cast<Int_t>(xval);
495 fLogger << kWARNING <<
"Inconsistent size of yhat file and test tree!" <<
Endl;
522 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
527 if (xval>xmax) xmax=xval;
535 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
550 fLogger << kVERBOSE <<
"Reading RuleFit summary file" <<
Endl;
592 norules = (nrules==1);
594 norules = norules && (dumI==1);
596 norules = norules && (dumI==1);
598 norules = norules && (dumI==0);
599 if (nrules==0) norules=
kTRUE;
600 if (norules) nrules = 0;
606 fLogger << kDEBUG <<
"N(rules) = " << nrules <<
Endl;
607 fLogger << kDEBUG <<
"N(vars) = " << nvars <<
Endl;
608 fLogger << kDEBUG <<
"N(varsO) = " << nvarsOpt <<
Endl;
610 fLogger << kDEBUG <<
"offset = " << offset <<
Endl;
611 if (nvars!=nvarsOpt) {
612 fLogger << kWARNING <<
"Format of rulefit.sum is ... weird?? Continuing but who knows how it will end...?" <<
Endl;
614 std::vector<Double_t> rfSupp;
615 std::vector<Double_t> rfCoef;
616 std::vector<Int_t> rfNcut;
617 std::vector<Rule *> rfRules;
621 for (
Int_t t=0; t<8; t++) {
639 rfSupp.push_back(dumF);
641 rfCoef.push_back(dumF);
643 rfNcut.push_back(static_cast<int>(dumF+0.5));
662 rfRules.push_back( rule );
680 if (imp>impref) impref = imp;
682 fLogger << kDEBUG <<
"Rule #" << r <<
" : " << nvars <<
Endl;
683 fLogger << kDEBUG <<
" support = " << rfSupp[
r] <<
Endl;
690 varind =
static_cast<Int_t>(dumF+0.5)-1;
720 std::vector<Int_t> varind;
721 std::vector<Double_t>
xmin;
722 std::vector<Double_t>
xmax;
723 std::vector<Double_t> average;
724 std::vector<Double_t> stdev;
725 std::vector<Double_t> norm;
726 std::vector<Double_t> coeff;
730 varind.push_back(static_cast<Int_t>(dumF+0.5)-1);
732 xmin.push_back(static_cast<Double_t>(dumF));
734 xmax.push_back(static_cast<Double_t>(dumF));
736 average.push_back(static_cast<Double_t>(dumF));
738 stdev.push_back(static_cast<Double_t>(dumF));
742 coeff.push_back(dumF/nv);
745 fLogger << kDEBUG <<
" varind = " << varind.back() <<
Endl;
746 fLogger << kDEBUG <<
" xmin = " << xmin.back() <<
Endl;
747 fLogger << kDEBUG <<
" xmax = " << xmax.back() <<
Endl;
748 fLogger << kDEBUG <<
" average = " << average.back() <<
Endl;
749 fLogger << kDEBUG <<
" stdev = " << stdev.back() <<
Endl;
750 fLogger << kDEBUG <<
" coeff = " << coeff.back() <<
Endl;
760 if (imp>impref) impref=imp;
Bool_t WriteLx()
Save input variable mask.
Bool_t ReadVarImp()
read variable importance
void WelcomeMessage()
welcome message
UInt_t GetNVariables() const
void SetCoefficient(Double_t v)
J Friedman's RuleFit method.
Double_t GetTreeEveFrac() const
void HowtoSetupRF()
howto message
void SetLinDP(const std::vector< Double_t > &xmax)
MsgLogger & Endl(MsgLogger &ml)
void SetSSBNeve(Double_t v)
void SetRuleCut(RuleCut *rc)
void SetCutMax(Int_t i, Double_t v)
Double_t GetGDErrScale() const
J Friedman's RuleFit method.
Double_t GetSigma() const
void SetLinDM(const std::vector< Double_t > &xmin)
A class implementing various fits of rule ensembles.
virtual ~RuleFitAPI()
destructor
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
Double_t CalcLinNorm(Double_t stdev)
Bool_t WriteAll()
write all files read by rf_go.exe
void FillIntParmsDef()
set default int params
Bool_t cd(const char *path)
Bool_t WriteTrain()
write training data, column wise
const MethodRuleFit * fMethodRuleFit
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
Bool_t WriteIntParms()
write int params file
Bool_t WriteRealVarImp()
write the minimum importance to be considered
void CleanupLinear()
cleanup linear model
void ImportSetup()
import setup from MethodRuleFit
void CheckRFWorkDir()
check if the rulefit work dir is properly setup.
const TString & GetExpression() const
Implementation of a rule.
void SetAverageRuleSigma(Double_t v)
Bool_t DoOnlyLinear() const
void SetLinCoefficients(const std::vector< Double_t > &v)
void SetImportanceRef(Double_t impref)
set reference importance
Double_t GetGDPathStep() const
void SetCutMin(Int_t i, Double_t v)
std::vector< Float_t > fRFYhat
Bool_t WriteProgram()
write command to rf_go.exe
void FillRealParmsDef()
set default real params
const Event * GetEvent() const
void SetCutDoMin(Int_t i, Bool_t v)
void SetRules(const std::vector< TMVA::Rule *> &rules)
set rules
DataSetInfo & DataInfo() const
Bool_t DoOnlyRules() const
Bool_t ReadModelSum()
read model from rulefit.sum
void SetSelector(Int_t i, UInt_t s)
RuleEnsemble * GetRuleEnsemblePtr()
Int_t ReadInt(std::ifstream &f, Int_t *v, Int_t n=1) const
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not...
Long64_t GetNTrainingEvents() const
const Event * GetTrainingEvent(Long64_t ievt) const
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE:Format unknown!)
void SetTrainParms()
set the training parameters
void CalcVarImportance()
Calculates variable importance using eq (35) in the RuleFit paper by Friedman et al.
Int_t GetRFNrules() const
R__EXTERN TSystem * gSystem
A class describing a 'rule cut'.
Double_t GetLinQuantile() const
Bool_t WriteInt(std::ofstream &f, const Int_t *v, Int_t n=1)
void SetOffset(Double_t v=0.0)
Int_t ReadFloat(std::ifstream &f, Float_t *v, Int_t n=1) const
virtual Int_t Exec(const char *shellcmd)
Execute a command.
void SetTestParms()
set the test params
Bool_t WriteFloat(std::ofstream &f, const Float_t *v, Int_t n=1)
void SetImportanceRef(Double_t v)
Int_t GetRFNendnodes() const
Long64_t GetNTestEvents() const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
void SetSupport(Double_t v)
const TString GetRFWorkDir() const
void SetNorm(Double_t norm)
std::vector< Float_t > fRFVarImp
void SetCutDoMax(Int_t i, Bool_t v)
Int_t GetGDNPathSteps() const
Bool_t WriteVarNames()
write variable names, ascii
void SetCurrentType(Types::ETreeType type) const
Bool_t WriteTest()
Write test data.
VariableInfo & GetVariableInfo(Int_t i)
Bool_t OpenRFile(TString name, std::ofstream &f)
Double_t GetImportance() const
Bool_t WriteYhat()
written by rf_go.exe
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
Bool_t ReadYhat()
read the score
Int_t RunRuleFit()
execute rf_go.exe
void SetLinNorm(const std::vector< Double_t > &norm)
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Bool_t WriteRealParms()
write real params file
Bool_t IsSignal(const Event *ev) const
Double_t CalcLinImportance()
calculate the linear importance for each rule
std::vector< Int_t > fRFVarImpInd
void InitRuleFit()
default initialisation SetRFWorkDir("./rulefit");
const char * Data() const