Logo ROOT   6.08/07
Reference Guide
RuleFitAPI.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : RuleFitAPI *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Interface to Friedman's RuleFit method *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
17  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * U. of Victoria, Canada *
22  * MPI-KP Heidelberg, Germany *
23  * LAPP, Annecy, France *
24  * *
25  * Redistribution and use in source and binary forms, with or without *
26  * modification, are permitted according to the terms listed in LICENSE *
27  * *
28  **********************************************************************************/
29 
30 #ifndef ROOT_TMVA_RuleFitAPI
31 #define ROOT_TMVA_RuleFitAPI
32 
33 //////////////////////////////////////////////////////////////////////////
34 // //
35 // RuleFitAPI //
36 // //
37 // J Friedman's RuleFit method //
38 // //
39 //////////////////////////////////////////////////////////////////////////
40 
41 #include <fstream>
42 
43 #include "TMVA/MsgLogger.h"
44 
45 namespace TMVA {
46 
47  class MethodRuleFit;
48  class RuleFit;
49 
50  class RuleFitAPI {
51 
52  public:
53 
54  RuleFitAPI( const TMVA::MethodRuleFit *rfbase, TMVA::RuleFit *rulefit, EMsgType minType );
55 
56  virtual ~RuleFitAPI();
57 
58  // welcome message
59  void WelcomeMessage();
60 
61  // message on how to get the binary
62  void HowtoSetupRF();
63 
64  // Set RuleFit working directory (the directory containing rf_go.exe)
65  void SetRFWorkDir(const char * wdir);
66 
67  // Check RF work dir - aborts if it fails
68  void CheckRFWorkDir();
69 
70  // run rf_go.exe in various modes (training, test, variable importance)
71  inline void TrainRuleFit();
72  inline void TestRuleFit();
73  inline void VarImp();
74 
75  // read result into MethodRuleFit
77 
78  // Get working directory (returned by value)
79  const TString GetRFWorkDir() const { return fRFWorkDir; }
80 
81  protected:
82 
83  enum ERFMode { kRfRegress=1, kRfClass=2 }; // RuleFit modes, default=Class
84  enum EModel { kRfLinear=0, kRfRules=1, kRfBoth=2 }; // models, default=Both (rules+linear)
85  enum ERFProgram { kRfTrain=0, kRfPredict, kRfVarimp }; // rf_go.exe running mode
86 
87  // integer parameters
88  typedef struct {
101  } IntParms;
102 
103  // float parameters
104  typedef struct {
113  } RealParms;
114 
115  // setup
116  void InitRuleFit();
117  void FillRealParmsDef();
118  void FillIntParmsDef();
119  void ImportSetup();
120  void SetTrainParms();
121  void SetTestParms();
122 
123  // run (executes rf_go.exe)
124  Int_t RunRuleFit();
125 
126  // set rf_go.exe running mode
130 
131  // handle rulefit files (names are resolved relative to fRFWorkDir)
132  inline TString GetRFName(TString name);
133  inline Bool_t OpenRFile(TString name, std::ofstream & f);
134  inline Bool_t OpenRFile(TString name, std::ifstream & f);
135 
136  // read/write binary files (n = number of values pointed to by v)
137  inline Bool_t WriteInt(std::ofstream & f, const Int_t *v, Int_t n=1);
138  inline Bool_t WriteFloat(std::ofstream & f, const Float_t *v, Int_t n=1);
139  inline Int_t ReadInt(std::ifstream & f, Int_t *v, Int_t n=1) const;
140  inline Int_t ReadFloat(std::ifstream & f, Float_t *v, Int_t n=1) const;
141 
142  // write rf_go.exe i/o files
143  Bool_t WriteAll();
146  Bool_t WriteLx();
149  Bool_t WriteRfOut();
153  Bool_t WriteTrain();
156  Bool_t WriteYhat();
157  Bool_t WriteTest();
158 
159  // read rf_go.exe i/o files
160  Bool_t ReadYhat();
163  Bool_t ReadLx();
166  Bool_t ReadRfOut();
170  Bool_t ReadTrainX();
171  Bool_t ReadTrainY();
172  Bool_t ReadTrainW();
174  Bool_t ReadVarImp();
175 
176  private:
177  // prevent empty constructor from being used (declared private)
178  RuleFitAPI();
179  const MethodRuleFit *fMethodRuleFit; // parent method - set in constructor
180  RuleFit *fRuleFit; // non const ptr to RuleFit class in MethodRuleFit
181  //
182  std::vector<Float_t> fRFYhat; // score results from test sample
183  std::vector<Float_t> fRFVarImp; // variable importances
184  std::vector<Int_t> fRFVarImpInd; // variable index
185  TString fRFWorkDir; // working directory
186  IntParms fRFIntParms; // integer parameters
187  RealParms fRFRealParms; // real parameters
188  std::vector<int> fRFLx; // variable selector
189  ERFProgram fRFProgram; // what to run
190  TString fModelType; // model type string
191 
192  mutable MsgLogger fLogger; // message logger
193 
194  ClassDef(RuleFitAPI,0); // Friedman's RuleFit method
195 
196  };
197 
198 } // namespace TMVA
199 
200 //_______________________________________________________________________
202 {
203  // run rf_go.exe to train the model: set the training parameters, write the input files, execute rf_go.exe
204  SetTrainParms();
205  WriteAll(); // Bool_t result is not checked here
206  RunRuleFit(); // Int_t result is not checked here
207 }
208 
209 //_______________________________________________________________________
211 {
212  // run rf_go.exe with the test data: set the test parameters, write the input files, execute rf_go.exe
213  SetTestParms();
214  WriteAll(); // Bool_t result is not checked here
215  RunRuleFit(); // Int_t result is not checked here
216  ReadYhat(); // read in the scores
217 }
218 
219 //_______________________________________________________________________
221 {
222  // run rf_go.exe to get the variable importance, then read the result back
223  SetRFVarimp();
224  WriteAll(); // Bool_t result is not checked here
225  RunRuleFit(); // Int_t result is not checked here
226  ReadVarImp(); // read in the variable importances
227 }
228 
229 //_______________________________________________________________________
231 {
232  // get the name including the rulefit directory, i.e. fRFWorkDir + "/" + name
233  return fRFWorkDir+"/"+name;
234 }
235 
236 //_______________________________________________________________________
238 {
239  // open a file for writing in the rulefit directory; returns kTRUE on success, kFALSE (after logging a kERROR message) if it cannot be opened
240  TString fullName = GetRFName(name);
241  f.open(fullName);
242  if (!f.is_open()) {
243  fLogger << kERROR << "Error opening RuleFit file for output: "
244  << fullName << Endl;
245  return kFALSE;
246  }
247  return kTRUE;
248 }
249 
250 //_______________________________________________________________________
252 {
253  // open a file for reading in the rulefit directory; returns kTRUE on success, kFALSE (after logging a kERROR message) if it cannot be opened
254  TString fullName = GetRFName(name);
255  f.open(fullName);
256  if (!f.is_open()) {
257  fLogger << kERROR << "Error opening RuleFit file for input: "
258  << fullName << Endl;
259  return kFALSE;
260  }
261  return kTRUE;
262 }
263 
264 //_______________________________________________________________________
265 Bool_t TMVA::RuleFitAPI::WriteInt(std::ofstream & f, const Int_t *v, Int_t n)
266 {
267  // write n Int_t values starting at v to f as raw bytes; returns kFALSE if f is not open, otherwise the stream state after the write
268  if (!f.is_open()) return kFALSE;
269  return (Bool_t)f.write(reinterpret_cast<char const *>(v), n*sizeof(Int_t));
270 }
271 
272 //_______________________________________________________________________
274 {
275  // write n Float_t values starting at v to f as raw bytes; returns kFALSE if f is not open, otherwise the stream state after the write
276  if (!f.is_open()) return kFALSE;
277  return (Bool_t)f.write(reinterpret_cast<char const *>(v), n*sizeof(Float_t));
278 }
279 
280 //_______________________________________________________________________
281 Int_t TMVA::RuleFitAPI::ReadInt(std::ifstream & f, Int_t *v, Int_t n) const
282 {
283  // read n Int_t values from f into v as raw bytes; returns 1 on success, 0 if f is not open or the read fails (a status flag, not the number of values read)
284  if (!f.is_open()) return 0;
285  if (f.read(reinterpret_cast<char *>(v), n*sizeof(Int_t))) return 1;
286  return 0;
287 }
288 
289 //_______________________________________________________________________
290 Int_t TMVA::RuleFitAPI::ReadFloat(std::ifstream & f, Float_t *v, Int_t n) const
291 {
292  // read n Float_t values from f into v as raw bytes; returns 1 on success, 0 if f is not open or the read fails (a status flag, not the number of values read)
293  if (!f.is_open()) return 0;
294  if (f.read(reinterpret_cast<char *>(v), n*sizeof(Float_t))) return 1;
295  return 0;
296 }
297 
298 #endif // ROOT_TMVA_RuleFitAPI
Bool_t WriteLx()
Save input variable mask.
Definition: RuleFitAPI.cxx:293
TString GetRFName(TString name)
Definition: RuleFitAPI.h:230
Bool_t ReadVarImp()
read variable importance
Definition: RuleFitAPI.cxx:509
void WelcomeMessage()
welcome message
Definition: RuleFitAPI.cxx:78
Bool_t ReadRuleFitSum()
void HowtoSetupRF()
howto message
Definition: RuleFitAPI.cxx:94
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void TrainRuleFit()
Definition: RuleFitAPI.h:201
RuleFit * fRuleFit
Definition: RuleFitAPI.h:180
float Float_t
Definition: RtypesCore.h:53
Bool_t ReadRealVarImp()
ERFProgram fRFProgram
Definition: RuleFitAPI.h:189
virtual ~RuleFitAPI()
destructor
Definition: RuleFitAPI.cxx:71
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
Definition: RuleFitAPI.cxx:358
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
Definition: RuleFitAPI.cxx:157
Bool_t WriteAll()
write all files read by rf_go.exe
Definition: RuleFitAPI.cxx:250
void FillIntParmsDef()
set default int params
Definition: RuleFitAPI.cxx:230
Basic string class.
Definition: TString.h:137
Bool_t WriteTrain()
write training data, columnwise
Definition: RuleFitAPI.cxx:385
const MethodRuleFit * fMethodRuleFit
Definition: RuleFitAPI.h:179
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
Definition: RuleFitAPI.cxx:349
const Bool_t kFALSE
Definition: Rtypes.h:92
Bool_t WriteIntParms()
write int params file
Definition: RuleFitAPI.cxx:266
Bool_t WriteRealVarImp()
write the minimum importance to be considered
Definition: RuleFitAPI.cxx:335
IntParms fRFIntParms
Definition: RuleFitAPI.h:186
void ImportSetup()
import setup from MethodRuleFit
Definition: RuleFitAPI.cxx:134
void CheckRFWorkDir()
check if the rulefit work dir is properly setup.
Definition: RuleFitAPI.cxx:168
void SetRFVarimp()
Definition: RuleFitAPI.h:129
Bool_t ReadRfOut()
std::vector< int > fRFLx
Definition: RuleFitAPI.h:188
Bool_t ReadRuleFitMod()
#define ClassDef(name, id)
Definition: Rtypes.h:254
std::vector< Float_t > fRFYhat
Definition: RuleFitAPI.h:182
Bool_t WriteProgram()
write command to rf_go.exe
Definition: RuleFitAPI.cxx:307
void FillRealParmsDef()
set default real params
Definition: RuleFitAPI.cxx:215
void SetRFPredict()
Definition: RuleFitAPI.h:128
Bool_t ReadIntParms()
Bool_t ReadModelSum()
read model from rulefit.sum
Definition: RuleFitAPI.cxx:546
Bool_t ReadVarNames()
Int_t ReadInt(std::ifstream &f, Int_t *v, Int_t n=1) const
Definition: RuleFitAPI.h:281
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE:Format unknown!)
Definition: RuleFitAPI.cxx:367
void SetTrainParms()
set the training parameters
Definition: RuleFitAPI.cxx:190
SVector< double, 2 > v
Definition: Dict.h:5
Bool_t ReadTrainW()
Bool_t WriteInt(std::ofstream &f, const Int_t *v, Int_t n=1)
Definition: RuleFitAPI.h:265
EMsgType
Definition: Types.h:61
void TestRuleFit()
Definition: RuleFitAPI.h:210
Int_t ReadFloat(std::ifstream &f, Float_t *v, Int_t n=1) const
Definition: RuleFitAPI.h:290
void SetTestParms()
set the test params
Definition: RuleFitAPI.cxx:203
Bool_t WriteFloat(std::ofstream &f, const Float_t *v, Int_t n=1)
Definition: RuleFitAPI.h:273
Bool_t ReadTrainY()
std::vector< Float_t > fRFVarImp
Definition: RuleFitAPI.h:183
TString fModelType
Definition: RuleFitAPI.h:190
double f(double x)
MsgLogger fLogger
Definition: RuleFitAPI.h:192
Bool_t WriteVarNames()
write variable names, ascii
Definition: RuleFitAPI.cxx:452
Bool_t WriteTest()
Write test data.
Definition: RuleFitAPI.cxx:420
TString fRFWorkDir
Definition: RuleFitAPI.h:185
Bool_t OpenRFile(TString name, std::ofstream &f)
Definition: RuleFitAPI.h:237
Abstract ClassifierFactory template that handles arbitrary types.
const TString GetRFWorkDir() const
Definition: RuleFitAPI.h:79
RealParms fRFRealParms
Definition: RuleFitAPI.h:187
Bool_t ReadRfStatus()
Bool_t WriteYhat()
written by rf_go.exe
Definition: RuleFitAPI.cxx:475
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
Definition: RuleFitAPI.cxx:376
Bool_t ReadProgram()
Bool_t ReadYhat()
read the score
Definition: RuleFitAPI.cxx:484
Int_t RunRuleFit()
execute rf_go.exe
Definition: RuleFitAPI.cxx:774
Bool_t WriteRealParms()
write real params file
Definition: RuleFitAPI.cxx:277
Bool_t WriteVarImp()
Definition: RuleFitAPI.cxx:464
Bool_t ReadTrainX()
std::vector< Int_t > fRFVarImpInd
Definition: RuleFitAPI.h:184
const Bool_t kTRUE
Definition: Rtypes.h:91
Bool_t ReadRealParms()
const Int_t n
Definition: legend1.C:16
char name[80]
Definition: TGX11.cxx:109
void InitRuleFit()
default initialisation SetRFWorkDir("./rulefit");
Definition: RuleFitAPI.cxx:124