Logo ROOT  
Reference Guide
RuleFitAPI.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RuleFitAPI *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Interface to Friedman's RuleFit method *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
16 * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
17 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
18 * *
19 * Copyright (c) 2005: *
20 * CERN, Switzerland *
21 * U. of Victoria, Canada *
22 * MPI-KP Heidelberg, Germany *
23 * LAPP, Annecy, France *
24 * *
25 * Redistribution and use in source and binary forms, with or without *
26 * modification, are permitted according to the terms listed in LICENSE *
27 * *
28 **********************************************************************************/
29
30#ifndef ROOT_TMVA_RuleFitAPI
31#define ROOT_TMVA_RuleFitAPI
32
33//////////////////////////////////////////////////////////////////////////
34// //
35// RuleFitAPI //
36// //
37// J Friedman's RuleFit method //
38// //
39//////////////////////////////////////////////////////////////////////////
40
41#include <fstream>
42#include <vector>
43
44#include "TMVA/MsgLogger.h"
45
46namespace TMVA {
47
48 class MethodRuleFit;
49 class RuleFit;
50
 // Interface to J. Friedman's RuleFit program (rf_go.exe).
 // Declares the entry points that run rf_go.exe for training, testing and variable
 // importance, plus helpers that open and read/write files in the RuleFit working directory.
 // NOTE(review): this listing carries the original source line numbers as a prefix on each
 // line, and several numbered lines are absent; each gap is flagged below.
51 class RuleFitAPI {
52
53 public:
54
55 RuleFitAPI( const TMVA::MethodRuleFit *rfbase, TMVA::RuleFit *rulefit, EMsgType minType );
56
57 virtual ~RuleFitAPI();
58
59 // welcome message
60 void WelcomeMessage();
61
62 // message on how to get the binary
63 void HowtoSetupRF();
64
65 // Set RuleFit working directory (the directory containing rf_go.exe)
66 void SetRFWorkDir(const char * wdir);
67
68 // Check that the RuleFit work dir is properly set up - aborts if it fails
69 void CheckRFWorkDir();
70
71 // run rf_go.exe in various modes (declared inline here; defined outside the class)
72 inline void TrainRuleFit();
73 inline void TestRuleFit();
74 inline void VarImp();
75
76 // read result into MethodRuleFit
 // NOTE(review): the declaration this comment introduces (source line 77) is absent from this listing.
78
79 // Get working directory (returns a copy of fRFWorkDir)
80 const TString GetRFWorkDir() const { return fRFWorkDir; }
81
82 protected:
83
84 enum ERFMode { kRfRegress=1, kRfClass=2 }; // RuleFit modes, default=Class
85 enum EModel { kRfLinear=0, kRfRules=1, kRfBoth=2 }; // models, default=Both (rules+linear)
86 enum ERFProgram { kRfTrain=0, kRfPredict, kRfVarimp }; // rf_go.exe running mode
87
88 // integer parameters
 // NOTE(review): the struct members (source lines 90-101) are absent from this listing.
89 typedef struct {
102 } IntParms;
103
104 // float parameters
 // NOTE(review): the struct members (source lines 106-113) are absent from this listing.
105 typedef struct {
114 } RealParms;
115
116 // setup
117 void InitRuleFit(); // default initialisation
118 void FillRealParmsDef(); // set default real params
119 void FillIntParmsDef(); // set default int params
120 void ImportSetup(); // import setup from MethodRuleFit
121 void SetTrainParms(); // set the training parameters
122 void SetTestParms(); // set the test params
123
124 // run
 // NOTE(review): the declaration for this section (source line 125) is absent from this listing.
126
127 // set rf_go.exe running mode
 // NOTE(review): source lines 128-130 are absent from this listing.
131
132 // handle rulefit files
 // NOTE(review): source line 133 is absent from this listing.
134 inline Bool_t OpenRFile(TString name, std::ofstream & f);
135 inline Bool_t OpenRFile(TString name, std::ifstream & f);
136
137 // read/write binary files (n = number of values pointed to by v, default 1)
138 inline Bool_t WriteInt(std::ofstream & f, const Int_t *v, Int_t n=1);
139 inline Bool_t WriteFloat(std::ofstream & f, const Float_t *v, Int_t n=1);
140 inline Int_t ReadInt(std::ifstream & f, Int_t *v, Int_t n=1) const;
141 inline Int_t ReadFloat(std::ifstream & f, Float_t *v, Int_t n=1) const;
142
143 // write rf_go.exe i/o files
 // NOTE(review): source lines 144-146 and 148-158 are absent from this listing.
147 Bool_t WriteLx(); // save input variable mask
159
160 // read rf_go.exe i/o files
 // NOTE(review): the declarations for this section (source lines 161-175) are absent from this listing.
176
177 private:
178 // prevent empty constructor from being used
 // NOTE(review): the declaration this comment refers to (source line 179) is absent from this listing.
180 const MethodRuleFit *fMethodRuleFit; ///< parent method - set in constructor
181 RuleFit *fRuleFit; ///< non const ptr to RuleFit class in MethodRuleFit
182 //
183 std::vector<Float_t> fRFYhat; ///< score results from test sample
184 std::vector<Float_t> fRFVarImp; ///< variable importances
185 std::vector<Int_t> fRFVarImpInd; ///< variable index
186 TString fRFWorkDir; ///< working directory
187 IntParms fRFIntParms; ///< integer parameters
188 RealParms fRFRealParms; ///< real parameters
189 std::vector<int> fRFLx; ///< variable selector
190 ERFProgram fRFProgram; ///< what to run
191 TString fModelType; ///< model type string
192
193 mutable MsgLogger fLogger; ///< message logger
194
195 ClassDef(RuleFitAPI,0); // Friedman's RuleFit method
196
197 };
198
199} // namespace TMVA
200
201//_______________________________________________________________________
203{
 // NOTE(review): the signature line (source line 202) is absent from this listing; the
 // cross-reference list at the end of the page gives "void TrainRuleFit()" at RuleFitAPI.h:202.
204 // run rf_go.exe to train the model: write all files read by rf_go.exe, then execute it
 // NOTE(review): source line 205 is also absent. TestRuleFit() calls SetTestParms() at the
 // matching position, so presumably SetTrainParms() belongs here - confirm against the header.
206 WriteAll();
207 RunRuleFit();
208}
209
210//_______________________________________________________________________
212{
 // NOTE(review): the signature line (source line 211) is absent from this listing; the
 // cross-reference list at the end of the page gives "void TestRuleFit()" at RuleFitAPI.h:211.
213 // run rf_go.exe with the test data
 // Sequence: set the test parameters, write all files read by rf_go.exe, execute it,
 // then read the resulting scores back.
214 SetTestParms();
215 WriteAll();
216 RunRuleFit();
217 ReadYhat(); // read in the scores
218}
219
220//_______________________________________________________________________
222{
 // NOTE(review): the signature line (source line 221) is absent from this listing;
 // presumably this is the body of the "inline void VarImp()" declared in the class - confirm.
223 // run rf_go.exe to get the variable importance
 // Sequence: select the variable-importance running mode, write all files read by
 // rf_go.exe, execute it, then read the importances back.
224 SetRFVarimp();
225 WriteAll();
226 RunRuleFit();
227 ReadVarImp(); // read in the variable importances
228}
229
230//_______________________________________________________________________
232{
 // NOTE(review): the signature line (source line 231) is absent from this listing; the
 // cross-reference list at the end of the page gives "TString GetRFName(TString name)"
 // at RuleFitAPI.h:231.
233 // get the name including the rulefit directory: returns fRFWorkDir + "/" + name
234 return fRFWorkDir+"/"+name;
235}
236
237//_______________________________________________________________________
239{
 // NOTE(review): the signature line (source line 238) is absent from this listing; the
 // cross-reference list at the end of the page gives
 // "Bool_t OpenRFile(TString name, std::ofstream &f)" at RuleFitAPI.h:238.
240 // open a file for writing in the rulefit directory
 // The full path is built with GetRFName(name). Returns kTRUE if the stream is open
 // afterwards; otherwise logs a kERROR message to fLogger and returns kFALSE.
241 TString fullName = GetRFName(name);
242 f.open(fullName);
243 if (!f.is_open()) {
244 fLogger << kERROR << "Error opening RuleFit file for output: "
245 << fullName << Endl;
246 return kFALSE;
247 }
248 return kTRUE;
249}
250
251//_______________________________________________________________________
253{
 // NOTE(review): the signature line (source line 252) is absent from this listing;
 // presumably this is the "OpenRFile(TString name, std::ifstream & f)" overload declared
 // in the class - confirm.
254 // open a file for reading in the rulefit directory
 // The full path is built with GetRFName(name). Returns kTRUE if the stream is open
 // afterwards; otherwise logs a kERROR message to fLogger and returns kFALSE.
255 TString fullName = GetRFName(name);
256 f.open(fullName);
257 if (!f.is_open()) {
258 fLogger << kERROR << "Error opening RuleFit file for input: "
259 << fullName << Endl;
260 return kFALSE;
261 }
262 return kTRUE;
263}
264
265//_______________________________________________________________________
267{
 // NOTE(review): the signature line (source line 266) is absent from this listing; the
 // cross-reference list at the end of the page gives
 // "Bool_t WriteInt(std::ofstream &f, const Int_t *v, Int_t n=1)" at RuleFitAPI.h:266.
268 // write an int
 // Writes n*sizeof(Int_t) raw bytes starting at v to f. Returns kFALSE if f is not open;
 // otherwise returns the stream returned by write() converted to Bool_t.
269 if (!f.is_open()) return kFALSE;
270 return (Bool_t)f.write(reinterpret_cast<char const *>(v), n*sizeof(Int_t));
271}
272
273//_______________________________________________________________________
275{
 // NOTE(review): the signature line (source line 274) is absent from this listing; the
 // cross-reference list at the end of the page gives
 // "Bool_t WriteFloat(std::ofstream &f, const Float_t *v, Int_t n=1)" at RuleFitAPI.h:274.
276 // write a float
 // Writes n*sizeof(Float_t) raw bytes starting at v to f. Returns kFALSE if f is not open;
 // otherwise returns the stream returned by write() converted to Bool_t.
277 if (!f.is_open()) return kFALSE;
278 return (Bool_t)f.write(reinterpret_cast<char const *>(v), n*sizeof(Float_t));
279}
280
281//_______________________________________________________________________
282Int_t TMVA::RuleFitAPI::ReadInt(std::ifstream & f, Int_t *v, Int_t n) const
283{
284 // read an int
285 if (!f.is_open()) return 0;
286 if (f.read(reinterpret_cast<char *>(v), n*sizeof(Int_t))) return 1;
287 return 0;
288}
289
290//_______________________________________________________________________
292{
 // NOTE(review): the signature line (source line 291) is absent from this listing; the
 // cross-reference list at the end of the page gives
 // "Int_t ReadFloat(std::ifstream &f, Float_t *v, Int_t n=1) const" at RuleFitAPI.h:291.
293 // read a float
 // Reads n*sizeof(Float_t) raw bytes from f into v. Returns 0 if f is not open or the
 // read fails, 1 if the read succeeds (a success flag, not a count of values read).
294 if (!f.is_open()) return 0;
295 if (f.read(reinterpret_cast<char *>(v), n*sizeof(Float_t))) return 1;
296 return 0;
297}
298
299#endif // ROOT_TMVA_RuleFitAPI
#define f(i)
Definition: RSha256.hxx:104
bool Bool_t
Definition: RtypesCore.h:63
const Bool_t kFALSE
Definition: RtypesCore.h:101
float Float_t
Definition: RtypesCore.h:57
const Bool_t kTRUE
Definition: RtypesCore.h:100
#define ClassDef(name, id)
Definition: Rtypes.h:335
char name[80]
Definition: TGX11.cxx:110
J Friedman's RuleFit method.
Definition: MethodRuleFit.h:48
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:57
J Friedman's RuleFit method.
Definition: RuleFitAPI.h:51
void SetTestParms()
set the test params
Definition: RuleFitAPI.cxx:201
Bool_t ReadRfStatus()
TString fModelType
model type string
Definition: RuleFitAPI.h:191
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
Definition: RuleFitAPI.cxx:374
Bool_t WriteYhat()
written by rf_go.exe
Definition: RuleFitAPI.cxx:473
Int_t ReadFloat(std::ifstream &f, Float_t *v, Int_t n=1) const
Definition: RuleFitAPI.h:291
Bool_t WriteAll()
write all files read by rf_go.exe
Definition: RuleFitAPI.cxx:248
void ImportSetup()
import setup from MethodRuleFit
Definition: RuleFitAPI.cxx:132
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
Definition: RuleFitAPI.cxx:356
Bool_t WriteIntParms()
write int params file
Definition: RuleFitAPI.cxx:264
void CheckRFWorkDir()
check if the rulefit work dir is properly setup.
Definition: RuleFitAPI.cxx:166
Bool_t WriteProgram()
write command to rf_go.exe
Definition: RuleFitAPI.cxx:305
Bool_t ReadModelSum()
read model from rulefit.sum
Definition: RuleFitAPI.cxx:544
Bool_t WriteVarImp()
Definition: RuleFitAPI.cxx:462
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
Definition: RuleFitAPI.cxx:155
Bool_t ReadVarImp()
read variable importance
Definition: RuleFitAPI.cxx:507
Bool_t ReadTrainW()
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE: format unknown!)
Definition: RuleFitAPI.cxx:365
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
Definition: RuleFitAPI.cxx:347
Bool_t ReadRealVarImp()
void TestRuleFit()
Definition: RuleFitAPI.h:211
TString GetRFName(TString name)
Definition: RuleFitAPI.h:231
RealParms fRFRealParms
real parameters
Definition: RuleFitAPI.h:188
RuleFit * fRuleFit
non const ptr to RuleFit class in MethodRuleFit
Definition: RuleFitAPI.h:181
Bool_t OpenRFile(TString name, std::ofstream &f)
Definition: RuleFitAPI.h:238
Bool_t ReadIntParms()
void InitRuleFit()
default initialisation SetRFWorkDir("./rulefit");
Definition: RuleFitAPI.cxx:122
void FillRealParmsDef()
set default real params
Definition: RuleFitAPI.cxx:213
std::vector< Float_t > fRFVarImp
variable importances
Definition: RuleFitAPI.h:184
Bool_t WriteVarNames()
write variable names, ascii
Definition: RuleFitAPI.cxx:450
ERFProgram fRFProgram
what to run
Definition: RuleFitAPI.h:190
Bool_t WriteRealVarImp()
write the minimum importance to be considered
Definition: RuleFitAPI.cxx:333
void FillIntParmsDef()
set default int params
Definition: RuleFitAPI.cxx:228
void WelcomeMessage()
welcome message
Definition: RuleFitAPI.cxx:76
void TrainRuleFit()
Definition: RuleFitAPI.h:202
Bool_t WriteTrain()
write training data, column wise
Definition: RuleFitAPI.cxx:383
virtual ~RuleFitAPI()
destructor
Definition: RuleFitAPI.cxx:69
Bool_t ReadRfOut()
Bool_t WriteRealParms()
write real params file
Definition: RuleFitAPI.cxx:275
Bool_t WriteFloat(std::ofstream &f, const Float_t *v, Int_t n=1)
Definition: RuleFitAPI.h:274
Bool_t ReadProgram()
MsgLogger fLogger
message logger
Definition: RuleFitAPI.h:193
Bool_t ReadRuleFitMod()
const MethodRuleFit * fMethodRuleFit
parent method - set in constructor
Definition: RuleFitAPI.h:180
Int_t ReadInt(std::ifstream &f, Int_t *v, Int_t n=1) const
Definition: RuleFitAPI.h:282
IntParms fRFIntParms
integer parameters
Definition: RuleFitAPI.h:187
Bool_t WriteLx()
Save input variable mask.
Definition: RuleFitAPI.cxx:291
TString fRFWorkDir
working directory
Definition: RuleFitAPI.h:186
Bool_t ReadYhat()
read the score
Definition: RuleFitAPI.cxx:482
void HowtoSetupRF()
howto message
Definition: RuleFitAPI.cxx:92
Bool_t ReadTrainX()
std::vector< Float_t > fRFYhat
score results from test sample
Definition: RuleFitAPI.h:183
std::vector< Int_t > fRFVarImpInd
variable index
Definition: RuleFitAPI.h:185
Bool_t ReadVarNames()
std::vector< int > fRFLx
variable selector
Definition: RuleFitAPI.h:189
Bool_t WriteTest()
Write test data.
Definition: RuleFitAPI.cxx:418
void SetRFPredict()
Definition: RuleFitAPI.h:129
void SetTrainParms()
set the training parameters
Definition: RuleFitAPI.cxx:188
void SetRFVarimp()
Definition: RuleFitAPI.h:130
Bool_t WriteInt(std::ofstream &f, const Int_t *v, Int_t n=1)
Definition: RuleFitAPI.h:266
const TString GetRFWorkDir() const
Definition: RuleFitAPI.h:80
Bool_t ReadTrainY()
Bool_t ReadRealParms()
Int_t RunRuleFit()
execute rf_go.exe
Definition: RuleFitAPI.cxx:771
Bool_t ReadRuleFitSum()
A class implementing various fits of rule ensembles.
Definition: RuleFit.h:46
EMsgType
Definition: Types.h:55
@ kERROR
Definition: Types.h:60
Basic string class.
Definition: TString.h:136
const Int_t n
Definition: legend1.C:16
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:148