50MethodC50::MethodC50(
const TString &jobName,
57 predict(
"predict.C5.0"),
60 C50Control(
"C5.0Control"),
61 asfactor(
"as.factor"),
87 predict(
"predict.C5.0"),
89 C50Control(
"C5.0Control"),
90 asfactor(
"as.factor"),
127 Error(
"Init",
"R's package C50 can not be loaded.");
128 Log() << kFATAL <<
" R's package C50 can not be loaded."
136 if (
Data()->GetNTrainingEvents() == 0)
Log() << kFATAL <<
"<Train> Data() has zero events" <<
Endl;
150 r[
"C50Model"] << Model;
151 r <<
"save(C50Model,file='" + path +
"')";
164 predictors for splits? Note: the C5.0 command line version defaults this \
165 parameter to ‘FALSE’, meaning no attempted gropings will be evaluated \
166 during the tree growing stage.");
168 the rules by their affect on the error rate and groups the \
169 rules into the specified number of bands. This modifies the \
170 output so that the effect on the error rate can be seen for \
171 the groups of rules within a band. If this options is \
172 selected and ‘rules = kFALSE’, a warning is issued and ‘rules’ \
173 is changed to ‘kTRUE’.");
176 step to simplify the tree.");
179 put in at least two of the splits.");
182 of the data. See Quinlan (1993) for details and examples.");
184 proportion of the data should be used to train the model. By \
185 default, all the samples are used for model training. Samples \
186 not used for training are used to evaluate the accuracy of \
187 the model in the printed output.");
190 stopping boosting should be used.");
199 Log() << kERROR <<
" fNTrials <=0... that does not work !! "
200 <<
" I set it to 1 .. just so that the program does not crash"
219 Log() << kINFO <<
"Testing Classification C50 METHOD " <<
Endl;
232 for (
UInt_t i = 0; i < nvar; i++) {
239 mvaValue = result[1];
249 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
250 if (firstEvt < 0) firstEvt = 0;
252 nEvents = lastEvt-firstEvt;
264 std::vector<std::vector<Float_t> > inputData(nvars);
265 for (
UInt_t i = 0; i < nvars; i++) {
266 inputData[i] = std::vector<Float_t>(nEvents);
269 for (
Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
272 assert(nvars ==
e->GetNVariables());
273 for (
UInt_t i = 0; i < nvars; i++) {
274 inputData[i][ievt] =
e->GetValue(i);
281 for (
UInt_t i = 0; i < nvars; i++) {
287 std::vector<Double_t> mvaValues(nEvents);
289 std::vector<Double_t> probValues(2*nEvents);
290 probValues = result.
As<std::vector<Double_t>>();
291 assert(probValues.size() == 2*mvaValues.size());
292 std::copy(probValues.begin()+nEvents, probValues.end(), mvaValues.begin() );
295 Log() << kINFO <<
Form(
"Dataset[%s] : ",
DataInfo().
GetName())<<
"Elapsed time for evaluation of " << nEvents <<
" events: "
313 Log() <<
"Decision Trees and Rule-Based Models " <<
Endl;
331 r <<
"load('" + path +
"')";
333 r[
"C50Model"] >> Model;
#define REGISTER_METHOD(CLASS)
for example
char * Form(const char *fmt,...)
This is a class to create DataFrames from ROOT to R.
static TRInterface & Instance()
static method to get a TRInterface instance reference
Bool_t Require(TString pkg)
Method to load an R package.
This is a class to get ROOT's objects from R's objects.
T As()
Some datatypes of ROOT or C++ can be wrapped into a TRObject; this method lets you unwrap those data...
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Class that contains all the data information.
UInt_t GetNVariables() const
std::vector< TString > GetListOfVariables() const
returns list of variables
const Event * GetEvent() const
Types::ETreeType GetCurrentType() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
void SetCurrentEvent(Long64_t ievt) const
std::vector< Float_t > & GetValues()
const char * GetName() const
const TString & GetWeightFileDir() const
const TString & GetMethodName() const
const Event * GetEvent() const
DataSetInfo & DataInfo() const
virtual void TestClassification()
initialization
void ReadStateFromFile()
Function to write options and weights to file.
Bool_t IsModelPersistence()
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
void GetHelpMessage() const
static Bool_t IsModuleLoaded
virtual void TestClassification()
initialization
ROOT::R::TRFunctionImport asfactor
ROOT::R::TRObject * fModel
Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
Bool_t fControlNoGlobalPruning
Bool_t fControlFuzzyThreshold
std::vector< TString > ListOfVariables
Bool_t fControlEarlyStopping
ROOT::R::TRFunctionImport C50Control
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
ROOT::R::TRObject fModelControl
ROOT::R::TRFunctionImport predict
ROOT::R::TRFunctionImport C50
MethodC50(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
std::vector< std::string > fFactorTrain
ROOT::R::TRDataFrame fDfTrain
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Singleton class for Global types used by TMVA.
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
std::string GetName(const std::string &scope_name)
Rcpp::internal::NamedPlaceHolder Label
create variable transformations
MsgLogger & Endl(MsgLogger &ml)