#pragma GCC diagnostic ignored "-Wunused-parameter"
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
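// NPY_NO_DEPRECATED_API pins this translation unit to the NumPy 1.7 C-API and
// hides symbols deprecated before that release. Note that the NumPy C-API must
// be initialized once (via import_array()) before any PyArray_* call is made;
// in this class hierarchy that is expected to happen in PyInitialize().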
PyMethodBase(jobName, Types::kPyRandomForest, methodTitle, dsi, theOption, theTargetDir),
min_weight_fraction_leaf(0),
max_features("'auto'"),
max_leaf_nodes("None"),
SetWeightFileDir(gConfig().GetIONames().fWeightFileDir);
min_weight_fraction_leaf(0),
max_features("'auto'"),
max_leaf_nodes("None"),
"The function to measure the quality of a split. Supported criteria are \
'gini' for the Gini impurity and 'entropy' for the information gain. \
Note: this parameter is tree-specific.");
"The maximum depth of the tree. If None, then nodes are expanded until \
all leaves are pure or until all leaves contain less than \
min_samples_split samples. \
Ignored if ``max_leaf_nodes`` is not None.");
"The minimum number of samples required to split an internal node.");
"The minimum number of samples in newly created leaves. A split is \
discarded if, after the split, one of the leaves would contain less than \
``min_samples_leaf`` samples.");
"The minimum weighted fraction of the input samples required to be at a \
leaf node.");
"Grow trees with ``max_leaf_nodes`` in best-first fashion. \
Best nodes are defined as relative reduction in impurity. \
If None then unlimited number of leaf nodes. \
If not None then ``max_depth`` will be ignored.");
"Whether bootstrap samples are used when building trees.");
"Whether to use out-of-bag samples to estimate \
the generalization error.");
"The number of jobs to run in parallel for both `fit` and `predict`. \
If -1, then the number of jobs is set to the number of cores.");
"If int, random_state is the seed used by the random number generator; \
If RandomState instance, random_state is the random number generator; \
If None, the random number generator is the RandomState instance used \
by `np.random`.");
"Controls the verbosity of the tree building process.");
"When set to ``True``, reuse the solution of the previous call to fit \
and add more estimators to the ensemble, otherwise, just fit a whole \
new forest.");
"Weights associated with classes in the form ``{class_label: weight}``. \
If not given, all classes are supposed to have weight one. For \
multi-output problems, a list of dicts can be provided in the same \
order as the columns of y. \
The \"auto\" mode uses the values of y to automatically adjust \
weights inversely proportional to class frequencies in the input data. \
The \"subsample\" mode is the same as \"auto\" except that weights are \
computed based on the bootstrap sample for every tree grown. \
For multi-output, the weights of each column of y will be multiplied. \
Note that these weights will be multiplied with sample_weight (passed \
through the fit method) if sample_weight is specified.");
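// The options declared above are forwarded as keyword arguments to
// scikit-learn's RandomForestClassifier. A minimal, hypothetical booking from
// user code might look as follows (object names are illustrative and the
// exact Factory/BookMethod signatures depend on the ROOT version):
//
//    TMVA::Factory factory("TMVAClassification", outputFile,
//                          "AnalysisType=Classification");
//    factory.BookMethod(TMVA::Types::kPyRandomForest, "PyRandomForest",
//                       "NEstimators=100:Criterion=gini");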
Log() << kERROR << " NEstimators <=0... that does not work !! "
      << " I set it to 10 .. just so that the program does not crash" << Endl;
<< " The options are gini or entropy." << Endl;
<< " The options are None or integer." << Endl;
Py_DECREF(pomax_depth);
Log() << kERROR << " MinSamplesSplit < 0... that does not work !! "
      << " I set it to 2 .. just so that the program does not crash" << Endl;
Log() << kERROR << " MinSamplesLeaf < 0... that does not work !! "
      << " I set it to 1 .. just so that the program does not crash" << Endl;
Log() << kERROR << " MinWeightFractionLeaf < 0... that does not work !! "
      << " I set it to 0 .. just so that the program does not crash" << Endl;
if (!pomax_features) {
   Log() << kFATAL
         << "int, float, string or None, optional (default='auto')"
         << "The number of features to consider when looking for the best split:"
         << "If int, then consider `max_features` features at each split."
         << "If float, then `max_features` is a percentage and"
         << "`int(max_features * n_features)` features are considered at each split."
         << "If 'auto', then `max_features=sqrt(n_features)`."
         << "If 'sqrt', then `max_features=sqrt(n_features)`."
         << "If 'log2', then `max_features=log2(n_features)`."
         << "If None, then `max_features=n_features`." << Endl;
}
Py_DECREF(pomax_features);
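// pomax_features (like the other po* objects checked below) is the Python-side
// value obtained by evaluating the option string with PyMethodBase::Eval; this
// is why string-valued options default to Python literals such as "'auto'" and
// "None" in the constructor initializers above.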
if (!pomax_leaf_nodes) {
   Log() << kFATAL << " The options are None or integer." << Endl;
}
Py_DECREF(pomax_leaf_nodes);
if (!porandom_state) {
   Log() << kFATAL
         << "If int, random_state is the seed used by the random number generator;"
         << "If RandomState instance, random_state is the random number generator;"
         << "If None, the random number generator is the RandomState instance used by `np.random`."
         << Endl;
}
Py_DECREF(porandom_state);
if (!poclass_weight) {
   Log() << kFATAL << "dict, list of dicts, 'auto', 'subsample' or None, optional" << Endl;
}
Py_DECREF(poclass_weight);
PyObject *pName = PyUnicode_FromString("sklearn.ensemble");
fModule = PyImport_Import(pName);
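// PyImport_Import returns a new reference (or NULL on failure), and pName is
// no longer needed once the lookup is done. A minimal guard, sketched under
// the assumption that fModule is checked right after the import:
//    if (!fModule) {
//       Log() << kFATAL << "Can't import sklearn.ensemble" << Endl;
//    }
//    Py_DECREF(pName);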
// Pack the training sample into a 2-D (fNrowsTraining x fNvars) float array,
// plus 1-D arrays for the class labels and the event weights.
int *dims = new int[2];
dims[0] = fNrowsTraining;
dims[1] = fNvars;
fTrainData = (PyArrayObject *)PyArray_FromDims(2, dims, NPY_FLOAT);
float *TrainData = (float *)(PyArray_DATA(fTrainData));
fTrainDataClasses = (PyArrayObject *)PyArray_FromDims(1, &fNrowsTraining, NPY_FLOAT);
fTrainDataWeights = (PyArrayObject *)PyArray_FromDims(1, &fNrowsTraining, NPY_FLOAT);
for (int i = 0; i < fNrowsTraining; i++) {
   const Event *e = GetTrainingEvent(i);
   for (UInt_t j = 0; j < fNvars; j++) {
      TrainData[j + i * fNvars] = e->GetValue(j);
   }
}
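// Row-major fill: TrainData[j + i * fNvars] holds variable j of event i, so
// the flat buffer matches the (n_events, n_vars) feature matrix layout that
// scikit-learn expects for X.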
Py_DECREF(pomax_depth);
// Print the assembled argument tuple to stdout for debugging.
PyObject_Print(args, stdout, 0);
std::cout << std::endl;
PyObject *fClassifierClass = PyDict_GetItemString(pDict, "RandomForestClassifier");
if (PyCallable_Check(fClassifierClass)) {
   fClassifier = PyObject_CallObject(fClassifierClass, args);
} else {
   Py_DECREF(fClassifierClass);
   Log() << kFATAL << "Can't call function RandomForestClassifier" << Endl;
}
Log() << kFATAL << "Can't create classifier object from RandomForestClassifier" << Endl;
int *dims = new int[2];
dims[0] = 1;
dims[1] = fNvars;
PyArrayObject *pEvent = (PyArrayObject *)PyArray_FromDims(2, dims, NPY_FLOAT);
float *pValue = (float *)(PyArray_DATA(pEvent));
PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier,
   const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
double *proba = (double *)(PyArray_DATA(result));
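// predict_proba returns an array of shape (n_samples, n_classes); for the
// single event built above, proba[0] and proba[1] are the two per-class
// probabilities. Which index corresponds to the signal class depends on the
// class labels that were filled into fTrainDataClasses during training.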
Log() << kFATAL << "Can't load RandomForestClassifier from Serialized data." << Endl;
Log() << "Decision Trees and Rule-Based Models " << Endl;