22 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 23 #include <numpy/arrayobject.h> 25 #pragma GCC diagnostic ignored "-Wunused-parameter" 67 min_weight_fraction_leaf(0),
68 max_features("'auto'"),
69 max_leaf_nodes("None"),
88 min_weight_fraction_leaf(0),
89 max_features(
"'auto'"),
90 max_leaf_nodes(
"None"),
122 The function to measure the quality of a split. Supported criteria are \ 123 'gini' for the Gini impurity and 'entropy' for the information gain. \ 124 Note: this parameter is tree-specific.");
127 The maximum depth of the tree. If None, then nodes are expanded until \ 128 all leaves are pure or until all leaves contain less than \ 129 min_samples_split samples. \ 130 Ignored if ``max_leaf_nodes`` is not None.");
132 The minimum number of samples required to split an internal node.");
135 The minimum number of samples in newly created leaves. A split is \ 136 discarded if after the split, one of the leaves would contain less then \ 137 ``min_samples_leaf`` samples.");
139 The minimum weighted fraction of the input samples required to be at a \ 143 Grow trees with ``max_leaf_nodes`` in best-first fashion.\ 144 Best nodes are defined as relative reduction in impurity.\ 145 If None then unlimited number of leaf nodes.\ 146 If not None then ``max_depth`` will be ignored.");
148 Whether bootstrap samples are used when building trees.");
150 the generalization error.");
152 The number of jobs to run in parallel for both `fit` and `predict`. \ 153 If -1, then the number of jobs is set to the number of cores.");
156 If int, random_state is the seed used by the random number generator;\ 157 If RandomState instance, random_state is the random number generator;\ 158 If None, the random number generator is the RandomState instance used\ 161 Controls the verbosity of the tree building process.");
163 When set to ``True``, reuse the solution of the previous call to fit\ 164 and add more estimators to the ensemble, otherwise, just fit a whole\ 167 Weights associated with classes in the form ``{class_label: weight}``.\ 168 If not given, all classes are supposed to have weight one. For\ 169 multi-output problems, a list of dicts can be provided in the same\ 170 order as the columns of y.\ 171 The \"auto\" mode uses the values of y to automatically adjust\ 172 weights inversely proportional to class frequencies in the input data.\ 173 The \"subsample\" mode is the same as \"auto\" except that weights are\ 174 computed based on the bootstrap sample for every tree grown.\ 175 For multi-output, the weights of each column of y will be multiplied.\ 176 Note that these weights will be multiplied with sample_weight (passed\ 177 through the fit method) if sample_weight is specified.");
184 Log() <<
kERROR <<
" NEstimators <=0... that does not work !! " 185 <<
" I set it to 10 .. just so that the program does not crash" 191 <<
" The options are gini of entropy." 197 <<
" The options are None or integer." 200 Py_DECREF(pomax_depth);
203 Log() <<
kERROR <<
" MinSamplesSplit < 0... that does not work !! " 204 <<
" I set it to 2 .. just so that the program does not crash" 209 Log() <<
kERROR <<
" MinSamplesLeaf < 0... that does not work !! " 210 <<
" I set it to 1 .. just so that the program does not crash" 216 Log() <<
kERROR <<
" MinWeightFractionLeaf < 0... that does not work !! " 217 <<
" I set it to 0 .. just so that the program does not crash" 223 if (!pomax_features) {
225 <<
"int, float, string or None, optional (default='auto')" 226 <<
"The number of features to consider when looking for the best split:" 227 <<
"If int, then consider `max_features` features at each split." 228 <<
"If float, then `max_features` is a percentage and" 229 <<
"`int(max_features * n_features)` features are considered at each split." 230 <<
"If 'auto', then `max_features=sqrt(n_features)`." 231 <<
"If 'sqrt', then `max_features=sqrt(n_features)`." 232 <<
"If 'log2', then `max_features=log2(n_features)`." 233 <<
"If None, then `max_features=n_features`." 236 Py_DECREF(pomax_features);
239 if (!pomax_leaf_nodes) {
241 <<
" The options are None or integer." 244 Py_DECREF(pomax_leaf_nodes);
251 if (!porandom_state) {
253 <<
"If int, random_state is the seed used by the random number generator;" 254 <<
"If RandomState instance, random_state is the random number generator;" 255 <<
"If None, the random number generator is the RandomState instance used by `np.random`." 258 Py_DECREF(porandom_state);
264 if (!poclass_weight) {
266 <<
"dict, list of dicts, 'auto', 'subsample' or None, optional" 269 Py_DECREF(poclass_weight);
281 PyObject *pName = PyUnicode_FromString(
"sklearn.ensemble");
283 fModule = PyImport_Import(pName);
296 dims[0] = fNrowsTraining;
298 fTrainData = (PyArrayObject *)PyArray_FromDims(2, dims, NPY_FLOAT);
299 float *TrainData = (
float *)(PyArray_DATA(
fTrainData));
302 fTrainDataClasses = (PyArrayObject *)PyArray_FromDims(1, &fNrowsTraining, NPY_FLOAT);
305 fTrainDataWeights = (PyArrayObject *)PyArray_FromDims(1, &fNrowsTraining, NPY_FLOAT);
308 for (
int i = 0; i < fNrowsTraining; i++) {
310 for (
UInt_t j = 0; j < fNvars; j++) {
311 TrainData[j + i * fNvars] = e->
GetValue(j);
336 Py_DECREF(pomax_depth);
337 PyObject_Print(args, stdout, 0);
338 std::cout << std::endl;
341 PyObject *fClassifierClass = PyDict_GetItemString(pDict,
"RandomForestClassifier");
345 if (PyCallable_Check(fClassifierClass)) {
347 fClassifier = PyObject_CallObject(fClassifierClass , args);
354 Py_DECREF(fClassifierClass);
355 Log() <<
kFATAL <<
"Can't call function RandomForestClassifier" <<
Endl;
364 Log() <<
kFATAL <<
"Can't create classifier object from RandomForestClassifier" <<
Endl;
398 PyArrayObject *pEvent= (PyArrayObject *)PyArray_FromDims(2, dims, NPY_FLOAT);
399 float *pValue = (
float *)(PyArray_DATA(pEvent));
403 PyArrayObject *
result = (PyArrayObject *)PyObject_CallMethod(
fClassifier, const_cast<char *>(
"predict_proba"),
const_cast<char *
>(
"(O)"), pEvent);
404 double *proba = (
double *)(PyArray_DATA(result));
425 Log() <<
kFATAL <<
"Can't load RandomForestClassifier from Serialized data." <<
Endl;
440 Log() <<
"Decision Trees and Rule-Based Models " <<
Endl;
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
const TString & GetWeightFileDir() const
~MethodPyRandomForest(void)
MsgLogger & Endl(MsgLogger &ml)
#define REGISTER_METHOD(CLASS)
for example
const Event * GetTrainingEvent(Long64_t ievt) const
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
static void Serialize(TString file, PyObject *classifier)
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
PyArrayObject * fTrainDataClasses
static int PyIsInitialized()
static void PyInitialize()
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
const char * Data() const
Double_t min_weight_fraction_leaf
static PyObject * Eval(TString code)
PyArrayObject * fTrainDataWeights
Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)
UInt_t GetNVariables() const
accessor to the number of variables
MethodPyRandomForest(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
void GetHelpMessage() const
char * Form(const char *fmt,...)
PyArrayObject * fTrainData
const Event * GetEvent() const
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Abstract ClassifierFactory template that handles arbitrary types.
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
virtual void TestClassification()
initialization
Long64_t GetNTrainingEvents() const
static void UnSerialize(TString file, PyObject **obj)
virtual void TestClassification()
initialization
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Bool_t IsModelPersistence()