#pragma GCC diagnostic ignored "-Wunused-parameter"
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
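// Defining NPY_NO_DEPRECATED_API before including numpy/arrayobject.h pins the
// NumPy C API to the 1.7 interface, hiding symbols deprecated since that release.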
     min_weight_fraction_leaf(0.0),
     max_leaf_nodes("None"),

   SetWeightFileDir(gConfig().GetIONames().fWeightFileDir);
     min_weight_fraction_leaf(0.0),
     max_leaf_nodes("None"),
   DeclareOptionRef(loss, "Loss", "{'deviance', 'exponential'}, optional (default='deviance')\
    loss function to be optimized. 'deviance' refers to\
    deviance (= logistic regression) for classification\
    with probabilistic outputs. For loss 'exponential' gradient\
    boosting recovers the AdaBoost algorithm.");
   DeclareOptionRef(learning_rate, "LearningRate", "float, optional (default=0.1)\
    learning rate shrinks the contribution of each tree by `learning_rate`.\
    There is a trade-off between learning_rate and n_estimators.");
   DeclareOptionRef(n_estimators, "NEstimators", "int (default=100)\
    The number of boosting stages to perform. Gradient boosting\
    is fairly robust to over-fitting so a large number usually\
    results in better performance.");
   DeclareOptionRef(subsample, "Subsample", "float, optional (default=1.0)\
    The fraction of samples to be used for fitting the individual base\
    learners. If smaller than 1.0 this results in Stochastic Gradient\
    Boosting. `subsample` interacts with the parameter `n_estimators`.\
    Choosing `subsample < 1.0` leads to a reduction of variance\
    and an increase in bias.");
   DeclareOptionRef(min_samples_split, "MinSamplesSplit", "integer, optional (default=2)\
    The minimum number of samples required to split an internal node.");
   DeclareOptionRef(min_samples_leaf, "MinSamplesLeaf", "integer, optional (default=1)\
    The minimum number of samples in newly created leaves. A split is\
    discarded if after the split, one of the leaves would contain less than\
    ``min_samples_leaf`` samples.");
   DeclareOptionRef(min_weight_fraction_leaf, "MinWeightFractionLeaf", "float, optional (default=0.0)\
    The minimum weighted fraction of the input samples required to be at a\
    leaf node.");
   DeclareOptionRef(max_depth, "MaxDepth", "integer or None, optional (default=3)\
    The maximum depth of the tree. If None, then nodes are expanded until\
    all leaves are pure or until all leaves contain less than\
    min_samples_split samples.\
    Ignored if ``max_leaf_nodes`` is not None.");
   DeclareOptionRef(init, "Init", "BaseEstimator, None, optional (default=None)\
    An estimator object that is used to compute the initial\
    predictions. ``init`` has to provide ``fit`` and ``predict``.\
    If None it uses ``loss.init_estimator``.");
   DeclareOptionRef(random_state, "RandomState", "int, RandomState instance or None, optional (default=None)\
    If int, random_state is the seed used by the random number generator;\
    If RandomState instance, random_state is the random number generator;\
    If None, the random number generator is the RandomState instance used\
    by `np.random`.");
   DeclareOptionRef(verbose, "Verbose", "int, default: 0\
    Controls the verbosity of the tree building process.");
   DeclareOptionRef(max_leaf_nodes, "MaxLeafNodes", "int or None, optional (default=None)\
    Grow trees with ``max_leaf_nodes`` in best-first fashion.\
    Best nodes are defined as relative reduction in impurity.\
    If None then unlimited number of leaf nodes.\
    If not None then ``max_depth`` will be ignored.");
   DeclareOptionRef(warm_start, "WarmStart", "bool, default: False\
    When set to ``True``, reuse the solution of the previous call to fit\
    and add more estimators to the ensemble, otherwise, just fit a whole\
    new ensemble.");
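// Illustrative usage (a sketch, not part of this file): the labels declared
// above are the option names understood in the TMVA booking string, so a
// booking could look like
//
//    factory->BookMethod(TMVA::Types::kPyGTB, "PyGTB",
//                        "NEstimators=200:LearningRate=0.05:MaxDepth=3");
//
// where each option is forwarded to the constructor argument of the same
// (lower_case) name in scikit-learn's GradientBoostingClassifier.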
   if (loss != "deviance" && loss != "exponential") {
      Log() << kFATAL
            << " The options are deviance or exponential." << Endl;
   }
   if (learning_rate <= 0) {
      Log() << kERROR << " LearningRate <= 0... that does not work!"
            << " Setting it to 0.1 so that the program does not crash" << Endl;
      learning_rate = 0.1;
   }
   if (n_estimators <= 0) {
      Log() << kERROR << " NEstimators <= 0... that does not work!"
            << " Setting it to 100 so that the program does not crash" << Endl;
      n_estimators = 100;
   }
   if (min_samples_split < 0) {
      Log() << kERROR << " MinSamplesSplit < 0... that does not work!"
            << " Setting it to 2 so that the program does not crash" << Endl;
      min_samples_split = 2;
   }
   if (subsample < 0) {
      Log() << kERROR << " Subsample < 0... that does not work!"
            << " Setting it to 1.0 so that the program does not crash" << Endl;
      subsample = 1.0;
   }
   if (min_samples_leaf < 0) {
      Log() << kERROR << " MinSamplesLeaf < 0... that does not work!"
            << " Setting it to 1 so that the program does not crash" << Endl;
      min_samples_leaf = 1;
   }
   if (min_weight_fraction_leaf < 0) {
      Log() << kERROR << " MinWeightFractionLeaf < 0... that does not work!"
            << " Setting it to 0.0 so that the program does not crash" << Endl;
      min_weight_fraction_leaf = 0.0;
   }
   if (max_depth < 0) {
      Log() << kERROR << " MaxDepth < 0... that does not work!"
            << " Setting it to 3 so that the program does not crash" << Endl;
      max_depth = 3;
   }
   if (!poinit) {
      Log() << kFATAL
            << " The options are None or BaseEstimator. An estimator object that is used to compute the initial"
            << " predictions. ``init`` has to provide ``fit`` and ``predict``."
            << " If None it uses ``loss.init_estimator``." << Endl;
   }
   if (!porandom_state) {
      Log() << kFATAL
            << "If int, random_state is the seed used by the random number generator;"
            << " If RandomState instance, random_state is the random number generator;"
            << " If None, the random number generator is the RandomState instance used by `np.random`."
            << Endl;
   }
   Py_DECREF(porandom_state);
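// porandom_state is presumably the result of evaluating the RandomState option
// string as Python source (PyMethodBase provides an Eval(TString) helper for
// this), so a NULL result above means the option string did not parse.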
   if (!pomax_features) {
      Log() << kFATAL
            << "int, float, string or None, optional (default='auto')"
            << " The number of features to consider when looking for the best split:"
            << " If int, then consider `max_features` features at each split."
            << " If float, then `max_features` is a percentage and"
            << " `int(max_features * n_features)` features are considered at each split."
            << " If 'auto', then `max_features=sqrt(n_features)`."
            << " If 'sqrt', then `max_features=sqrt(n_features)`."
            << " If 'log2', then `max_features=log2(n_features)`."
            << " If None, then `max_features=n_features`."
            << Endl;
   }
   Py_DECREF(pomax_features);
   if (!pomax_leaf_nodes) {
      Log() << kFATAL << " The options are None or integer." << Endl;
   }
   Py_DECREF(pomax_leaf_nodes);
   PyObject *pName = PyString_FromString("sklearn.ensemble");
   fModule = PyImport_Import(pName);
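   // PyImport_Import is the C-API counterpart of "import sklearn.ensemble";
   // fModule then holds the module object (or NULL if the import failed).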
   int *dims = new int[2];
   dims[0] = fNrowsTraining;
   dims[1] = fNvars;
   fTrainData = (PyArrayObject *)PyArray_FromDims(2, dims, NPY_FLOAT);
   float *TrainData = (float *)(PyArray_DATA(fTrainData));
   fTrainDataClasses = (PyArrayObject *)PyArray_FromDims(1, &fNrowsTraining, NPY_FLOAT);
   fTrainDataWeights = (PyArrayObject *)PyArray_FromDims(1, &fNrowsTraining, NPY_FLOAT);
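   // PyArray_FromDims is the legacy pre-1.7 constructor. A minimal sketch of
   // the same allocation with the non-deprecated API (an assumption, not what
   // this file originally used) would be:
   //
   //    npy_intp dimsNew[2] = {fNrowsTraining, (npy_intp)fNvars};
   //    fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsNew, NPY_FLOAT);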
   for (int i = 0; i < fNrowsTraining; i++) {
      const TMVA::Event *e = GetTrainingEvent(i);
      for (UInt_t j = 0; j < fNvars; j++) {
         TrainData[j + i * fNvars] = e->GetValue(j);
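         // TrainData is the flat buffer behind the 2-D array above: element
         // (i, j) of the fNrowsTraining x fNvars matrix sits at
         // TrainData[j + i * fNvars], i.e. one row-major row per training event.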
   PyObject_Print(args, stdout, 0);
   std::cout << std::endl;
   PyObject *fClassifierClass = PyDict_GetItemString(pDict, "GradientBoostingClassifier");
   if (PyCallable_Check(fClassifierClass)) {
      fClassifier = PyObject_CallObject(fClassifierClass, args);
      std::cout << std::endl;
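      // On the Python side this call is roughly equivalent to instantiating
      //    GradientBoostingClassifier(loss='deviance', learning_rate=0.1,
      //                               n_estimators=100, ...)
      // with the values taken from the booked options (defaults shown here).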
      Py_DECREF(porandom_state);
      Py_DECREF(pomax_features);
      Py_DECREF(pomax_leaf_nodes);
   } else {
      Py_DECREF(porandom_state);
      Py_DECREF(pomax_features);
      Py_DECREF(pomax_leaf_nodes);
      Py_DECREF(fClassifierClass);
      Log() << kFATAL << "Can't call function GradientBoostingClassifier" << Endl;
   }
   std::ofstream PyData;
   PyData.open(path.Data());
   PyData << PyString_AsString(model_data);
   Py_DECREF(model_arg);
   Py_DECREF(model_data);
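   // model_data holds the pickled classifier, i.e. the C-API analogue of
   // Python's pickle.dumps(fClassifier); writing it to 'path' makes the weight
   // file a plain pickle that the read-back code below can restore.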
   PyObject *pEvent = PyTuple_New(nvars);
   for (UInt_t i = 0; i < nvars; i++) {
      PyObject *pValue = PyFloat_FromDouble(e->GetValue(i));
      PyTuple_SetItem(pEvent, i, pValue);
   }
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(
      fClassifier, (char *)"predict_proba", (char *)"(O)", pEvent);
   double *proba = (double *)(PyArray_DATA(result));
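   // predict_proba on a single event returns an array of shape (1, n_classes);
   // proba points at its flat double buffer, so for a two-class setup proba[0]
   // and proba[1] are the class probabilities for this event.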
   std::ifstream PyData;
   std::stringstream PyDataStream;
   std::string PyDataString;
   PyData.open(path.Data());
   PyDataStream << PyData.rdbuf();
   PyDataString = PyDataStream.str();
   PyObject *model_arg = Py_BuildValue("(s)", PyDataString.c_str());
   Py_DECREF(model_arg);
   Log() << "Decision Trees and Rule-Based Models " << Endl;