#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>

#pragma GCC diagnostic ignored "-Wunused-parameter"

// ...

MethodPyGTB::MethodPyGTB(const TString &jobName,
                         const TString &methodTitle,
                         DataSetInfo &theData,
                         const TString &theOption) :
   // ...
   min_weight_fraction_leaf(0.0),
   // ...
   max_leaf_nodes("None"),
   // ...

// second constructor, used when the method is instantiated from a weight file
MethodPyGTB::MethodPyGTB(DataSetInfo &theData, const TString &theWeightFile) :
   // ...
   min_weight_fraction_leaf(0.0),
   // ...
   max_leaf_nodes("None"),
   // ...
void MethodPyGTB::DeclareOptions()
{
   DeclareOptionRef(loss, "Loss", "Loss function to be optimized. 'deviance' refers to\
    deviance (= logistic regression) for classification\
    with probabilistic outputs. For loss 'exponential', gradient\
    boosting recovers the AdaBoost algorithm.");

   DeclareOptionRef(learning_rate, "LearningRate", "The learning rate shrinks the contribution of each tree by\
    `learning_rate`. There is a trade-off between learning_rate and n_estimators.");

   DeclareOptionRef(n_estimators, "NEstimators", "The number of boosting stages to perform. Gradient boosting\
    is fairly robust to over-fitting, so a large number usually\
    results in better performance.");

   DeclareOptionRef(subsample, "Subsample", "The fraction of samples to be used for fitting the individual base\
    learners. If smaller than 1.0, this results in Stochastic Gradient\
    Boosting. `subsample` interacts with the parameter `n_estimators`.\
    Choosing `subsample < 1.0` leads to a reduction of variance\
    and an increase in bias.");

   DeclareOptionRef(min_samples_split, "MinSamplesSplit", "The minimum number of samples required to split an internal node.");

   DeclareOptionRef(min_samples_leaf, "MinSamplesLeaf", "The minimum number of samples in newly created leaves. A split is\
    discarded if, after the split, one of the leaves would contain fewer than\
    ``min_samples_leaf`` samples.");

   DeclareOptionRef(min_weight_fraction_leaf, "MinWeightFractionLeaf", "The minimum weighted fraction of the input samples required to be at a\
    leaf node.");

   DeclareOptionRef(max_depth, "MaxDepth", "The maximum depth of the tree. If None, then nodes are expanded until\
    all leaves are pure or until all leaves contain less than\
    min_samples_split samples.\
    Ignored if ``max_leaf_nodes`` is not None.");

   DeclareOptionRef(init, "Init", "An estimator object that is used to compute the initial\
    predictions. ``init`` has to provide ``fit`` and ``predict``.\
    If None, it uses ``loss.init_estimator``.");

   DeclareOptionRef(random_state, "RandomState", "If int, random_state is the seed used by the random number generator;\
    If RandomState instance, random_state is the random number generator;\
    If None, the random number generator is the RandomState instance used\
    by `np.random`.");

   DeclareOptionRef(verbose, "Verbose", "Controls the verbosity of the tree building process.");

   DeclareOptionRef(max_leaf_nodes, "MaxLeafNodes", "Grow trees with ``max_leaf_nodes`` in best-first fashion.\
    Best nodes are defined as relative reduction in impurity.\
    If None, then unlimited number of leaf nodes.\
    If not None, then ``max_depth`` will be ignored.");
   DeclareOptionRef(warm_start, "WarmStart", "When set to ``True``, reuse the solution of the previous call to fit\
    and add more estimators to the ensemble; otherwise, just fit a whole\
    new ensemble.");
}

void MethodPyGTB::ProcessOptions()
{
   if (loss != "deviance" && loss != "exponential") {
      Log() << kFATAL << Form(" Loss = %s ... that does not work!", loss.Data())
            << " The options are deviance or exponential."
            << Endl;
   }

   if (learning_rate <= 0) {
      Log() << kERROR << " LearningRate <= 0 ... that does not work!"
            << " It is set to 0.1 so that the program does not crash."
            << Endl;
      learning_rate = 0.1;
   }

   if (n_estimators <= 0) {
      Log() << kERROR << " NEstimators <= 0 ... that does not work!"
            << " It is set to 100 so that the program does not crash."
            << Endl;
      n_estimators = 100;
   }

   if (min_samples_split < 0) {
      Log() << kERROR << " MinSamplesSplit < 0 ... that does not work!"
            << " It is set to 2 so that the program does not crash."
            << Endl;
      min_samples_split = 2;
   }

   if (subsample < 0) {
      Log() << kERROR << " Subsample < 0 ... that does not work!"
            << " It is set to 1.0 so that the program does not crash."
            << Endl;
      subsample = 1.0;
   }

   if (min_samples_leaf < 0) {
      Log() << kERROR << " MinSamplesLeaf < 0 ... that does not work!"
            << " It is set to 1 so that the program does not crash."
            << Endl;
      min_samples_leaf = 1;
   }

   if (min_weight_fraction_leaf < 0) {
      Log() << kERROR << " MinWeightFractionLeaf < 0 ... that does not work!"
            << " It is set to 0.0 so that the program does not crash."
            << Endl;
      min_weight_fraction_leaf = 0.0;
   }

   if (max_depth < 0) {
      Log() << kERROR << " MaxDepth < 0 ... that does not work!"
            << " It is set to 3 so that the program does not crash."
            << Endl;
      max_depth = 3;
   }

   PyObject *poinit = Eval(init);
   if (!poinit) {
      Log() << kFATAL << Form(" Init = %s ... that does not work!", init.Data())
            << " The options are None or BaseEstimator. An estimator object that is used to compute the initial"
            << " predictions. ``init`` has to provide ``fit`` and ``predict``."
            << " If None, it uses ``loss.init_estimator``."
            << Endl;
   }
   Py_DECREF(poinit);

   PyObject *porandom_state = Eval(random_state);
   if (!porandom_state) {
      Log() << kFATAL << Form(" RandomState = %s ... that does not work!", random_state.Data())
            << " If int, random_state is the seed used by the random number generator;"
            << " if RandomState instance, random_state is the random number generator;"
            << " if None, the random number generator is the RandomState instance used by `np.random`."
            << Endl;
   }
   Py_DECREF(porandom_state);
   PyObject *pomax_features = Eval(max_features);
   if (!pomax_features) {
      Log() << kFATAL << Form(" MaxFeatures = %s ... that does not work!", max_features.Data())
            << " int, float, string or None, optional (default='auto')."
            << " The number of features to consider when looking for the best split:"
            << " if int, then consider `max_features` features at each split;"
            << " if float, then `max_features` is a percentage and"
            << " `int(max_features * n_features)` features are considered at each split;"
            << " if 'auto', then `max_features=sqrt(n_features)`;"
            << " if 'sqrt', then `max_features=sqrt(n_features)`;"
            << " if 'log2', then `max_features=log2(n_features)`;"
            << " if None, then `max_features=n_features`."
            << Endl;
   }
   Py_DECREF(pomax_features);
   PyObject *pomax_leaf_nodes = Eval(max_leaf_nodes);
   if (!pomax_leaf_nodes) {
      Log() << kFATAL << Form(" MaxLeafNodes = %s ... that does not work!", max_leaf_nodes.Data())
            << " The options are None or integer."
            << Endl;
   }
   Py_DECREF(pomax_leaf_nodes);
}
void MethodPyGTB::Train()
{
   // ...
   PyObject *pName = PyUnicode_FromString("sklearn.ensemble");
   fModule = PyImport_Import(pName);
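   // PyImport_Import returns a new reference to the sklearn.ensemble module
   // (PyInitialize() is expected to have run earlier); a null fModule here
   // would mean scikit-learn is not importable in the embedded interpreter.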
   int fNrowsTraining = Data()->GetNTrainingEvents();
   int *dims = new int[2];
   dims[0] = fNrowsTraining;
   dims[1] = fNvars;
   fTrainData = (PyArrayObject *)PyArray_FromDims(2, dims, NPY_FLOAT);
   float *TrainData = (float *)(PyArray_DATA(fTrainData));

   fTrainDataClasses = (PyArrayObject *)PyArray_FromDims(1, &fNrowsTraining, NPY_FLOAT);
   float *TrainDataClasses = (float *)(PyArray_DATA(fTrainDataClasses));

   fTrainDataWeights = (PyArrayObject *)PyArray_FromDims(1, &fNrowsTraining, NPY_FLOAT);
   float *TrainDataWeights = (float *)(PyArray_DATA(fTrainDataWeights));
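   // PyArray_FromDims is the legacy NumPy allocation call (int dimensions);
   // modern NumPy spells this PyArray_SimpleNew with npy_intp dimensions.
   // The training matrix is laid out row-major: event i, variable j lives at
   // TrainData[j + i * fNvars].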
   for (int i = 0; i < fNrowsTraining; i++) {
      const TMVA::Event *e = Data()->GetTrainingEvent(i);
      for (UInt_t j = 0; j < fNvars; j++) {
         TrainData[j + i * fNvars] = e->GetValue(j);
      }
      // ...
   }
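   // The elided body of the loop presumably also fills TrainDataClasses and
   // TrainDataWeights per event (from e->GetClass() and e->GetWeight()), and
   // the elided lines below re-evaluate poinit, porandom_state, pomax_features
   // and pomax_leaf_nodes from the option strings before building the tuple.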
   PyObject *args = Py_BuildValue("(sfifiifiOOOiOi)",
      loss.Data(), learning_rate, n_estimators, subsample,
      min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
      max_depth, poinit, porandom_state, pomax_features,
      verbose, pomax_leaf_nodes, warm_start);

   PyObject_Print(args, stdout, 0);
   std::cout << std::endl;
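   // The format string "(sfifiifiOOOiOi)" maps one code per constructor
   // argument of GradientBoostingClassifier, in positional order:
   //   s loss                      f learning_rate        i n_estimators
   //   f subsample                 i min_samples_split    i min_samples_leaf
   //   f min_weight_fraction_leaf  i max_depth            O init
   //   O random_state              O max_features         i verbose
   //   O max_leaf_nodes            i warm_start
   // PyObject_Print dumps the resulting tuple to stdout for inspection.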
   PyObject *pDict = PyModule_GetDict(fModule);
   PyObject *fClassifierClass = PyDict_GetItemString(pDict, "GradientBoostingClassifier");

   if (PyCallable_Check(fClassifierClass)) {
      fClassifier = PyObject_CallObject(fClassifierClass, args);
      PyObject_Print(fClassifier, stdout, 0);
      std::cout << std::endl;

      Py_DECREF(poinit);
      Py_DECREF(porandom_state);
      Py_DECREF(pomax_features);
      Py_DECREF(pomax_leaf_nodes);
      Py_DECREF(args);
   } else {
      Py_DECREF(poinit);
      Py_DECREF(porandom_state);
      Py_DECREF(pomax_features);
      Py_DECREF(pomax_leaf_nodes);
      Py_DECREF(args);
      Py_DECREF(pDict);
      Py_DECREF(fClassifierClass);
      Log() << kFATAL << "Can't call function GradientBoostingClassifier" << Endl;
   }
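   // NOTE: PyObject_CallObject returns NULL when the Python-side constructor
   // raises (e.g. an argument sklearn rejects); the excerpt does not show a
   // check on fClassifier, so a defensive guard (absent from the original)
   // could be:
   //   if (!fClassifier) { PyErr_Print(); Log() << kFATAL << "GradientBoostingClassifier construction failed" << Endl; }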
Double_t MethodPyGTB::GetMvaValue(Double_t *errLower, Double_t *errUpper)
{
   // cannot determine error
   NoErrorCalc(errLower, errUpper);

   const TMVA::Event *e = Data()->GetEvent();
   int dims[2];
   dims[0] = 1;
   dims[1] = fNvars;

   PyArrayObject *pEvent = (PyArrayObject *)PyArray_FromDims(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier,
      const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));
   // ...
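   // predict_proba returns an (n_samples, n_classes) float64 array, hence the
   // cast to double*; with one event and two classes, proba[0] and proba[1]
   // are the two class probabilities (which index is "signal" depends on the
   // class labels used when filling fTrainDataClasses).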
void MethodPyGTB::GetHelpMessage() const
{
   // ...
   Log() << "Decision Trees and Rule-Based Models " << Endl;
   // ...
}