20 #pragma GCC diagnostic ignored "-Wunused-parameter"
25 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
26 #include <numpy/arrayobject.h>
66 min_weight_fraction_leaf(0.0),
72 max_leaf_nodes("None"),
76 SetWeightFileDir(
gConfig().GetIONames().fWeightFileDir);
89 min_weight_fraction_leaf(0.0),
95 max_leaf_nodes(
"None"),
121 loss function to be optimized. 'deviance' refers to\
122 deviance (= logistic regression) for classification\
123 with probabilistic outputs. For loss 'exponential' gradient\
124 boosting recovers the AdaBoost algorithm.");
127 learning rate shrinks the contribution of each tree by `learning_rate`.\
128 There is a trade-off between learning_rate and n_estimators.");
131 The number of boosting stages to perform. Gradient boosting\
132 is fairly robust to over-fitting so a large number usually\
133 results in better performance.");
136 The fraction of samples to be used for fitting the individual base\
137 learners. If smaller than 1.0 this results in Stochastic Gradient\
138 Boosting. `subsample` interacts with the parameter `n_estimators`.\
139 Choosing `subsample < 1.0` leads to a reduction of variance\
140 and an increase in bias.");
143 The minimum number of samples required to split an internal node.");
146 The minimum number of samples in newly created leaves. A split is \
147 discarded if after the split, one of the leaves would contain less then \
148 ``min_samples_leaf`` samples.");
151 The minimum weighted fraction of the input samples required to be at a \
155 The maximum depth of the tree. If None, then nodes are expanded until \
156 all leaves are pure or until all leaves contain less than \
157 min_samples_split samples. \
158 Ignored if ``max_leaf_nodes`` is not None.");
161 An estimator object that is used to compute the initial\
162 predictions. ``init`` has to provide ``fit`` and ``predict``.\
163 If None it uses ``loss.init_estimator`");
166 If int, random_state is the seed used by the random number generator;\
167 If RandomState instance, random_state is the random number generator;\
168 If None, the random number generator is the RandomState instance used\
172 Controls the verbosity of the tree building process.");
174 Grow trees with ``max_leaf_nodes`` in best-first fashion.\
175 Best nodes are defined as relative reduction in impurity.\
176 If None then unlimited number of leaf nodes.\
177 If not None then ``max_depth`` will be ignored.");
179 When set to ``True``, reuse the solution of the previous call to fit\
180 and add more estimators to the ensemble, otherwise, just fit a whole\
191 if (
loss !=
"deviance" &&
loss !=
"exponential") {
193 <<
" The options are deviance of exponential."
198 Log() <<
kERROR <<
" LearningRate <=0... that does not work !! "
199 <<
" I set it to 0.1 .. just so that the program does not crash"
204 Log() <<
kERROR <<
" NEstimators <=0... that does not work !! "
205 <<
" I set it to 100 .. just so that the program does not crash"
210 Log() <<
kERROR <<
" MinSamplesSplit <0... that does not work !! "
211 <<
" I set it to 2 .. just so that the program does not crash"
216 Log() <<
kERROR <<
" Subsample <0... that does not work !! "
217 <<
" I set it to 1.0 .. just so that the program does not crash"
223 Log() <<
kERROR <<
" MinSamplesLeaf <0... that does not work !! "
224 <<
" I set it to 1.0 .. just so that the program does not crash"
230 Log() <<
kERROR <<
" MinSamplesLeaf <0... that does not work !! "
231 <<
" I set it to 1.0 .. just so that the program does not crash"
237 Log() <<
kERROR <<
" MinWeightFractionLeaf <0... that does not work !! "
238 <<
" I set it to 0.0 .. just so that the program does not crash"
244 Log() <<
kERROR <<
" MaxDepth <0... that does not work !! "
245 <<
" I set it to 3 .. just so that the program does not crash"
253 <<
" The options are None or BaseEstimator. An estimator object that is used to compute the initial"
254 <<
" predictions. ``init`` has to provide ``fit`` and ``predict``."
255 <<
" If None it uses ``loss.init_estimator``."
261 if (!porandom_state) {
263 <<
"If int, random_state is the seed used by the random number generator;"
264 <<
"If RandomState instance, random_state is the random number generator;"
265 <<
"If None, the random number generator is the RandomState instance used by `np.random`."
268 Py_DECREF(porandom_state);
272 if (!pomax_features) {
274 <<
"int, float, string or None, optional (default='auto')"
275 <<
"The number of features to consider when looking for the best split:"
276 <<
"If int, then consider `max_features` features at each split."
277 <<
"If float, then `max_features` is a percentage and"
278 <<
"`int(max_features * n_features)` features are considered at each split."
279 <<
"If 'auto', then `max_features=sqrt(n_features)`."
280 <<
"If 'sqrt', then `max_features=sqrt(n_features)`."
281 <<
"If 'log2', then `max_features=log2(n_features)`."
282 <<
"If None, then `max_features=n_features`."
285 Py_DECREF(pomax_features);
289 if (!pomax_leaf_nodes) {
291 <<
" The options are None or integer."
294 Py_DECREF(pomax_leaf_nodes);
307 PyObject *pName = PyUnicode_FromString(
"sklearn.ensemble");
309 fModule = PyImport_Import(pName);
321 int *dims =
new int[2];
322 dims[0] = fNrowsTraining;
324 fTrainData = (PyArrayObject *)PyArray_FromDims(2, dims, NPY_FLOAT);
325 float *TrainData = (
float *)(PyArray_DATA(
fTrainData));
328 fTrainDataClasses = (PyArrayObject *)PyArray_FromDims(1, &fNrowsTraining, NPY_FLOAT);
331 fTrainDataWeights = (PyArrayObject *)PyArray_FromDims(1, &fNrowsTraining, NPY_FLOAT);
334 for (
int i = 0; i < fNrowsTraining; i++) {
336 for (
UInt_t j = 0; j < fNvars; j++) {
337 TrainData[j + i * fNvars] = e->
GetValue(j);
374 PyObject_Print(args, stdout, 0);
375 std::cout << std::endl;
378 PyObject *fClassifierClass = PyDict_GetItemString(pDict,
"GradientBoostingClassifier");
381 if (PyCallable_Check(fClassifierClass)) {
383 fClassifier = PyObject_CallObject(fClassifierClass , args);
385 std::cout << std::endl;
388 Py_DECREF(porandom_state);
389 Py_DECREF(pomax_features);
390 Py_DECREF(pomax_leaf_nodes);
395 Py_DECREF(porandom_state);
396 Py_DECREF(pomax_features);
397 Py_DECREF(pomax_leaf_nodes);
400 Py_DECREF(fClassifierClass);
401 Log() <<
kFATAL <<
"Can't call function GradientBoostingClassifier" <<
Endl;
438 int *dims =
new int[2];
441 PyArrayObject *pEvent= (PyArrayObject *)PyArray_FromDims(2, dims, NPY_FLOAT);
442 float *pValue = (
float *)(PyArray_DATA(pEvent));
446 PyArrayObject *
result = (PyArrayObject *)PyObject_CallMethod(
fClassifier, const_cast<char *>(
"predict_proba"),
const_cast<char *
>(
"(O)"), pEvent);
447 double *proba = (
double *)(PyArray_DATA(result));
479 Log() <<
"Decision Trees and Rule-Based Models " <<
Endl;
const TString & GetWeightFileDir() const
MsgLogger & Endl(MsgLogger &ml)
Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)
const Event * GetTrainingEvent(Long64_t ievt) const
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
ClassImp(MethodPyGTB) MethodPyGTB
Double_t min_weight_fraction_leaf
static void Serialize(TString file, PyObject *classifier)
Double_t GetWeight() const
Return the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not...
PyArrayObject * fTrainDataClasses
static int PyIsInitialized()
static void PyInitialize()
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
const char * Data() const
static PyObject * Eval(TString code)
PyArrayObject * fTrainDataWeights
void GetHelpMessage() const
UInt_t GetNVariables() const
accessor to the number of variables
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
char * Form(const char *fmt,...)
PyArrayObject * fTrainData
const Event * GetEvent() const
MethodPyGTB(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="", TDirectory *theTargetDir=NULL)
Describe directory structure in memory.
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
virtual void DeclareCompatibilityOptions()
Options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
void SetWeightFileDir(TString fileDir)
set directory of weight file
Long64_t GetNTrainingEvents() const
static void UnSerialize(TString file, PyObject **obj)
virtual void TestClassification()
initialization
virtual void TestClassification()
initialization
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)