23#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
24#include <numpy/arrayobject.h>
105 The base estimator from which the boosted ensemble is built.\
106 Support for sample weighting is required, as well as proper `classes_`\
107 and `n_classes_` attributes.");
110 The maximum number of estimators at which boosting is terminated.\
111 In case of perfect fit, the learning procedure is stopped early.");
114 Learning rate shrinks the contribution of each classifier by\
115 ``learning_rate``. There is a trade-off between ``learning_rate`` and\
119 If 'SAMME.R' then use the SAMME.R real boosting algorithm.\
120 ``base_estimator`` must support calculation of class probabilities.\
121 If 'SAMME' then use the SAMME discrete boosting algorithm.\
122 The SAMME.R algorithm typically converges faster than SAMME,\
123 achieving a lower test error with fewer boosting iterations.\
124 'SAME.R' is deprecated since version 1.4 and removed since 1.6");
127 If int, random_state is the seed used by the random number generator;\
128 If RandomState instance, random_state is the random number generator;\
129 If None, the random number generator is the RandomState instance used\
133 "Store trained classifier in this file");
143 <<
" The options are Object or None." <<
Endl;
148 Log() << kFATAL <<
"NEstimators <=0 ... that does not work!" <<
Endl;
154 Log() << kFATAL <<
"LearningRate <=0 ... that does not work!" <<
Endl;
161 Log() << kFATAL <<
Form(
"Algorithm = %s ... that does not work!",
fAlgorithm.Data())
162 <<
" The only options is SAMME " <<
Endl;
164 Log() << kWARNING <<
Form(
"Algorithm = %s is deprecated for scikit versions > 1.5 - use SAMME",
fAlgorithm.Data()) <<
Endl;
170 Log() << kFATAL <<
Form(
" RandomState = %s... that does not work !! ",
fRandomState.Data())
171 <<
"If int, random_state is the seed used by the random number generator;"
172 <<
"If RandomState instance, random_state is the random number generator;"
173 <<
"If None, the random number generator is the RandomState instance used by `np.random`." <<
Endl;
205 npy_intp dimsData[2];
206 dimsData[0] = fNrowsTraining;
208 PyArrayObject * fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
210 float *TrainData = (
float *)(PyArray_DATA(fTrainData));
212 npy_intp dimsClasses = (npy_intp) fNrowsTraining;
213 PyArrayObject * fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
214 PyDict_SetItemString(
fLocalNS,
"trainDataClasses", (
PyObject*)fTrainDataClasses);
215 float *TrainDataClasses = (
float *)(PyArray_DATA(fTrainDataClasses));
217 PyArrayObject * fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
218 PyDict_SetItemString(
fLocalNS,
"trainDataWeights", (
PyObject*)fTrainDataWeights);
219 float *TrainDataWeights = (
float *)(PyArray_DATA(fTrainDataWeights));
221 for (
int i = 0; i < fNrowsTraining; i++) {
224 for (UInt_t j = 0; j <
fNvars; j++) {
225 TrainData[j + i *
fNvars] =
e->GetValue(j);
229 TrainDataClasses[i] =
e->GetClass();
232 TrainDataWeights[i] =
e->GetWeight();
236 PyRunString(
"classifier = sklearn.ensemble.AdaBoostClassifier(estimator=baseEstimator, n_estimators=nEstimators, learning_rate=learningRate, random_state=randomState)",
237 "Failed to setup classifier");
241 PyRunString(
"dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)",
"Failed to train classifier");
246 Log() << kFATAL <<
"Can't create classifier object from AdaBoostClassifier" <<
Endl;
272 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
273 if (firstEvt < 0) firstEvt = 0;
274 nEvents = lastEvt-firstEvt;
280 PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
281 float *pValue = (
float *)(PyArray_DATA(pEvent));
283 for (
Int_t ievt=0; ievt<nEvents; ievt++) {
286 for (UInt_t i = 0; i <
fNvars; i++) {
287 pValue[ievt *
fNvars + i] =
e->GetValue(i);
292 PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(
fClassifier,
const_cast<char *
>(
"predict_proba"),
const_cast<char *
>(
"(O)"), pEvent);
293 double *proba = (
double *)(PyArray_DATA(result));
297 for (
int i = 0; i < nEvents; ++i) {
327 PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
328 float *pValue = (
float *)(PyArray_DATA(pEvent));
329 for (UInt_t i = 0; i <
fNvars; i++) pValue[i] =
e->GetValue(i);
332 PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(
fClassifier,
const_cast<char *
>(
"predict_proba"),
const_cast<char *
>(
"(O)"), pEvent);
333 double *proba = (
double *)(PyArray_DATA(result));
356 PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
357 float *pValue = (
float *)(PyArray_DATA(pEvent));
358 for (UInt_t i = 0; i <
fNvars; i++) pValue[i] =
e->GetValue(i);
361 PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(
fClassifier,
const_cast<char *
>(
"predict_proba"),
const_cast<char *
>(
"(O)"), pEvent);
362 double *proba = (
double *)(PyArray_DATA(result));
403 PyArrayObject* pRanking = (PyArrayObject*) PyObject_GetAttrString(
fClassifier,
"feature_importances_");
406 if(pRanking == 0)
return NULL;
411 for(UInt_t iVar=0; iVar<
fNvars; iVar++){
425 Log() <<
"An AdaBoost classifier is a meta-estimator that begins by fitting" <<
Endl;
426 Log() <<
"a classifier on the original dataset and then fits additional copies" <<
Endl;
427 Log() <<
"of the classifier on the same dataset but where the weights of incorrectly" <<
Endl;
428 Log() <<
"classified instances are adjusted such that subsequent classifiers focus" <<
Endl;
429 Log() <<
"more on difficult cases." <<
Endl;
431 Log() <<
"Check out the scikit-learn documentation for more information." <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
int Int_t
Signed integer 4 bytes (int).
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
bool Bool_t
Boolean (0=false, 1=true) (bool).
double Double_t
Double 8 bytes.
long long Long64_t
Portable signed long integer 8 bytes.
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Class that contains all the data information.
UInt_t GetNClasses() const
const Event * GetEvent() const
returns event without transformations
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Long64_t GetNTrainingEvents() const
void SetCurrentEvent(Long64_t ievt) const
const Event * GetTrainingEvent(Long64_t ievt) const
PyGILState_STATE m_GILState
const char * GetName() const override
virtual void DeclareCompatibilityOptions()
Options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
Bool_t IsModelPersistence() const
const TString & GetWeightFileDir() const
DataSetInfo & DataInfo() const
virtual void TestClassification()
initialization
UInt_t GetNVariables() const
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
const TString & GetInputLabel(Int_t i) const
void ProcessOptions() override
void DeclareOptions() override
std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false) override
get all the MVA values for the events of the current Data type
PyObject * pBaseEstimator
std::vector< Double_t > mvaValues
TString fFilenameClassifier
std::vector< Float_t > classValues
const Ranking * CreateRanking() override
void GetHelpMessage() const override
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
std::vector< Float_t > & GetMulticlassValues() override
MethodPyAdaBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Double_t GetMvaValue(Double_t *errLower=nullptr, Double_t *errUpper=nullptr) override
void TestClassification() override
initialization
void ReadModelFromFile() override
static int PyIsInitialized()
Check Python interpreter initialization status.
PyObject * Eval(TString code)
Evaluate Python code.
static void PyInitialize()
Initialize Python interpreter.
static void Serialize(TString file, PyObject *classifier)
Serialize Python object.
static Int_t UnSerialize(TString file, PyObject **obj)
Unserialize Python object.
PyMethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=256)
Execute Python code from string.
Ranking for variables in method (implementation).
Singleton class for Global types used by TMVA.
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
create variable transformations
MsgLogger & Endl(MsgLogger &ml)