Logo ROOT   6.10/09
Reference Guide
MethodPyGTB.cxx
Go to the documentation of this file.
1 // @(#)root/tmva/pymva $Id$
2 // Authors: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer 2015
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodPyGTB *
8  * Web : http://oproject.org *
9  * *
10  * Description: *
11  * GradientBoostingClassifier Classifier from Scikit learn *
12  * *
13  * *
14  * Redistribution and use in source and binary forms, with or without *
15  * modification, are permitted according to the terms listed in LICENSE *
16  * (http://tmva.sourceforge.net/LICENSE) *
17  * *
18  **********************************************************************************/
19 
20 #include <Python.h> // Needs to be included first to avoid redefinition of _POSIX_C_SOURCE
21 #include "TMVA/MethodPyGTB.h"
22 
23 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
24 #include <numpy/arrayobject.h>
25 
26 #include "TMVA/Configurable.h"
27 #include "TMVA/ClassifierFactory.h"
28 #include "TMVA/Config.h"
29 #include "TMVA/DataSet.h"
30 #include "TMVA/Event.h"
31 #include "TMVA/IMethod.h"
32 #include "TMVA/MsgLogger.h"
33 #include "TMVA/PDF.h"
34 #include "TMVA/Ranking.h"
35 #include "TMVA/Results.h"
37 #include "TMVA/Tools.h"
38 #include "TMVA/Types.h"
39 #include "TMVA/Timer.h"
41 
42 #include "Riostream.h"
43 #include "TMath.h"
44 #include "TMatrix.h"
45 #include "TMatrixD.h"
46 #include "TVectorD.h"
47 
48 #include <iomanip>
49 #include <fstream>
50 
51 using namespace TMVA;
52 
53 REGISTER_METHOD(PyGTB)
54 
56 
57 //_______________________________________________________________________
58 MethodPyGTB::MethodPyGTB(const TString &jobName,
59  const TString &methodTitle,
60  DataSetInfo &dsi,
61  const TString &theOption) :
62  PyMethodBase(jobName, Types::kPyGTB, methodTitle, dsi, theOption),
63  fLoss("deviance"),
64  fLearningRate(0.1),
65  fNestimators(100),
66  fSubsample(1.0),
67  fMinSamplesSplit(2),
68  fMinSamplesLeaf(1),
69  fMinWeightFractionLeaf(0.0),
70  fMaxDepth(3),
71  fInit("None"),
72  fRandomState("None"),
73  fMaxFeatures("None"),
74  fVerbose(0),
75  fMaxLeafNodes("None"),
76  fWarmStart(kFALSE)
77 {
78 }
79 
80 //_______________________________________________________________________
81 MethodPyGTB::MethodPyGTB(DataSetInfo &theData, const TString &theWeightFile)
82  : PyMethodBase(Types::kPyGTB, theData, theWeightFile),
83  fLoss("deviance"),
84  fLearningRate(0.1),
85  fNestimators(100),
86  fSubsample(1.0),
87  fMinSamplesSplit(2),
88  fMinSamplesLeaf(1),
89  fMinWeightFractionLeaf(0.0),
90  fMaxDepth(3),
91  fInit("None"),
92  fRandomState("None"),
93  fMaxFeatures("None"),
94  fVerbose(0),
95  fMaxLeafNodes("None"),
96  fWarmStart(kFALSE)
97 {
98 }
99 
100 
101 //_______________________________________________________________________
103 {
104 }
105 
106 //_______________________________________________________________________
108 {
109  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
110  if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
111  return kFALSE;
112 }
113 
114 
115 //_______________________________________________________________________
117 {
119 
120  DeclareOptionRef(fLoss, "Loss", "{'deviance', 'exponential'}, optional (default='deviance')\
121  loss function to be optimized. 'deviance' refers to\
122  deviance (= logistic regression) for classification\
123  with probabilistic outputs. For loss 'exponential' gradient\
124  boosting recovers the AdaBoost algorithm.");
125 
126  DeclareOptionRef(fLearningRate, "LearningRate", "float, optional (default=0.1)\
127  learning rate shrinks the contribution of each tree by `learning_rate`.\
128  There is a trade-off between learning_rate and n_estimators.");
129 
130  DeclareOptionRef(fNestimators, "NEstimators", "int (default=100)\
131  The number of boosting stages to perform. Gradient boosting\
132  is fairly robust to over-fitting so a large number usually\
133  results in better performance.");
134 
135  DeclareOptionRef(fSubsample, "Subsample", "float, optional (default=1.0)\
136  The fraction of samples to be used for fitting the individual base\
137  learners. If smaller than 1.0 this results in Stochastic Gradient\
138  Boosting. `subsample` interacts with the parameter `n_estimators`.\
139  Choosing `subsample < 1.0` leads to a reduction of variance\
140  and an increase in bias.");
141 
142  DeclareOptionRef(fMinSamplesSplit, "MinSamplesSplit", "integer, optional (default=2)\
143  The minimum number of samples required to split an internal node.");
144 
145  DeclareOptionRef(fMinSamplesLeaf, "MinSamplesLeaf", "integer, optional (default=1) \
146  The minimum number of samples in newly created leaves. A split is \
147  discarded if after the split, one of the leaves would contain less then \
148  ``min_samples_leaf`` samples.");
149 
150  DeclareOptionRef(fMinWeightFractionLeaf, "MinWeightFractionLeaf", "//float, optional (default=0.) \
151  The minimum weighted fraction of the input samples required to be at a \
152  leaf node.");
153 
154  DeclareOptionRef(fMaxDepth, "MaxDepth", "integer or None, optional (default=None) \
155  The maximum depth of the tree. If None, then nodes are expanded until \
156  all leaves are pure or until all leaves contain less than \
157  min_samples_split samples. \
158  Ignored if ``max_leaf_nodes`` is not None.");
159 
160  DeclareOptionRef(fInit, "Init", "BaseEstimator, None, optional (default=None)\
161  An estimator object that is used to compute the initial\
162  predictions. ``init`` has to provide ``fit`` and ``predict``.\
163  If None it uses ``loss.init_estimator`");
164 
165  DeclareOptionRef(fRandomState, "RandomState", "int, RandomState instance or None, optional (default=None)\
166  If int, random_state is the seed used by the random number generator;\
167  If RandomState instance, random_state is the random number generator;\
168  If None, the random number generator is the RandomState instance used\
169  by `np.random`.");
170 
171  DeclareOptionRef(fMaxFeatures, "MaxFeatures", "The number of features to consider when looking for the best split");
172 
173  DeclareOptionRef(fVerbose, "Verbose", "int, optional (default=0)\
174  Controls the verbosity of the tree building process.");
175 
176  DeclareOptionRef(fMaxLeafNodes, "MaxLeafNodes", "int or None, optional (default=None)\
177  Grow trees with ``max_leaf_nodes`` in best-first fashion.\
178  Best nodes are defined as relative reduction in impurity.\
179  If None then unlimited number of leaf nodes.\
180  If not None then ``max_depth`` will be ignored.");
181 
182  DeclareOptionRef(fWarmStart, "WarmStart", "bool, optional (default=False)\
183  When set to ``True``, reuse the solution of the previous call to fit\
184  and add more estimators to the ensemble, otherwise, just fit a whole\
185  new forest.");
186 
187  DeclareOptionRef(fFilenameClassifier, "FilenameClassifier",
188  "Store trained classifier in this file");
189 }
190 
191 //_______________________________________________________________________
192 // Check options and load them to local python namespace
194 {
195  if (fLoss != "deviance" && fLoss != "exponential") {
196  Log() << kFATAL << Form("Loss = %s ... that does not work!", fLoss.Data())
197  << " The options are 'deviance' or 'exponential'." << Endl;
198  }
199  pLoss = Eval(Form("'%s'", fLoss.Data()));
200  PyDict_SetItemString(fLocalNS, "loss", pLoss);
201 
202  if (fLearningRate <= 0) {
203  Log() << kFATAL << "LearningRate <= 0 ... that does not work!" << Endl;
204  }
206  PyDict_SetItemString(fLocalNS, "learningRate", pLearningRate);
207 
208  if (fNestimators <= 0) {
209  Log() << kFATAL << "NEstimators <= 0 ... that does not work!" << Endl;
210  }
212  PyDict_SetItemString(fLocalNS, "nEstimators", pNestimators);
213 
214  if (fMinSamplesSplit < 0) {
215  Log() << kFATAL << "MinSamplesSplit < 0 ... that does not work!" << Endl;
216  }
218  PyDict_SetItemString(fLocalNS, "minSamplesSplit", pMinSamplesSplit);
219 
220  if (fSubsample < 0) {
221  Log() << kFATAL << "Subsample < 0 ... that does not work!" << Endl;
222  }
223  pSubsample = Eval(Form("%f", fSubsample));
224  PyDict_SetItemString(fLocalNS, "subsample", pSubsample);
225 
226  if (fMinSamplesLeaf < 0) {
227  Log() << kFATAL << "MinSamplesLeaf < 0 ... that does not work!" << Endl;
228  }
230  PyDict_SetItemString(fLocalNS, "minSamplesLeaf", pMinSamplesLeaf);
231 
232  if (fMinSamplesSplit < 0) {
233  Log() << kFATAL << "MinSamplesSplit < 0 ... that does not work!" << Endl;
234  }
236  PyDict_SetItemString(fLocalNS, "minSamplesSplit", pMinSamplesSplit);
237 
238  if (fMinWeightFractionLeaf < 0) {
239  Log() << kFATAL << "MinWeightFractionLeaf < 0 ... that does not work !" << Endl;
240  }
242  PyDict_SetItemString(fLocalNS, "minWeightFractionLeaf", pMinWeightFractionLeaf);
243 
244  if (fMaxDepth <= 0) {
245  Log() << kFATAL << " MaxDepth <= 0 ... that does not work !! " << Endl;
246  }
247  pMaxDepth = Eval(Form("%i", fMaxDepth));
248  PyDict_SetItemString(fLocalNS, "maxDepth", pMaxDepth);
249 
250  pInit = Eval(fInit);
251  if (!pInit) {
252  Log() << kFATAL << Form("Init = %s ... that does not work!", fInit.Data())
253  << " The options are None or BaseEstimator, which is an estimator object that"
254  << "is used to compute the initial predictions. "
255  << "'init' has to provide 'fit' and 'predict' methods."
256  << " If None it uses 'loss.init_estimator'." << Endl;
257  }
258  PyDict_SetItemString(fLocalNS, "init", pInit);
259 
261  if (!pRandomState) {
262  Log() << kFATAL << Form(" RandomState = %s ... that does not work! ", fRandomState.Data())
263  << " If int, random_state is the seed used by the random number generator;"
264  << " If RandomState instance, random_state is the random number generator;"
265  << " If None, the random number generator is the RandomState instance used by 'np.random'."
266  << Endl;
267  }
268  PyDict_SetItemString(fLocalNS, "randomState", pRandomState);
269 
270  if (fMaxFeatures == "auto" || fMaxFeatures == "sqrt" || fMaxFeatures == "log2"){
271  fMaxFeatures = Form("'%s'", fMaxFeatures.Data());
272  }
274  PyDict_SetItemString(fLocalNS, "maxFeatures", pMaxFeatures);
275 
276  if (!pMaxFeatures) {
277  Log() << kFATAL << Form(" MaxFeatures = %s... that does not work !! ", fMaxFeatures.Data())
278  << "int, float, string or None, optional (default='auto')"
279  << "The number of features to consider when looking for the best split:"
280  << "If int, then consider `max_features` features at each split."
281  << "If float, then `max_features` is a percentage and"
282  << "`int(max_features * n_features)` features are considered at each split."
283  << "If 'auto', then `max_features=sqrt(n_features)`."
284  << "If 'sqrt', then `max_features=sqrt(n_features)`."
285  << "If 'log2', then `max_features=log2(n_features)`."
286  << "If None, then `max_features=n_features`." << Endl;
287  }
288 
290  if (!pMaxLeafNodes) {
291  Log() << kFATAL << Form(" MaxLeafNodes = %s... that does not work!", fMaxLeafNodes.Data())
292  << " The options are None or integer." << Endl;
293  }
294  PyDict_SetItemString(fLocalNS, "maxLeafNodes", pMaxLeafNodes);
295 
296  pVerbose = Eval(Form("%i", fVerbose));
297  PyDict_SetItemString(fLocalNS, "verbose", pVerbose);
298 
299  pWarmStart = Eval(Form("%i", UInt_t(fWarmStart)));
300  PyDict_SetItemString(fLocalNS, "warmStart", pWarmStart);
301 
302  // If no filename is given, set default
303  if(fFilenameClassifier.IsNull()) {
304  fFilenameClassifier = GetWeightFileDir() + "/PyGTBModel_" + GetName() + ".PyData";
305  }
306 }
307 
308 //_______________________________________________________________________
310 {
311  _import_array(); //require to use numpy arrays
312 
313  // Check options and load them to local python namespace
314  ProcessOptions();
315 
316  // Import module for gradient tree boosting classifier
317  PyRunString("import sklearn.ensemble");
318 
319  // Get data properties
320  fNvars = GetNVariables();
322 }
323 
325 {
326  // Load training data (data, classes, weights) to python arrays
327  int fNrowsTraining = Data()->GetNTrainingEvents(); //every row is an event, a class type and a weight
328  npy_intp dimsData[2];
329  dimsData[0] = fNrowsTraining;
330  dimsData[1] = fNvars;
331  fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
332  PyDict_SetItemString(fLocalNS, "trainData", (PyObject*)fTrainData);
333  float *TrainData = (float *)(PyArray_DATA(fTrainData));
334 
335  npy_intp dimsClasses = (npy_intp) fNrowsTraining;
336  fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
337  PyDict_SetItemString(fLocalNS, "trainDataClasses", (PyObject*)fTrainDataClasses);
338  float *TrainDataClasses = (float *)(PyArray_DATA(fTrainDataClasses));
339 
340  fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
341  PyDict_SetItemString(fLocalNS, "trainDataWeights", (PyObject*)fTrainDataWeights);
342  float *TrainDataWeights = (float *)(PyArray_DATA(fTrainDataWeights));
343 
344  for (int i = 0; i < fNrowsTraining; i++) {
345  // Fill training data matrix
346  const TMVA::Event *e = Data()->GetTrainingEvent(i);
347  for (UInt_t j = 0; j < fNvars; j++) {
348  TrainData[j + i * fNvars] = e->GetValue(j);
349  }
350 
351  // Fill target classes
352  TrainDataClasses[i] = e->GetClass();
353 
354  // Get event weight
355  TrainDataWeights[i] = e->GetWeight();
356  }
357 
358  // Create classifier object
359  PyRunString("classifier = sklearn.ensemble.GradientBoostingClassifier(loss=loss, learning_rate=learningRate, n_estimators=nEstimators, max_depth=maxDepth, min_samples_split=minSamplesSplit, min_samples_leaf=minSamplesLeaf, min_weight_fraction_leaf=minWeightFractionLeaf, subsample=subsample, max_features=maxFeatures, max_leaf_nodes=maxLeafNodes, init=init, verbose=verbose, warm_start=warmStart, random_state=randomState)",
360  "Failed to setup classifier");
361 
362  // Fit classifier
363  // NOTE: We dump the output to a variable so that the call does not pollute stdout
364  PyRunString("dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)", "Failed to train classifier");
365 
366  // Store classifier
367  fClassifier = PyDict_GetItemString(fLocalNS, "classifier");
368  if(fClassifier == 0) {
369  Log() << kFATAL << "Can't create classifier object from GradientBoostingClassifier" << Endl;
370  Log() << Endl;
371  }
372 
373  if (IsModelPersistence()) {
374  Log() << Endl;
375  Log() << gTools().Color("bold") << "Saving state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
376  Log() << Endl;
378  }
379 }
380 
381 //_______________________________________________________________________
383 {
385 }
386 
387 //_______________________________________________________________________
388 std::vector<Double_t> MethodPyGTB::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t)
389 {
390  // Load model if not already done
391  if (fClassifier == 0) ReadModelFromFile();
392 
393  // Determine number of events
395  if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
396  if (firstEvt < 0) firstEvt = 0;
397  nEvents = lastEvt-firstEvt;
398 
399  // Get data
400  npy_intp dims[2];
401  dims[0] = nEvents;
402  dims[1] = fNvars;
403  PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
404  float *pValue = (float *)(PyArray_DATA(pEvent));
405 
406  for (Int_t ievt=0; ievt<nEvents; ievt++) {
407  Data()->SetCurrentEvent(ievt);
408  const TMVA::Event *e = Data()->GetEvent();
409  for (UInt_t i = 0; i < fNvars; i++) {
410  pValue[ievt * fNvars + i] = e->GetValue(i);
411  }
412  }
413 
414  // Get prediction from classifier
415  PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
416  double *proba = (double *)(PyArray_DATA(result));
417 
418  // Return signal probabilities
419  if(Long64_t(mvaValues.size()) != nEvents) mvaValues.resize(nEvents);
420  for (int i = 0; i < nEvents; ++i) {
421  mvaValues[i] = proba[fNoutputs*i + TMVA::Types::kSignal];
422  }
423 
424  Py_DECREF(pEvent);
425  Py_DECREF(result);
426 
427  return mvaValues;
428 }
429 
430 //_______________________________________________________________________
432 {
433  // cannot determine error
434  NoErrorCalc(errLower, errUpper);
435 
436  // Load model if not already done
437  if (fClassifier == 0) ReadModelFromFile();
438 
439  // Get current event and load to python array
440  const TMVA::Event *e = Data()->GetEvent();
441  npy_intp dims[2];
442  dims[0] = 1;
443  dims[1] = fNvars;
444  PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
445  float *pValue = (float *)(PyArray_DATA(pEvent));
446  for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);
447 
448  // Get prediction from classifier
449  PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
450  double *proba = (double *)(PyArray_DATA(result));
451 
452  // Return MVA value
453  Double_t mvaValue;
454  mvaValue = proba[TMVA::Types::kSignal]; // getting signal probability
455 
456  Py_DECREF(result);
457  Py_DECREF(pEvent);
458 
459  return mvaValue;
460 }
461 
462 //_______________________________________________________________________
463 std::vector<Float_t>& MethodPyGTB::GetMulticlassValues()
464 {
465  // Load model if not already done
466  if (fClassifier == 0) ReadModelFromFile();
467 
468  // Get current event and load to python array
469  const TMVA::Event *e = Data()->GetEvent();
470  npy_intp dims[2];
471  dims[0] = 1;
472  dims[1] = fNvars;
473  PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
474  float *pValue = (float *)(PyArray_DATA(pEvent));
475  for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);
476 
477  // Get prediction from classifier
478  PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
479  double *proba = (double *)(PyArray_DATA(result));
480 
481  // Return MVA values
482  if(UInt_t(classValues.size()) != fNoutputs) classValues.resize(fNoutputs);
483  for(UInt_t i = 0; i < fNoutputs; i++) classValues[i] = proba[i];
484 
485  Py_DECREF(pEvent);
486  Py_DECREF(result);
487 
488  return classValues;
489 }
490 
491 //_______________________________________________________________________
493 {
494  if (!PyIsInitialized()) {
495  PyInitialize();
496  }
497 
498  Log() << Endl;
499  Log() << gTools().Color("bold") << "Loading state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
500  Log() << Endl;
501 
502  // Load classifier from file
504  if(err != 0)
505  {
506  Log() << kFATAL << Form("Failed to load classifier from file (error code: %i): %s", err, fFilenameClassifier.Data()) << Endl;
507  }
508 
509  // Book classifier object in python dict
510  PyDict_SetItemString(fLocalNS, "classifier", fClassifier);
511 
512  // Load data properties
513  // NOTE: This has to be repeated here for the reader application
514  fNvars = GetNVariables();
516 }
517 
518 //_______________________________________________________________________
520 {
521  // Get feature importance from classifier as an array with length equal
522  // number of variables, higher value signals a higher importance
523  PyArrayObject* pRanking = (PyArrayObject*) PyObject_GetAttrString(fClassifier, "feature_importances_");
524  if(pRanking == 0) Log() << kFATAL << "Failed to get ranking from classifier" << Endl;
525 
526  // Fill ranking object and return it
527  fRanking = new Ranking(GetName(), "Variable Importance");
528  Double_t* rankingData = (Double_t*) PyArray_DATA(pRanking);
529  for(UInt_t iVar=0; iVar<fNvars; iVar++){
530  fRanking->AddRank(Rank(GetInputLabel(iVar), rankingData[iVar]));
531  }
532 
533  Py_DECREF(pRanking);
534 
535  return fRanking;
536 }
537 
538 //_______________________________________________________________________
540 {
541  // typical length of text line:
542  // "|--------------------------------------------------------------|"
543  Log() << "A gradient tree boosting classifier builds a model from an ensemble" << Endl;
544  Log() << "of decision trees, which are adapted each boosting step to fit better" << Endl;
545  Log() << "to previously misclassified events." << Endl;
546  Log() << Endl;
547  Log() << "Check out the scikit-learn documentation for more information." << Endl;
548 }
549 
550 
Double_t fSubsample
Definition: MethodPyGTB.h:98
PyObject * pNestimators
Definition: MethodPyGTB.h:93
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:99
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Singleton class for Global types used by TMVA.
Definition: Types.h:73
long long Long64_t
Definition: RtypesCore.h:69
PyObject * fClassifier
Definition: PyMethodBase.h:120
PyObject * pMaxFeatures
Definition: MethodPyGTB.h:138
Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)
MsgLogger & Log() const
Definition: Configurable.h:122
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
EAnalysisType
Definition: Types.h:125
PyObject * pMinSamplesSplit
Definition: MethodPyGTB.h:105
PyObject * pWarmStart
Definition: MethodPyGTB.h:163
Ranking for variables in method (implementation)
Definition: Ranking.h:48
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
static void Serialize(TString file, PyObject *classifier)
Serialize Python object.
PyObject * pRandomState
Definition: MethodPyGTB.h:131
PyArrayObject * fTrainDataClasses
Definition: PyMethodBase.h:124
static int PyIsInitialized()
Check Python interpreter initialization status.
static void PyInitialize()
Initialize Python interpreter.
const TString & GetInputLabel(Int_t i) const
Definition: MethodBase.h:334
const TString & GetWeightFileDir() const
Definition: MethodBase.h:474
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=Py_single_input)
Execute Python code from string.
PyObject * pInit
Definition: MethodPyGTB.h:125
void GetHelpMessage() const
PyObject * pVerbose
Definition: MethodPyGTB.h:153
DataSet * Data() const
Definition: MethodBase.h:393
TString fFilenameClassifier
Definition: MethodPyGTB.h:77
UInt_t GetClass() const
Definition: Event.h:81
PyObject * Eval(TString code)
Evaluate Python code.
DataSetInfo & DataInfo() const
Definition: MethodBase.h:394
PyObject * pMaxDepth
Definition: MethodPyGTB.h:117
Class that contains all the data information.
Definition: DataSetInfo.h:60
PyArrayObject * fTrainDataWeights
Definition: PyMethodBase.h:123
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
PyObject * pSubsample
Definition: MethodPyGTB.h:97
#define None
Definition: TGWin32.h:55
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: DataSet.h:85
std::vector< Double_t > mvaValues
Definition: MethodPyGTB.h:72
const int nEvents
Definition: testRooFit.cxx:42
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
const Ranking * CreateRanking()
const char * GetName() const
Definition: MethodBase.h:318
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
PyArrayObject * fTrainData
Definition: PyMethodBase.h:122
PyObject * pLoss
Definition: MethodPyGTB.h:81
Tools & gTools()
Double_t fLearningRate
Definition: MethodPyGTB.h:89
UInt_t GetNVariables() const
Definition: MethodBase.h:329
const Bool_t kFALSE
Definition: RtypesCore.h:92
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
#define ClassImp(name)
Definition: Rtypes.h:336
static Int_t UnSerialize(TString file, PyObject **obj)
Unserialize Python object.
double Double_t
Definition: RtypesCore.h:55
int type
Definition: TGX11.cxx:120
virtual void ReadModelFromFile()
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
PyObject * pMinWeightFractionLeaf
Definition: MethodPyGTB.h:113
std::vector< Float_t > & GetMulticlassValues()
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
Ranking * fRanking
Definition: MethodBase.h:569
Double_t fMinWeightFractionLeaf
Definition: MethodPyGTB.h:114
virtual void AddRank(const Rank &rank)
Add a new rank take ownership of it.
Definition: Ranking.cxx:86
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:601
PyObject * fLocalNS
Definition: PyMethodBase.h:143
TString fMaxFeatures
Definition: MethodPyGTB.h:139
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:215
PyObject * pLearningRate
Definition: MethodPyGTB.h:88
double result[121]
PyObject * pMinSamplesLeaf
Definition: MethodPyGTB.h:109
std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
TString fRandomState
Definition: MethodPyGTB.h:132
const Bool_t kTRUE
Definition: RtypesCore.h:91
MethodPyGTB(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Definition: MethodPyGTB.cxx:58
virtual void TestClassification()
initialization
PyObject * pMaxLeafNodes
Definition: MethodPyGTB.h:156
virtual void TestClassification()
initialization
const Event * GetEvent() const
Definition: DataSet.cxx:202
_object PyObject
Definition: TPyArg.h:20
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:829
TString fMaxLeafNodes
Definition: MethodPyGTB.h:157
std::vector< Float_t > classValues
Definition: MethodPyGTB.h:73
Bool_t IsModelPersistence()
Definition: MethodBase.h:367