MethodPyGTB.cxx
// @(#)root/tmva/pymva $Id$
// Authors: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer 2015

/**********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodPyGTB                                                           *
 * Web    : http://oproject.org                                                   *
 *                                                                                *
 * Description:                                                                   *
 *      GradientBoostingClassifier from scikit-learn                              *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                         *
 **********************************************************************************/

#include <Python.h> // Needs to be included first to avoid redefinition of _POSIX_C_SOURCE
#include "TMVA/MethodPyGTB.h"

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>

#include "TMVA/Configurable.h"
#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/DataSet.h"
#include "TMVA/Event.h"
#include "TMVA/IMethod.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/PDF.h"
#include "TMVA/Ranking.h"
#include "TMVA/Results.h"
#include "TMVA/TransformationHandler.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"
#include "TMVA/Timer.h"
#include "TMVA/VariableTransformBase.h"

#include "Riostream.h"
#include "TMath.h"
#include "TMatrix.h"
#include "TMatrixD.h"
#include "TVectorD.h"

#include <iomanip>
#include <fstream>

using namespace TMVA;

REGISTER_METHOD(PyGTB)

ClassImp(MethodPyGTB);

//_______________________________________________________________________
MethodPyGTB::MethodPyGTB(const TString &jobName,
                         const TString &methodTitle,
                         DataSetInfo &dsi,
                         const TString &theOption) :
   PyMethodBase(jobName, Types::kPyGTB, methodTitle, dsi, theOption),
   fLoss("deviance"),
   fLearningRate(0.1),
   fNestimators(100),
   fSubsample(1.0),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0.0),
   fMaxDepth(3),
   fInit("None"),
   fRandomState("None"),
   fMaxFeatures("None"),
   fVerbose(0),
   fMaxLeafNodes("None"),
   fWarmStart(kFALSE)
{
}

//_______________________________________________________________________
MethodPyGTB::MethodPyGTB(DataSetInfo &theData, const TString &theWeightFile)
   : PyMethodBase(Types::kPyGTB, theData, theWeightFile),
     fLoss("deviance"),
     fLearningRate(0.1),
     fNestimators(100),
     fSubsample(1.0),
     fMinSamplesSplit(2),
     fMinSamplesLeaf(1),
     fMinWeightFractionLeaf(0.0),
     fMaxDepth(3),
     fInit("None"),
     fRandomState("None"),
     fMaxFeatures("None"),
     fVerbose(0),
     fMaxLeafNodes("None"),
     fWarmStart(kFALSE)
{
}

//_______________________________________________________________________
MethodPyGTB::~MethodPyGTB(void)
{
}

//_______________________________________________________________________
Bool_t MethodPyGTB::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
   return kFALSE;
}

//_______________________________________________________________________
void MethodPyGTB::DeclareOptions()
{
   MethodBase::DeclareCompatibilityOptions();

   DeclareOptionRef(fLoss, "Loss", "{'deviance', 'exponential'}, optional (default='deviance')\
      loss function to be optimized. 'deviance' refers to\
      deviance (= logistic regression) for classification\
      with probabilistic outputs. For loss 'exponential' gradient\
      boosting recovers the AdaBoost algorithm.");

   DeclareOptionRef(fLearningRate, "LearningRate", "float, optional (default=0.1)\
      learning rate shrinks the contribution of each tree by `learning_rate`.\
      There is a trade-off between learning_rate and n_estimators.");

   DeclareOptionRef(fNestimators, "NEstimators", "int (default=100)\
      The number of boosting stages to perform. Gradient boosting\
      is fairly robust to over-fitting, so a large number usually\
      results in better performance.");

   DeclareOptionRef(fSubsample, "Subsample", "float, optional (default=1.0)\
      The fraction of samples to be used for fitting the individual base\
      learners. If smaller than 1.0 this results in Stochastic Gradient\
      Boosting. `subsample` interacts with the parameter `n_estimators`.\
      Choosing `subsample < 1.0` leads to a reduction of variance\
      and an increase in bias.");

   DeclareOptionRef(fMinSamplesSplit, "MinSamplesSplit", "integer, optional (default=2)\
      The minimum number of samples required to split an internal node.");

   DeclareOptionRef(fMinSamplesLeaf, "MinSamplesLeaf", "integer, optional (default=1)\
      The minimum number of samples in newly created leaves. A split is\
      discarded if, after the split, one of the leaves would contain less than\
      ``min_samples_leaf`` samples.");

   DeclareOptionRef(fMinWeightFractionLeaf, "MinWeightFractionLeaf", "float, optional (default=0.)\
      The minimum weighted fraction of the input samples required to be at a\
      leaf node.");

   DeclareOptionRef(fMaxDepth, "MaxDepth", "integer, optional (default=3)\
      The maximum depth of the tree. Nodes are expanded until\
      all leaves are pure or until all leaves contain less than\
      min_samples_split samples.\
      Ignored if ``max_leaf_nodes`` is not None.");

   DeclareOptionRef(fInit, "Init", "BaseEstimator, None, optional (default=None)\
      An estimator object that is used to compute the initial\
      predictions. ``init`` has to provide ``fit`` and ``predict``.\
      If None it uses ``loss.init_estimator``.");

   DeclareOptionRef(fRandomState, "RandomState", "int, RandomState instance or None, optional (default=None)\
      If int, random_state is the seed used by the random number generator;\
      If RandomState instance, random_state is the random number generator;\
      If None, the random number generator is the RandomState instance used\
      by `np.random`.");

   DeclareOptionRef(fMaxFeatures, "MaxFeatures", "The number of features to consider when looking for the best split");

   DeclareOptionRef(fVerbose, "Verbose", "int, optional (default=0)\
      Controls the verbosity of the tree building process.");

   DeclareOptionRef(fMaxLeafNodes, "MaxLeafNodes", "int or None, optional (default=None)\
      Grow trees with ``max_leaf_nodes`` in best-first fashion.\
      Best nodes are defined as relative reduction in impurity.\
      If None then unlimited number of leaf nodes.\
      If not None then ``max_depth`` will be ignored.");

   DeclareOptionRef(fWarmStart, "WarmStart", "bool, optional (default=False)\
      When set to ``True``, reuse the solution of the previous call to fit\
      and add more estimators to the ensemble; otherwise, just fit a whole\
      new forest.");

   DeclareOptionRef(fFilenameClassifier, "FilenameClassifier",
                    "Store trained classifier in this file");
}
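
// The option names declared above are set through the standard TMVA booking
// string. A minimal sketch (option values are illustrative; `factory` and
// `dataloader` are assumed to exist in the calling macro):
//
//    factory->BookMethod(dataloader, TMVA::Types::kPyGTB, "PyGTB",
//                        "!V:NEstimators=200:LearningRate=0.05:MaxDepth=3:Loss=deviance");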

//_______________________________________________________________________
// Check options and load them to local python namespace
void MethodPyGTB::ProcessOptions()
{
   if (fLoss != "deviance" && fLoss != "exponential") {
      Log() << kFATAL << Form("Loss = %s ... that does not work!", fLoss.Data())
            << " The options are 'deviance' or 'exponential'." << Endl;
   }
   pLoss = Eval(Form("'%s'", fLoss.Data()));
   PyDict_SetItemString(fLocalNS, "loss", pLoss);

   if (fLearningRate <= 0) {
      Log() << kFATAL << "LearningRate <= 0 ... that does not work!" << Endl;
   }
   pLearningRate = Eval(Form("%f", fLearningRate));
   PyDict_SetItemString(fLocalNS, "learningRate", pLearningRate);

   if (fNestimators <= 0) {
      Log() << kFATAL << "NEstimators <= 0 ... that does not work!" << Endl;
   }
   pNestimators = Eval(Form("%i", fNestimators));
   PyDict_SetItemString(fLocalNS, "nEstimators", pNestimators);

   if (fMinSamplesSplit < 0) {
      Log() << kFATAL << "MinSamplesSplit < 0 ... that does not work!" << Endl;
   }
   pMinSamplesSplit = Eval(Form("%i", fMinSamplesSplit));
   PyDict_SetItemString(fLocalNS, "minSamplesSplit", pMinSamplesSplit);

   if (fSubsample < 0) {
      Log() << kFATAL << "Subsample < 0 ... that does not work!" << Endl;
   }
   pSubsample = Eval(Form("%f", fSubsample));
   PyDict_SetItemString(fLocalNS, "subsample", pSubsample);

   if (fMinSamplesLeaf < 0) {
      Log() << kFATAL << "MinSamplesLeaf < 0 ... that does not work!" << Endl;
   }
   pMinSamplesLeaf = Eval(Form("%i", fMinSamplesLeaf));
   PyDict_SetItemString(fLocalNS, "minSamplesLeaf", pMinSamplesLeaf);

   if (fMinWeightFractionLeaf < 0) {
      Log() << kFATAL << "MinWeightFractionLeaf < 0 ... that does not work!" << Endl;
   }
   pMinWeightFractionLeaf = Eval(Form("%f", fMinWeightFractionLeaf));
   PyDict_SetItemString(fLocalNS, "minWeightFractionLeaf", pMinWeightFractionLeaf);

   if (fMaxDepth <= 0) {
      Log() << kFATAL << "MaxDepth <= 0 ... that does not work!" << Endl;
   }
   pMaxDepth = Eval(Form("%i", fMaxDepth));
   PyDict_SetItemString(fLocalNS, "maxDepth", pMaxDepth);

   pInit = Eval(fInit);
   if (!pInit) {
      Log() << kFATAL << Form("Init = %s ... that does not work!", fInit.Data())
            << " The options are None or BaseEstimator, which is an estimator object that"
            << " is used to compute the initial predictions."
            << " 'init' has to provide 'fit' and 'predict' methods."
            << " If None it uses 'loss.init_estimator'." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "init", pInit);

   pRandomState = Eval(fRandomState);
   if (!pRandomState) {
      Log() << kFATAL << Form("RandomState = %s ... that does not work!", fRandomState.Data())
            << " If int, random_state is the seed used by the random number generator;"
            << " If RandomState instance, random_state is the random number generator;"
            << " If None, the random number generator is the RandomState instance used by 'np.random'."
            << Endl;
   }
   PyDict_SetItemString(fLocalNS, "randomState", pRandomState);

   if (fMaxFeatures == "auto" || fMaxFeatures == "sqrt" || fMaxFeatures == "log2") {
      fMaxFeatures = Form("'%s'", fMaxFeatures.Data());
   }
   pMaxFeatures = Eval(fMaxFeatures);
   PyDict_SetItemString(fLocalNS, "maxFeatures", pMaxFeatures);

   if (!pMaxFeatures) {
      Log() << kFATAL << Form("MaxFeatures = %s ... that does not work!", fMaxFeatures.Data())
            << " int, float, string or None, optional (default='auto')."
            << " The number of features to consider when looking for the best split:"
            << " If int, then consider `max_features` features at each split."
            << " If float, then `max_features` is a percentage and"
            << " `int(max_features * n_features)` features are considered at each split."
            << " If 'auto', then `max_features=sqrt(n_features)`."
            << " If 'sqrt', then `max_features=sqrt(n_features)`."
            << " If 'log2', then `max_features=log2(n_features)`."
            << " If None, then `max_features=n_features`." << Endl;
   }

   pMaxLeafNodes = Eval(fMaxLeafNodes);
   if (!pMaxLeafNodes) {
      Log() << kFATAL << Form("MaxLeafNodes = %s ... that does not work!", fMaxLeafNodes.Data())
            << " The options are None or integer." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "maxLeafNodes", pMaxLeafNodes);

   pVerbose = Eval(Form("%i", fVerbose));
   PyDict_SetItemString(fLocalNS, "verbose", pVerbose);

   pWarmStart = Eval(Form("%i", UInt_t(fWarmStart)));
   PyDict_SetItemString(fLocalNS, "warmStart", pWarmStart);

   // If no filename is given, set default
   if (fFilenameClassifier.IsNull()) {
      fFilenameClassifier = GetWeightFileDir() + "/PyGTBModel_" + GetName() + ".PyData";
   }
}
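
// After ProcessOptions() the local Python namespace fLocalNS holds one entry
// per option, keyed by the names used in Train(). A sketch of its contents
// with the default configuration (values shown as Python literals, assuming
// Eval() simply evaluates each option string as Python code):
//
//    loss='deviance', learningRate=0.1, nEstimators=100, subsample=1.0,
//    minSamplesSplit=2, minSamplesLeaf=1, minWeightFractionLeaf=0.0,
//    maxDepth=3, init=None, randomState=None, maxFeatures=None,
//    verbose=0, maxLeafNodes=None, warmStart=0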

//_______________________________________________________________________
void MethodPyGTB::Init()
{
   _import_array(); // required to use numpy arrays

   // Check options and load them to local python namespace
   ProcessOptions();

   // Import module for gradient tree boosting classifier
   PyRunString("import sklearn.ensemble");

   // Get data properties
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
void MethodPyGTB::Train()
{
   // Load training data (data, classes, weights) to python arrays
   int fNrowsTraining = Data()->GetNTrainingEvents(); // every row is an event, a class type and a weight
   npy_intp dimsData[2];
   dimsData[0] = fNrowsTraining;
   dimsData[1] = fNvars;
   fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainData", (PyObject*)fTrainData);
   float *TrainData = (float *)(PyArray_DATA(fTrainData));

   npy_intp dimsClasses = (npy_intp) fNrowsTraining;
   fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataClasses", (PyObject*)fTrainDataClasses);
   float *TrainDataClasses = (float *)(PyArray_DATA(fTrainDataClasses));

   fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataWeights", (PyObject*)fTrainDataWeights);
   float *TrainDataWeights = (float *)(PyArray_DATA(fTrainDataWeights));

   for (int i = 0; i < fNrowsTraining; i++) {
      // Fill training data matrix
      const TMVA::Event *e = Data()->GetTrainingEvent(i);
      for (UInt_t j = 0; j < fNvars; j++) {
         TrainData[j + i * fNvars] = e->GetValue(j);
      }

      // Fill target classes
      TrainDataClasses[i] = e->GetClass();

      // Get event weight
      TrainDataWeights[i] = e->GetWeight();
   }

   // Create classifier object
   PyRunString("classifier = sklearn.ensemble.GradientBoostingClassifier(loss=loss, learning_rate=learningRate, n_estimators=nEstimators, max_depth=maxDepth, min_samples_split=minSamplesSplit, min_samples_leaf=minSamplesLeaf, min_weight_fraction_leaf=minWeightFractionLeaf, subsample=subsample, max_features=maxFeatures, max_leaf_nodes=maxLeafNodes, init=init, verbose=verbose, warm_start=warmStart, random_state=randomState)",
               "Failed to setup classifier");

   // Fit classifier
   // NOTE: We dump the output to a variable so that the call does not pollute stdout
   PyRunString("dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)", "Failed to train classifier");

   // Store classifier
   fClassifier = PyDict_GetItemString(fLocalNS, "classifier");
   if (fClassifier == 0) {
      Log() << kFATAL << "Can't create classifier object from GradientBoostingClassifier" << Endl;
      Log() << Endl;
   }

   if (IsModelPersistence()) {
      Log() << Endl;
      Log() << gTools().Color("bold") << "Saving state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
      Log() << Endl;
      Serialize(fFilenameClassifier, fClassifier);
   }
}
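
// For reference, the two PyRunString() calls above execute the equivalent of
// the following Python in fLocalNS (a sketch with the default option values;
// trainData, trainDataClasses and trainDataWeights are the numpy arrays
// booked above):
//
//    import sklearn.ensemble
//    classifier = sklearn.ensemble.GradientBoostingClassifier(
//        loss='deviance', learning_rate=0.1, n_estimators=100, max_depth=3,
//        min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0,
//        subsample=1.0, max_features=None, max_leaf_nodes=None, init=None,
//        verbose=0, warm_start=False, random_state=None)
//    dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)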

//_______________________________________________________________________
void MethodPyGTB::TestClassification()
{
   MethodBase::TestClassification();
}

//_______________________________________________________________________
std::vector<Double_t> MethodPyGTB::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Determine number of events
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt - firstEvt;

   // use timer
   Timer timer(nEvents, GetName(), kTRUE);

   if (logProgress)
      Log() << kHEADER << Form("[%s] : ", DataInfo().GetName())
            << "Evaluation of " << GetMethodName() << " on "
            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
            << " sample (" << nEvents << " events)" << Endl;

   // Get data
   npy_intp dims[2];
   dims[0] = nEvents;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));

   for (Int_t ievt = 0; ievt < nEvents; ievt++) {
      Data()->SetCurrentEvent(ievt);
      const TMVA::Event *e = Data()->GetEvent();
      for (UInt_t i = 0; i < fNvars; i++) {
         pValue[ievt * fNvars + i] = e->GetValue(i);
      }
   }

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return signal probabilities
   if (Long64_t(mvaValues.size()) != nEvents) mvaValues.resize(nEvents);
   for (int i = 0; i < nEvents; ++i) {
      mvaValues[i] = proba[fNoutputs * i + TMVA::Types::kSignal];
   }

   Py_DECREF(pEvent);
   Py_DECREF(result);

   if (logProgress) {
      Log() << kINFO
            << "Elapsed time for evaluation of " << nEvents << " events: "
            << timer.GetElapsedTime() << " " << Endl;
   }

   return mvaValues;
}
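
// Note on the indexing above: predict_proba returns one row of class
// probabilities per event, flattened row-major in memory, so event i's
// signal probability sits at proba[fNoutputs*i + TMVA::Types::kSignal].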

//_______________________________________________________________________
Double_t MethodPyGTB::GetMvaValue(Double_t *errLower, Double_t *errUpper)
{
   // cannot determine error
   NoErrorCalc(errLower, errUpper);

   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return signal probability as MVA value
   Double_t mvaValue = proba[TMVA::Types::kSignal];

   Py_DECREF(result);
   Py_DECREF(pEvent);

   return mvaValue;
}

//_______________________________________________________________________
std::vector<Float_t>& MethodPyGTB::GetMulticlassValues()
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return MVA values
   if (UInt_t(classValues.size()) != fNoutputs) classValues.resize(fNoutputs);
   for (UInt_t i = 0; i < fNoutputs; i++) classValues[i] = proba[i];

   Py_DECREF(pEvent);
   Py_DECREF(result);

   return classValues;
}

//_______________________________________________________________________
void MethodPyGTB::ReadModelFromFile()
{
   if (!PyIsInitialized()) {
      PyInitialize();
   }

   Log() << Endl;
   Log() << gTools().Color("bold") << "Loading state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
   Log() << Endl;

   // Load classifier from file
   Int_t err = UnSerialize(fFilenameClassifier, &fClassifier);
   if (err != 0) {
      Log() << kFATAL << Form("Failed to load classifier from file (error code: %i): %s", err, fFilenameClassifier.Data()) << Endl;
   }

   // Book classifier object in python dict
   PyDict_SetItemString(fLocalNS, "classifier", fClassifier);

   // Load data properties
   // NOTE: This has to be repeated here for the reader application
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
const Ranking* MethodPyGTB::CreateRanking()
{
   // Get feature importance from classifier as an array with length equal
   // to the number of variables; a higher value signals a higher importance
   PyArrayObject* pRanking = (PyArrayObject*) PyObject_GetAttrString(fClassifier, "feature_importances_");
   if (pRanking == 0) Log() << kFATAL << "Failed to get ranking from classifier" << Endl;

   // Fill ranking object and return it
   fRanking = new Ranking(GetName(), "Variable Importance");
   Double_t* rankingData = (Double_t*) PyArray_DATA(pRanking);
   for (UInt_t iVar = 0; iVar < fNvars; iVar++) {
      fRanking->AddRank(Rank(GetInputLabel(iVar), rankingData[iVar]));
   }

   Py_DECREF(pRanking);

   return fRanking;
}
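
// Background on the attribute used above: scikit-learn's feature_importances_
// holds the impurity-based importance of each input feature, normalized so
// the values sum to one; larger values mean the feature contributed more
// impurity reduction across the splits of the boosted trees.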

//_______________________________________________________________________
void MethodPyGTB::GetHelpMessage() const
{
   // typical length of text line:
   //         "|--------------------------------------------------------------|"
   Log() << "A gradient tree boosting classifier builds a model from an ensemble" << Endl;
   Log() << "of decision trees, which are adapted at each boosting step to better" << Endl;
   Log() << "fit previously misclassified events." << Endl;
   Log() << Endl;
   Log() << "Check out the scikit-learn documentation for more information." << Endl;
}
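
// A minimal end-to-end sketch of booking and training this method from a ROOT
// macro. File names, variable names and option values are illustrative only,
// and signalTree/backgroundTree are assumed to be TTree* read from an input file:
//
//    TMVA::Tools::Instance();
//    TMVA::PyMethodBase::PyInitialize();
//
//    TFile *outFile = TFile::Open("TMVA_PyGTB.root", "RECREATE");
//    TMVA::Factory factory("TMVAClassification", outFile,
//                          "!V:!Silent:AnalysisType=Classification");
//    TMVA::DataLoader loader("dataset");
//    loader.AddVariable("var1");
//    loader.AddVariable("var2");
//    loader.AddSignalTree(signalTree);
//    loader.AddBackgroundTree(backgroundTree);
//
//    factory.BookMethod(&loader, TMVA::Types::kPyGTB, "PyGTB",
//                       "NEstimators=100:LearningRate=0.1:MaxDepth=3");
//    factory.TrainAllMethods();
//    factory.TestAllMethods();
//    factory.EvaluateAllMethods();
//    outFile->Close();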