ROOT 6.16/01 Reference Guide
MethodPyRandomForest.cxx
// @(#)root/tmva/pymva $Id$
// Authors: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer 2015

/**********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodPyRandomForest                                                  *
 * Web    : http://oproject.org                                                   *
 *                                                                                *
 * Description:                                                                   *
 *      Random Forest classifier from scikit-learn                                *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/
#include <Python.h> // Needs to be included first to avoid redefinition of _POSIX_C_SOURCE
#include "TMVA/MethodPyRandomForest.h"

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
#include "TMVA/Configurable.h"
#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/DataSet.h"
#include "TMVA/Event.h"
#include "TMVA/IMethod.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/PDF.h"
#include "TMVA/Ranking.h"
#include "TMVA/Results.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"
#include "TMVA/Timer.h"

#include "Riostream.h"
#include "TMath.h"
#include "TMatrix.h"
#include "TMatrixD.h"
#include "TVectorD.h"

#include <iomanip>
#include <fstream>

using namespace TMVA;

REGISTER_METHOD(PyRandomForest)

ClassImp(MethodPyRandomForest);

//_______________________________________________________________________
MethodPyRandomForest::MethodPyRandomForest(const TString &jobName,
                                           const TString &methodTitle,
                                           DataSetInfo &dsi,
                                           const TString &theOption) :
   PyMethodBase(jobName, Types::kPyRandomForest, methodTitle, dsi, theOption),
   fNestimators(10),
   fCriterion("gini"),
   fMaxDepth("None"),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0),
   fMaxFeatures("'auto'"),
   fMaxLeafNodes("None"),
   fBootstrap(kTRUE),
   fOobScore(kFALSE),
   fNjobs(1),
   fRandomState("None"),
   fVerbose(0),
   fWarmStart(kFALSE),
   fClassWeight("None")
{
}

//_______________________________________________________________________
MethodPyRandomForest::MethodPyRandomForest(DataSetInfo &theData, const TString &theWeightFile)
   : PyMethodBase(Types::kPyRandomForest, theData, theWeightFile),
   fNestimators(10),
   fCriterion("gini"),
   fMaxDepth("None"),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0),
   fMaxFeatures("'auto'"),
   fMaxLeafNodes("None"),
   fBootstrap(kTRUE),
   fOobScore(kFALSE),
   fNjobs(1),
   fRandomState("None"),
   fVerbose(0),
   fWarmStart(kFALSE),
   fClassWeight("None")
{
}

//_______________________________________________________________________
MethodPyRandomForest::~MethodPyRandomForest()
{
}

//_______________________________________________________________________
Bool_t MethodPyRandomForest::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/)
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
   return kFALSE;
}
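The two analysis types advertised above are what allow this method to be booked from a TMVA factory for two-class and multiclass problems. A minimal booking sketch follows; the dataloader, output file, and option values are illustrative, not taken from this file (a complete macro sketch appears at the end of this listing):

// Sketch: booking PyRandomForest through the TMVA factory (illustrative values).
TMVA::Factory factory("TMVAClassification", outputFile, "AnalysisType=Classification");
factory.BookMethod(dataloader, TMVA::Types::kPyRandomForest, "PyRF",
                   "NEstimators=100:MaxDepth=3:Criterion=gini");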

//_______________________________________________________________________
void MethodPyRandomForest::DeclareOptions()
{

   DeclareOptionRef(fNestimators, "NEstimators", "Integer, optional (default=10). The number of trees in the forest.");
   DeclareOptionRef(fCriterion, "Criterion", "String, optional (default='gini') \
      The function to measure the quality of a split. Supported criteria are \
      'gini' for the Gini impurity and 'entropy' for the information gain. \
      Note: this parameter is tree-specific.");

   DeclareOptionRef(fMaxDepth, "MaxDepth", "integer or None, optional (default=None) \
      The maximum depth of the tree. If None, then nodes are expanded until \
      all leaves are pure or until all leaves contain less than \
      min_samples_split samples. \
      Ignored if ``max_leaf_nodes`` is not None.");

   DeclareOptionRef(fMinSamplesSplit, "MinSamplesSplit", "integer, optional (default=2) \
      The minimum number of samples required to split an internal node.");

   DeclareOptionRef(fMinSamplesLeaf, "MinSamplesLeaf", "integer, optional (default=1) \
      The minimum number of samples in newly created leaves. A split is \
      discarded if after the split, one of the leaves would contain less than \
      ``min_samples_leaf`` samples.");

   DeclareOptionRef(fMinWeightFractionLeaf, "MinWeightFractionLeaf", "float, optional (default=0.) \
      The minimum weighted fraction of the input samples required to be at a \
      leaf node.");

   DeclareOptionRef(fMaxFeatures, "MaxFeatures", "The number of features to consider when looking for the best split");

   DeclareOptionRef(fMaxLeafNodes, "MaxLeafNodes", "int or None, optional (default=None) \
      Grow trees with ``max_leaf_nodes`` in best-first fashion. \
      Best nodes are defined as relative reduction in impurity. \
      If None then unlimited number of leaf nodes. \
      If not None then ``max_depth`` will be ignored.");

   DeclareOptionRef(fBootstrap, "Bootstrap", "boolean, optional (default=True) \
      Whether bootstrap samples are used when building trees.");

   DeclareOptionRef(fOobScore, "OoBScore", "bool, optional (default=False). Whether to use out-of-bag samples to estimate \
      the generalization error.");

   DeclareOptionRef(fNjobs, "NJobs", "integer, optional (default=1) \
      The number of jobs to run in parallel for both `fit` and `predict`. \
      If -1, then the number of jobs is set to the number of cores.");

   DeclareOptionRef(fRandomState, "RandomState", "int, RandomState instance or None, optional (default=None) \
      If int, random_state is the seed used by the random number generator; \
      If RandomState instance, random_state is the random number generator; \
      If None, the random number generator is the RandomState instance used \
      by `np.random`.");

   DeclareOptionRef(fVerbose, "Verbose", "int, optional (default=0) \
      Controls the verbosity of the tree building process.");

   DeclareOptionRef(fWarmStart, "WarmStart", "bool, optional (default=False) \
      When set to ``True``, reuse the solution of the previous call to fit \
      and add more estimators to the ensemble, otherwise, just fit a whole \
      new forest.");

   DeclareOptionRef(fClassWeight, "ClassWeight", "dict, list of dicts, \"auto\", \"subsample\" or None, optional \
      Weights associated with classes in the form ``{class_label: weight}``. \
      If not given, all classes are supposed to have weight one. For \
      multi-output problems, a list of dicts can be provided in the same \
      order as the columns of y. \
      The \"auto\" mode uses the values of y to automatically adjust \
      weights inversely proportional to class frequencies in the input data. \
      The \"subsample\" mode is the same as \"auto\" except that weights are \
      computed based on the bootstrap sample for every tree grown. \
      For multi-output, the weights of each column of y will be multiplied. \
      Note that these weights will be multiplied with sample_weight (passed \
      through the fit method) if sample_weight is specified.");

   DeclareOptionRef(fFilenameClassifier, "FilenameClassifier",
                    "Store trained classifier in this file");
}
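Each option declared above maps one-to-one onto a keyword argument of scikit-learn's RandomForestClassifier; at booking time they are combined into a single colon-separated option string. An illustrative example (the values are chosen for the example, not defaults from this file):

// Sketch: an option string exercising several of the options declared above.
TString rfOptions = "NEstimators=200:Criterion=entropy:MaxDepth=5:MinSamplesLeaf=2:NJobs=-1";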

//_______________________________________________________________________
// Check options and load them to local python namespace
void MethodPyRandomForest::ProcessOptions()
{
   if (fNestimators <= 0) {
      Log() << kFATAL << " NEstimators <=0... that does not work !! " << Endl;
   }
   pNestimators = Eval(Form("%i", fNestimators));
   PyDict_SetItemString(fLocalNS, "nEstimators", pNestimators);

   if (fCriterion != "gini" && fCriterion != "entropy") {
      Log() << kFATAL << Form(" Criterion = %s... that does not work !! ", fCriterion.Data())
            << " The options are `gini` or `entropy`." << Endl;
   }
   pCriterion = Eval(Form("'%s'", fCriterion.Data()));
   PyDict_SetItemString(fLocalNS, "criterion", pCriterion);

   pMaxDepth = Eval(fMaxDepth);
   PyDict_SetItemString(fLocalNS, "maxDepth", pMaxDepth);
   if (!pMaxDepth) {
      Log() << kFATAL << Form(" MaxDepth = %s... that does not work !! ", fMaxDepth.Data())
            << " The options are None or integer." << Endl;
   }

   if (fMinSamplesSplit < 0) {
      Log() << kFATAL << " MinSamplesSplit < 0... that does not work !! " << Endl;
   }
   pMinSamplesSplit = Eval(Form("%i", fMinSamplesSplit));
   PyDict_SetItemString(fLocalNS, "minSamplesSplit", pMinSamplesSplit);

   if (fMinSamplesLeaf < 0) {
      Log() << kFATAL << " MinSamplesLeaf < 0... that does not work !! " << Endl;
   }
   pMinSamplesLeaf = Eval(Form("%i", fMinSamplesLeaf));
   PyDict_SetItemString(fLocalNS, "minSamplesLeaf", pMinSamplesLeaf);

   if (fMinWeightFractionLeaf < 0) {
      Log() << kERROR << " MinWeightFractionLeaf < 0... that does not work !! " << Endl;
   }
   pMinWeightFractionLeaf = Eval(Form("%f", fMinWeightFractionLeaf));
   PyDict_SetItemString(fLocalNS, "minWeightFractionLeaf", pMinWeightFractionLeaf);

   if (fMaxFeatures == "auto" || fMaxFeatures == "sqrt" || fMaxFeatures == "log2"){
      fMaxFeatures = Form("'%s'", fMaxFeatures.Data());
   }
   pMaxFeatures = Eval(fMaxFeatures);
   PyDict_SetItemString(fLocalNS, "maxFeatures", pMaxFeatures);

   if (!pMaxFeatures) {
      Log() << kFATAL << Form(" MaxFeatures = %s... that does not work !! ", fMaxFeatures.Data())
            << "int, float, string or None, optional (default='auto')"
            << "The number of features to consider when looking for the best split:"
            << "If int, then consider `max_features` features at each split."
            << "If float, then `max_features` is a percentage and"
            << "`int(max_features * n_features)` features are considered at each split."
            << "If 'auto', then `max_features=sqrt(n_features)`."
            << "If 'sqrt', then `max_features=sqrt(n_features)`."
            << "If 'log2', then `max_features=log2(n_features)`."
            << "If None, then `max_features=n_features`." << Endl;
   }

   pMaxLeafNodes = Eval(fMaxLeafNodes);
   if (!pMaxLeafNodes) {
      Log() << kFATAL << Form(" MaxLeafNodes = %s... that does not work !! ", fMaxLeafNodes.Data())
            << " The options are None or integer." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "maxLeafNodes", pMaxLeafNodes);

   pRandomState = Eval(fRandomState);
   if (!pRandomState) {
      Log() << kFATAL << Form(" RandomState = %s... that does not work !! ", fRandomState.Data())
            << "If int, random_state is the seed used by the random number generator;"
            << "If RandomState instance, random_state is the random number generator;"
            << "If None, the random number generator is the RandomState instance used by `np.random`." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "randomState", pRandomState);

   pClassWeight = Eval(fClassWeight);
   if (!pClassWeight) {
      Log() << kFATAL << Form(" ClassWeight = %s... that does not work !! ", fClassWeight.Data())
            << "dict, list of dicts, 'auto', 'subsample' or None, optional" << Endl;
   }
   PyDict_SetItemString(fLocalNS, "classWeight", pClassWeight);

   if(fNjobs < 1) {
      Log() << kFATAL << Form(" NJobs = %i... that does not work !! ", fNjobs)
            << "Value has to be greater than zero." << Endl;
   }
   pNjobs = Eval(Form("%i", fNjobs));
   PyDict_SetItemString(fLocalNS, "nJobs", pNjobs);

   pBootstrap = Eval(Form("%i", UInt_t(fBootstrap)));
   PyDict_SetItemString(fLocalNS, "bootstrap", pBootstrap);
   pOobScore = Eval(Form("%i", UInt_t(fOobScore)));
   PyDict_SetItemString(fLocalNS, "oobScore", pOobScore);
   pVerbose = Eval(Form("%i", fVerbose));
   PyDict_SetItemString(fLocalNS, "verbose", pVerbose);
   pWarmStart = Eval(Form("%i", UInt_t(fWarmStart)));
   PyDict_SetItemString(fLocalNS, "warmStart", pWarmStart);

   // If no filename is given, set default
   if(fFilenameClassifier.IsNull())
   {
      fFilenameClassifier = GetWeightFileDir() + "/PyRFModel_" + GetName() + ".PyData";
   }
}
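All conversions above go through PyMethodBase::Eval, which evaluates its argument as a Python expression and returns the resulting PyObject (or NULL on failure, which is why the code checks the pointers). A short sketch of the mapping:

// Sketch: how option strings become Python objects via Eval.
PyObject *pNone = Eval("None");         // Python None, e.g. for MaxDepth=None
PyObject *pInt  = Eval(Form("%i", 3));  // Python int 3
PyObject *pStr  = Eval("'gini'");       // Python str 'gini' (note the inner quotes)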

//_______________________________________________________________________
void MethodPyRandomForest::Init()
{
   _import_array(); // required to use numpy arrays

   // Check options and load them to local python namespace
   ProcessOptions();

   // Import module for random forest classifier
   PyRunString("import sklearn.ensemble");

   // Get data properties
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
void MethodPyRandomForest::Train()
{
   // Load training data (data, classes, weights) to python arrays
   int fNrowsTraining = Data()->GetNTrainingEvents(); // every row is an event, a class type and a weight
   npy_intp dimsData[2];
   dimsData[0] = fNrowsTraining;
   dimsData[1] = fNvars;
   fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainData", (PyObject*)fTrainData);
   float *TrainData = (float *)(PyArray_DATA(fTrainData));

   npy_intp dimsClasses = (npy_intp) fNrowsTraining;
   fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataClasses", (PyObject*)fTrainDataClasses);
   float *TrainDataClasses = (float *)(PyArray_DATA(fTrainDataClasses));

   fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataWeights", (PyObject*)fTrainDataWeights);
   float *TrainDataWeights = (float *)(PyArray_DATA(fTrainDataWeights));

   for (int i = 0; i < fNrowsTraining; i++) {
      // Fill training data matrix
      const TMVA::Event *e = Data()->GetTrainingEvent(i);
      for (UInt_t j = 0; j < fNvars; j++) {
         TrainData[j + i * fNvars] = e->GetValue(j);
      }

      // Fill target classes
      TrainDataClasses[i] = e->GetClass();

      // Get event weight
      TrainDataWeights[i] = e->GetWeight();
   }

   // Create classifier object
   PyRunString("classifier = sklearn.ensemble.RandomForestClassifier(bootstrap=bootstrap, class_weight=classWeight, criterion=criterion, max_depth=maxDepth, max_features=maxFeatures, max_leaf_nodes=maxLeafNodes, min_samples_leaf=minSamplesLeaf, min_samples_split=minSamplesSplit, min_weight_fraction_leaf=minWeightFractionLeaf, n_estimators=nEstimators, n_jobs=nJobs, oob_score=oobScore, random_state=randomState, verbose=verbose, warm_start=warmStart)",
               "Failed to setup classifier");

   // Fit classifier
   // NOTE: We dump the output to a variable so that the call does not pollute stdout
   PyRunString("dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)", "Failed to train classifier");

   // Store classifier
   fClassifier = PyDict_GetItemString(fLocalNS, "classifier");
   if(fClassifier == 0) {
      Log() << kFATAL << "Can't create classifier object from RandomForestClassifier" << Endl;
      Log() << Endl;
   }

   if (IsModelPersistence()) {
      Log() << Endl;
      Log() << gTools().Color("bold") << "Saving state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
      Log() << Endl;
      Serialize(fFilenameClassifier, fClassifier);
   }
}
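Since the fitted estimator stays registered as "classifier" in the local Python namespace, it can be inspected with further PyRunString calls. A sketch (the attributes are standard scikit-learn; the print statements are illustrative):

// Sketch: inspecting the fitted estimator from C++ (illustrative).
PyRunString("print('trees in forest:', len(classifier.estimators_))");
PyRunString("print('OOB score:', classifier.oob_score_)"); // only meaningful when the OoBScore option was enabled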

//_______________________________________________________________________
void MethodPyRandomForest::TestClassification()
{
   MethodBase::TestClassification();
}

//_______________________________________________________________________
std::vector<Double_t> MethodPyRandomForest::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Determine number of events
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt-firstEvt;

   // use timer
   Timer timer( nEvents, GetName(), kTRUE );

   if (logProgress)
      Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
            << "Evaluation of " << GetMethodName() << " on "
            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
            << " sample (" << nEvents << " events)" << Endl;

   // Get data
   npy_intp dims[2];
   dims[0] = nEvents;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));

   for (Int_t ievt=0; ievt<nEvents; ievt++) {
      Data()->SetCurrentEvent(ievt);
      const TMVA::Event *e = Data()->GetEvent();
      for (UInt_t i = 0; i < fNvars; i++) {
         pValue[ievt * fNvars + i] = e->GetValue(i);
      }
   }

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return signal probabilities
   if(Long64_t(mvaValues.size()) != nEvents) mvaValues.resize(nEvents);
   for (int i = 0; i < nEvents; ++i) {
      mvaValues[i] = proba[fNoutputs*i + TMVA::Types::kSignal];
   }

   Py_DECREF(pEvent);
   Py_DECREF(result);

   if (logProgress) {
      Log() << kINFO
            << "Elapsed time for evaluation of " << nEvents << " events: "
            << timer.GetElapsedTime() << " " << Endl;
   }

   return mvaValues;
}
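predict_proba returns one row per event and one column per class, and the flat proba buffer stores these rows contiguously; that is why the signal probability of event i sits at index fNoutputs*i + TMVA::Types::kSignal (the signal class is assumed to map to column 0, matching the single-event access in GetMvaValue below). For a binary problem:

// Sketch: row-major layout of the flattened predict_proba output for fNoutputs == 2.
// proba = { p(class0|evt0), p(class1|evt0), p(class0|evt1), p(class1|evt1), ... }
double pSignalEvt1 = proba[2 * 1 + TMVA::Types::kSignal]; // signal probability of the second event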

//_______________________________________________________________________
Double_t MethodPyRandomForest::GetMvaValue(Double_t *errLower, Double_t *errUpper)
{
   // cannot determine error
   NoErrorCalc(errLower, errUpper);

   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return MVA value
   Double_t mvaValue;
   mvaValue = proba[TMVA::Types::kSignal]; // getting signal probability

   Py_DECREF(result);
   Py_DECREF(pEvent);

   return mvaValue;
}

//_______________________________________________________________________
std::vector<Float_t> &MethodPyRandomForest::GetMulticlassValues()
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return MVA values
   if(UInt_t(classValues.size()) != fNoutputs) classValues.resize(fNoutputs);
   for(UInt_t i = 0; i < fNoutputs; i++) classValues[i] = proba[i];

   Py_DECREF(pEvent);
   Py_DECREF(result);

   return classValues;
}

//_______________________________________________________________________
void MethodPyRandomForest::ReadModelFromFile()
{
   if (!PyIsInitialized()) {
      PyInitialize();
   }

   Log() << Endl;
   Log() << gTools().Color("bold") << "Loading state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
   Log() << Endl;

   // Load classifier from file
   Int_t err = UnSerialize(fFilenameClassifier, &fClassifier);
   if(err != 0)
   {
      Log() << kFATAL << Form("Failed to load classifier from file (error code: %i): %s", err, fFilenameClassifier.Data()) << Endl;
   }

   // Book classifier object in python dict
   PyDict_SetItemString(fLocalNS, "classifier", fClassifier);

   // Load data properties
   // NOTE: This has to be repeated here for the reader application
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}
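ReadModelFromFile is the entry point used by the reader application, where no training step precedes evaluation. A minimal application-side sketch (the variable names and weight-file path are illustrative):

// Sketch: applying a trained PyRandomForest with the TMVA reader (illustrative names).
TMVA::Reader reader("!Color:!Silent");
Float_t var1 = 0, var2 = 0;
reader.AddVariable("var1", &var1);
reader.AddVariable("var2", &var2);
reader.BookMVA("PyRF", "dataset/weights/TMVAClassification_PyRF.weights.xml");
var1 = 0.5; var2 = -1.2;                   // fill with the current event's values
Double_t mva = reader.EvaluateMVA("PyRF"); // signal probability, via GetMvaValue()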

//_______________________________________________________________________
const Ranking *MethodPyRandomForest::CreateRanking()
{
   // Get feature importance from classifier as an array with length equal
   // to the number of variables; a higher value signals a higher importance
   PyArrayObject* pRanking = (PyArrayObject*) PyObject_GetAttrString(fClassifier, "feature_importances_");
   if(pRanking == 0) Log() << kFATAL << "Failed to get ranking from classifier" << Endl;

   // Fill ranking object and return it
   fRanking = new Ranking(GetName(), "Variable Importance");
   Double_t* rankingData = (Double_t*) PyArray_DATA(pRanking);
   for(UInt_t iVar=0; iVar<fNvars; iVar++){
      fRanking->AddRank(Rank(GetInputLabel(iVar), rankingData[iVar]));
   }

   Py_DECREF(pRanking);

   return fRanking;
}
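The ranking wraps scikit-learn's feature_importances_ attribute (impurity-based importances, normalized to sum to one). A usage sketch, assuming method is a pointer to the booked method (hypothetical here):

// Sketch: retrieving and printing the variable importance (illustrative).
const TMVA::Ranking *ranking = method->CreateRanking();
if (ranking) ranking->Print();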

//_______________________________________________________________________
void MethodPyRandomForest::GetHelpMessage() const
{
   // typical length of text line:
   //         "|--------------------------------------------------------------|"
   Log() << "A random forest is a meta estimator that fits a number of decision" << Endl;
   Log() << "tree classifiers on various sub-samples of the dataset and uses" << Endl;
   Log() << "averaging to improve the predictive accuracy and control over-fitting." << Endl;
   Log() << Endl;
   Log() << "Check out the scikit-learn documentation for more information." << Endl;
}
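For orientation, a minimal end-to-end training sketch in the style of the PyMVA tutorials. Every name in it (files, trees, variables, method title) is illustrative and not taken from this source file:

// Sketch: minimal PyRandomForest training macro (illustrative names throughout).
#include "TFile.h"
#include "TTree.h"
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/PyMethodBase.h"
#include "TMVA/Types.h"

void TrainPyRF()
{
   TMVA::PyMethodBase::PyInitialize(); // start the embedded Python interpreter

   TFile *input  = TFile::Open("input.root");               // hypothetical input file
   TFile *output = TFile::Open("TMVA_PyRF.root", "RECREATE");

   TMVA::Factory factory("TMVAClassification", output,
                         "!V:!Silent:AnalysisType=Classification");
   TMVA::DataLoader loader("dataset");
   loader.AddVariable("var1", 'F');                         // hypothetical variables
   loader.AddVariable("var2", 'F');
   loader.AddSignalTree((TTree*)input->Get("TreeS"), 1.0);  // hypothetical trees
   loader.AddBackgroundTree((TTree*)input->Get("TreeB"), 1.0);
   loader.PrepareTrainingAndTestTree("", "SplitMode=Random:NormMode=NumEvents:!V");

   factory.BookMethod(&loader, TMVA::Types::kPyRandomForest, "PyRF",
                      "!V:NEstimators=100:Criterion=gini:MaxDepth=3");

   factory.TrainAllMethods();
   factory.TestAllMethods();
   factory.EvaluateAllMethods();
   output->Close();
}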