MethodPyRandomForest.cxx
// @(#)root/tmva/pymva $Id$
// Authors: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer 2015

/**********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodPyRandomForest                                                  *
 * Web    : http://oproject.org                                                   *
 *                                                                                *
 * Description:                                                                   *
 *      Random Forest classifier from scikit-learn                                *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/
#include <Python.h> // Needs to be included first to avoid redefinition of _POSIX_C_SOURCE
#include "TMVA/MethodPyRandomForest.h"

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>

#include "TMVA/Configurable.h"
#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/DataSet.h"
#include "TMVA/Event.h"
#include "TMVA/IMethod.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/PDF.h"
#include "TMVA/Ranking.h"
#include "TMVA/Results.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"
#include "TMVA/Timer.h"

#include "Riostream.h"
#include "TMath.h"
#include "TMatrix.h"
#include "TMatrixD.h"
#include "TVectorD.h"

#include <iomanip>
#include <fstream>
using namespace TMVA;

namespace TMVA {
namespace Internal {
class PyGILRAII {
   PyGILState_STATE m_GILState;

public:
   PyGILRAII() : m_GILState(PyGILState_Ensure()) {}
   ~PyGILRAII() { PyGILState_Release(m_GILState); }
};
} // namespace Internal
} // namespace TMVA
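
//
// A minimal usage sketch (not part of this file, assuming the Python
// interpreter is already initialized): the guard holds the GIL for the
// lifetime of the scope, so Python C-API calls made in between are safe
// from any thread.
//
//    {
//       TMVA::Internal::PyGILRAII raii;               // acquires the GIL
//       PyRun_SimpleString("print('GIL held here')"); // safe Python call
//    }                                                // GIL released at scope exit
//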

REGISTER_METHOD(PyRandomForest)

ClassImp(MethodPyRandomForest);

//_______________________________________________________________________
MethodPyRandomForest::MethodPyRandomForest(const TString &jobName,
                                           const TString &methodTitle,
                                           DataSetInfo &dsi,
                                           const TString &theOption) :
   PyMethodBase(jobName, Types::kPyRandomForest, methodTitle, dsi, theOption),
   fNestimators(10),
   fCriterion("gini"),
   fMaxDepth("None"),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0),
   fMaxFeatures("'auto'"),
   fMaxLeafNodes("None"),
   fBootstrap(kTRUE),
   fOobScore(kFALSE),
   fNjobs(1),
   fRandomState("None"),
   fVerbose(0),
   fWarmStart(kFALSE),
   fClassWeight("None")
{
}

//_______________________________________________________________________
MethodPyRandomForest::MethodPyRandomForest(DataSetInfo &theData, const TString &theWeightFile)
   : PyMethodBase(Types::kPyRandomForest, theData, theWeightFile),
   fNestimators(10),
   fCriterion("gini"),
   fMaxDepth("None"),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0),
   fMaxFeatures("'auto'"),
   fMaxLeafNodes("None"),
   fBootstrap(kTRUE),
   fOobScore(kFALSE),
   fNjobs(1),
   fRandomState("None"),
   fVerbose(0),
   fWarmStart(kFALSE),
   fClassWeight("None")
{
}

//_______________________________________________________________________
MethodPyRandomForest::~MethodPyRandomForest(void)
{
}

//_______________________________________________________________________
Bool_t MethodPyRandomForest::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
   return kFALSE;
}

//_______________________________________________________________________
void MethodPyRandomForest::DeclareOptions()
{
   MethodBase::DeclareCompatibilityOptions();

   DeclareOptionRef(fNestimators, "NEstimators", "Integer, optional (default=10). The number of trees in the forest.");

   DeclareOptionRef(fCriterion, "Criterion", "String, optional (default='gini') \
      The function to measure the quality of a split. Supported criteria are \
      'gini' for the Gini impurity and 'entropy' for the information gain. \
      Note: this parameter is tree-specific.");

   DeclareOptionRef(fMaxDepth, "MaxDepth", "Integer or None, optional (default=None) \
      The maximum depth of the tree. If None, then nodes are expanded until \
      all leaves are pure or until all leaves contain less than \
      min_samples_split samples. \
      Ignored if ``max_leaf_nodes`` is not None.");

   DeclareOptionRef(fMinSamplesSplit, "MinSamplesSplit", "Integer, optional (default=2) \
      The minimum number of samples required to split an internal node.");

   DeclareOptionRef(fMinSamplesLeaf, "MinSamplesLeaf", "Integer, optional (default=1) \
      The minimum number of samples in newly created leaves. A split is \
      discarded if, after the split, one of the leaves would contain less than \
      ``min_samples_leaf`` samples.");

   DeclareOptionRef(fMinWeightFractionLeaf, "MinWeightFractionLeaf", "Float, optional (default=0.) \
      The minimum weighted fraction of the input samples required to be at a \
      leaf node.");

   DeclareOptionRef(fMaxFeatures, "MaxFeatures", "The number of features to consider when looking for the best split.");

   DeclareOptionRef(fMaxLeafNodes, "MaxLeafNodes", "Integer or None, optional (default=None) \
      Grow trees with ``max_leaf_nodes`` in best-first fashion. \
      Best nodes are defined as relative reduction in impurity. \
      If None, then an unlimited number of leaf nodes. \
      If not None, then ``max_depth`` will be ignored.");

   DeclareOptionRef(fBootstrap, "Bootstrap", "Boolean, optional (default=True) \
      Whether bootstrap samples are used when building trees.");

   DeclareOptionRef(fOobScore, "OoBScore", "Boolean, optional (default=False) \
      Whether to use out-of-bag samples to estimate the generalization error.");

   DeclareOptionRef(fNjobs, "NJobs", "Integer, optional (default=1) \
      The number of jobs to run in parallel for both `fit` and `predict`. \
      If -1, then the number of jobs is set to the number of cores.");

   DeclareOptionRef(fRandomState, "RandomState", "Integer, RandomState instance or None, optional (default=None) \
      If int, random_state is the seed used by the random number generator; \
      If RandomState instance, random_state is the random number generator; \
      If None, the random number generator is the RandomState instance used \
      by `np.random`.");

   DeclareOptionRef(fVerbose, "Verbose", "Integer, optional (default=0) \
      Controls the verbosity of the tree building process.");

   DeclareOptionRef(fWarmStart, "WarmStart", "Boolean, optional (default=False) \
      When set to ``True``, reuse the solution of the previous call to fit \
      and add more estimators to the ensemble; otherwise, just fit a whole \
      new forest.");

   DeclareOptionRef(fClassWeight, "ClassWeight", "Dict, list of dicts, \"auto\", \"subsample\" or None, optional \
      Weights associated with classes in the form ``{class_label: weight}``. \
      If not given, all classes are supposed to have weight one. For \
      multi-output problems, a list of dicts can be provided in the same \
      order as the columns of y. \
      The \"auto\" mode uses the values of y to automatically adjust \
      weights inversely proportional to class frequencies in the input data. \
      The \"subsample\" mode is the same as \"auto\" except that weights are \
      computed based on the bootstrap sample for every tree grown. \
      For multi-output, the weights of each column of y will be multiplied. \
      Note that these weights will be multiplied with sample_weight (passed \
      through the fit method) if sample_weight is specified.");

   DeclareOptionRef(fFilenameClassifier, "FilenameClassifier",
                    "Store trained classifier in this file");
}
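
//
// Usage sketch (option values are illustrative assumptions): the options
// declared above map onto the standard TMVA option string passed when
// booking the method through a Factory, e.g.
//
//    factory->BookMethod(dataloader, TMVA::Types::kPyRandomForest, "PyRF",
//                        "!V:NEstimators=100:MaxDepth=3:MinSamplesLeaf=1:Criterion=gini");
//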

//_______________________________________________________________________
// Check options and load them to local python namespace
void MethodPyRandomForest::ProcessOptions()
{
   if (fNestimators <= 0) {
      Log() << kFATAL << " NEstimators <=0... that does not work !! " << Endl;
   }
   pNestimators = Eval(Form("%i", fNestimators));
   PyDict_SetItemString(fLocalNS, "nEstimators", pNestimators);

   if (fCriterion != "gini" && fCriterion != "entropy") {
      Log() << kFATAL << Form(" Criterion = %s... that does not work !! ", fCriterion.Data())
            << " The options are `gini` or `entropy`." << Endl;
   }
   pCriterion = Eval(Form("'%s'", fCriterion.Data()));
   PyDict_SetItemString(fLocalNS, "criterion", pCriterion);

   pMaxDepth = Eval(fMaxDepth);
   PyDict_SetItemString(fLocalNS, "maxDepth", pMaxDepth);
   if (!pMaxDepth) {
      Log() << kFATAL << Form(" MaxDepth = %s... that does not work !! ", fMaxDepth.Data())
            << " The options are None or integer." << Endl;
   }

   if (fMinSamplesSplit < 0) {
      Log() << kFATAL << " MinSamplesSplit < 0... that does not work !! " << Endl;
   }
   pMinSamplesSplit = Eval(Form("%i", fMinSamplesSplit));
   PyDict_SetItemString(fLocalNS, "minSamplesSplit", pMinSamplesSplit);

   if (fMinSamplesLeaf < 0) {
      Log() << kFATAL << " MinSamplesLeaf < 0... that does not work !! " << Endl;
   }
   pMinSamplesLeaf = Eval(Form("%i", fMinSamplesLeaf));
   PyDict_SetItemString(fLocalNS, "minSamplesLeaf", pMinSamplesLeaf);

   if (fMinWeightFractionLeaf < 0) {
      Log() << kERROR << " MinWeightFractionLeaf < 0... that does not work !! " << Endl;
   }
   pMinWeightFractionLeaf = Eval(Form("%f", fMinWeightFractionLeaf));
   PyDict_SetItemString(fLocalNS, "minWeightFractionLeaf", pMinWeightFractionLeaf);

   if (fMaxFeatures == "auto" || fMaxFeatures == "sqrt" || fMaxFeatures == "log2") {
      fMaxFeatures = Form("'%s'", fMaxFeatures.Data());
   }
   pMaxFeatures = Eval(fMaxFeatures);
   PyDict_SetItemString(fLocalNS, "maxFeatures", pMaxFeatures);

   if (!pMaxFeatures) {
      Log() << kFATAL << Form(" MaxFeatures = %s... that does not work !! ", fMaxFeatures.Data())
            << "int, float, string or None, optional (default='auto')"
            << "The number of features to consider when looking for the best split:"
            << "If int, then consider `max_features` features at each split."
            << "If float, then `max_features` is a percentage and"
            << "`int(max_features * n_features)` features are considered at each split."
            << "If 'auto', then `max_features=sqrt(n_features)`."
            << "If 'sqrt', then `max_features=sqrt(n_features)`."
            << "If 'log2', then `max_features=log2(n_features)`."
            << "If None, then `max_features=n_features`." << Endl;
   }

   pMaxLeafNodes = Eval(fMaxLeafNodes);
   if (!pMaxLeafNodes) {
      Log() << kFATAL << Form(" MaxLeafNodes = %s... that does not work !! ", fMaxLeafNodes.Data())
            << " The options are None or integer." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "maxLeafNodes", pMaxLeafNodes);

   pRandomState = Eval(fRandomState);
   if (!pRandomState) {
      Log() << kFATAL << Form(" RandomState = %s... that does not work !! ", fRandomState.Data())
            << "If int, random_state is the seed used by the random number generator;"
            << "If RandomState instance, random_state is the random number generator;"
            << "If None, the random number generator is the RandomState instance used by `np.random`." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "randomState", pRandomState);

   pClassWeight = Eval(fClassWeight);
   if (!pClassWeight) {
      Log() << kFATAL << Form(" ClassWeight = %s... that does not work !! ", fClassWeight.Data())
            << "dict, list of dicts, 'auto', 'subsample' or None, optional" << Endl;
   }
   PyDict_SetItemString(fLocalNS, "classWeight", pClassWeight);

   if (fNjobs < 1) {
      Log() << kFATAL << Form(" NJobs = %i... that does not work !! ", fNjobs)
            << "Value has to be greater than zero." << Endl;
   }
   pNjobs = Eval(Form("%i", fNjobs));
   PyDict_SetItemString(fLocalNS, "nJobs", pNjobs);

   pBootstrap = Eval(Form("%i", UInt_t(fBootstrap)));
   PyDict_SetItemString(fLocalNS, "bootstrap", pBootstrap);
   pOobScore = Eval(Form("%i", UInt_t(fOobScore)));
   PyDict_SetItemString(fLocalNS, "oobScore", pOobScore);
   pVerbose = Eval(Form("%i", fVerbose));
   PyDict_SetItemString(fLocalNS, "verbose", pVerbose);
   pWarmStart = Eval(Form("%i", UInt_t(fWarmStart)));
   PyDict_SetItemString(fLocalNS, "warmStart", pWarmStart);

   // If no filename is given, set default
   if (fFilenameClassifier.IsNull())
   {
      fFilenameClassifier = GetWeightFileDir() + "/PyRFModel_" + GetName() + ".PyData";
   }
}
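
//
// For example, with the defaults above the local Python namespace ends up
// holding nEstimators = 10, criterion = 'gini', maxDepth = None and
// maxFeatures = 'auto': Eval() turns each TMVA option string into the
// corresponding Python object before it is handed to scikit-learn.
//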

//_______________________________________________________________________
void MethodPyRandomForest::Init()
{
   TMVA::Internal::PyGILRAII raii;
   _import_array(); // required to use numpy arrays

   // Check options and load them to local python namespace
   ProcessOptions();

   // Import module for random forest classifier
   PyRunString("import sklearn.ensemble");

   // Get data properties
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
void MethodPyRandomForest::Train()
{
   // Load training data (data, classes, weights) to python arrays
   int fNrowsTraining = Data()->GetNTrainingEvents(); // every row is an event, a class type and a weight
   npy_intp dimsData[2];
   dimsData[0] = fNrowsTraining;
   dimsData[1] = fNvars;
   fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainData", (PyObject *)fTrainData);
   float *TrainData = (float *)(PyArray_DATA(fTrainData));

   npy_intp dimsClasses = (npy_intp)fNrowsTraining;
   fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataClasses", (PyObject *)fTrainDataClasses);
   float *TrainDataClasses = (float *)(PyArray_DATA(fTrainDataClasses));

   fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataWeights", (PyObject *)fTrainDataWeights);
   float *TrainDataWeights = (float *)(PyArray_DATA(fTrainDataWeights));

   for (int i = 0; i < fNrowsTraining; i++) {
      // Fill training data matrix (row-major: event i, variable j)
      const TMVA::Event *e = Data()->GetTrainingEvent(i);
      for (UInt_t j = 0; j < fNvars; j++) {
         TrainData[j + i * fNvars] = e->GetValue(j);
      }

      // Fill target classes
      TrainDataClasses[i] = e->GetClass();

      // Get event weight
      TrainDataWeights[i] = e->GetWeight();
   }

   // Create classifier object
   PyRunString("classifier = sklearn.ensemble.RandomForestClassifier(bootstrap=bootstrap, class_weight=classWeight, criterion=criterion, max_depth=maxDepth, max_features=maxFeatures, max_leaf_nodes=maxLeafNodes, min_samples_leaf=minSamplesLeaf, min_samples_split=minSamplesSplit, min_weight_fraction_leaf=minWeightFractionLeaf, n_estimators=nEstimators, n_jobs=nJobs, oob_score=oobScore, random_state=randomState, verbose=verbose, warm_start=warmStart)",
               "Failed to setup classifier");

   // Fit classifier
   // NOTE: We dump the output to a variable so that the call does not pollute stdout
   PyRunString("dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)", "Failed to train classifier");

   // Store classifier
   fClassifier = PyDict_GetItemString(fLocalNS, "classifier");
   if (fClassifier == 0) {
      Log() << kFATAL << "Can't create classifier object from RandomForestClassifier" << Endl;
      Log() << Endl;
   }

   if (IsModelPersistence()) {
      Log() << Endl;
      Log() << gTools().Color("bold") << "Saving state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
      Log() << Endl;
      Serialize(fFilenameClassifier, fClassifier);
   }
}
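
//
// Sketch (hypothetical, not in this file): because the fitted model is kept
// in fLocalNS under the name "classifier", its attributes could be inspected
// with a further PyRunString call, e.g.
//
//    PyRunString("print(classifier.n_estimators)", "Failed to inspect classifier");
//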

//_______________________________________________________________________
void MethodPyRandomForest::TestClassification()
{
   MethodBase::TestClassification();
}

//_______________________________________________________________________
std::vector<Double_t> MethodPyRandomForest::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Determine number of events
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt - firstEvt;

   // use timer
   Timer timer(nEvents, GetName(), kTRUE);

   if (logProgress)
      Log() << kHEADER << Form("[%s] : ", DataInfo().GetName())
            << "Evaluation of " << GetMethodName() << " on "
            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
            << " sample (" << nEvents << " events)" << Endl;

   // Get data
   npy_intp dims[2];
   dims[0] = nEvents;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));

   for (Int_t ievt = 0; ievt < nEvents; ievt++) {
      Data()->SetCurrentEvent(ievt);
      const TMVA::Event *e = Data()->GetEvent();
      for (UInt_t i = 0; i < fNvars; i++) {
         pValue[ievt * fNvars + i] = e->GetValue(i);
      }
   }

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return signal probabilities
   if (Long64_t(mvaValues.size()) != nEvents) mvaValues.resize(nEvents);
   for (int i = 0; i < nEvents; ++i) {
      mvaValues[i] = proba[fNoutputs * i + TMVA::Types::kSignal];
   }

   Py_DECREF(pEvent);
   Py_DECREF(result);

   if (logProgress) {
      Log() << kINFO
            << "Elapsed time for evaluation of " << nEvents << " events: "
            << timer.GetElapsedTime() << " " << Endl;
   }

   return mvaValues;
}
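
//
// Note: predict_proba returns an (nEvents x nClasses) row-major array, so the
// probability of class c for event i sits at proba[fNoutputs * i + c]; the
// loop above reads out the signal column for every event.
//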

//_______________________________________________________________________
Double_t MethodPyRandomForest::GetMvaValue(Double_t *errLower, Double_t *errUpper)
{
   // cannot determine error
   NoErrorCalc(errLower, errUpper);

   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return MVA value
   Double_t mvaValue;
   mvaValue = proba[TMVA::Types::kSignal]; // getting signal probability

   Py_DECREF(result);
   Py_DECREF(pEvent);

   return mvaValue;
}

//_______________________________________________________________________
std::vector<Float_t> &MethodPyRandomForest::GetMulticlassValues()
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return MVA values
   if (UInt_t(classValues.size()) != fNoutputs) classValues.resize(fNoutputs);
   for (UInt_t i = 0; i < fNoutputs; i++) classValues[i] = proba[i];

   Py_DECREF(pEvent);
   Py_DECREF(result);

   return classValues;
}

//_______________________________________________________________________
void MethodPyRandomForest::ReadModelFromFile()
{
   if (!PyIsInitialized()) {
      PyInitialize();
   }

   Log() << Endl;
   Log() << gTools().Color("bold") << "Loading state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
   Log() << Endl;

   // Load classifier from file
   Int_t err = UnSerialize(fFilenameClassifier, &fClassifier);
   if (err != 0)
   {
      Log() << kFATAL << Form("Failed to load classifier from file (error code: %i): %s", err, fFilenameClassifier.Data()) << Endl;
   }

   // Book classifier object in python dict
   PyDict_SetItemString(fLocalNS, "classifier", fClassifier);

   // Load data properties
   // NOTE: This has to be repeated here for the reader application
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}
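
//
// Reader-side usage sketch (variable names and the weight-file path are
// illustrative assumptions):
//
//    TMVA::Reader reader("!Color:!Silent");
//    Float_t var1; reader.AddVariable("var1", &var1);
//    reader.BookMVA("PyRF", "dataset/weights/TMVAClassification_PyRF.weights.xml");
//    Double_t mva = reader.EvaluateMVA("PyRF");
//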

//_______________________________________________________________________
const Ranking *MethodPyRandomForest::CreateRanking()
{
   // Get feature importance from classifier as an array with length equal to
   // the number of variables; a higher value signals a higher importance
   PyArrayObject *pRanking = (PyArrayObject *)PyObject_GetAttrString(fClassifier, "feature_importances_");
   if (pRanking == 0) Log() << kFATAL << "Failed to get ranking from classifier" << Endl;

   // Fill ranking object and return it
   fRanking = new Ranking(GetName(), "Variable Importance");
   Double_t *rankingData = (Double_t *)PyArray_DATA(pRanking);
   for (UInt_t iVar = 0; iVar < fNvars; iVar++) {
      fRanking->AddRank(Rank(GetInputLabel(iVar), rankingData[iVar]));
   }

   Py_DECREF(pRanking);

   return fRanking;
}
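
//
// Usage sketch (assuming `method` points to a trained MethodPyRandomForest
// instance): the ranking built above can be printed directly, e.g.
//
//    const TMVA::Ranking *rank = method->CreateRanking();
//    if (rank) rank->Print();
//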

//_______________________________________________________________________
void MethodPyRandomForest::GetHelpMessage() const
{
   // typical length of text line:
   //       "|--------------------------------------------------------------|"
   Log() << "A random forest is a meta estimator that fits a number of decision" << Endl;
   Log() << "tree classifiers on various sub-samples of the dataset and uses" << Endl;
   Log() << "averaging to improve the predictive accuracy and control over-fitting." << Endl;
   Log() << Endl;
   Log() << "Check out the scikit-learn documentation for more information." << Endl;
}