Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodPyRandomForest.cxx
Go to the documentation of this file.
1// @(#)root/tmva/pymva $Id$
2// Authors: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer 2015
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodPyRandomForest *
8 * Web : http://oproject.org *
9 * *
10 * Description: *
11 * Random Forest Classifier from scikit-learn *
12 * *
13 * *
14 * Redistribution and use in source and binary forms, with or without *
15 * modification, are permitted according to the terms listed in LICENSE *
16 * (http://tmva.sourceforge.net/LICENSE) *
17 * *
18 **********************************************************************************/
19#include <Python.h> // Needs to be included first to avoid redefinition of _POSIX_C_SOURCE
21
22#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
23#include <numpy/arrayobject.h>
24
25#include "TMVA/Configurable.h"
27#include "TMVA/Config.h"
28#include "TMVA/DataSet.h"
29#include "TMVA/Event.h"
30#include "TMVA/IMethod.h"
31#include "TMVA/MsgLogger.h"
32#include "TMVA/PDF.h"
33#include "TMVA/Ranking.h"
34#include "TMVA/Results.h"
35#include "TMVA/Tools.h"
36#include "TMVA/Types.h"
37#include "TMVA/Timer.h"
39
40#include "TMatrix.h"
41
42using namespace TMVA;
43
44namespace TMVA {
45namespace Internal {
46class PyGILRAII {
47 PyGILState_STATE m_GILState;
48
49public:
50 PyGILRAII() : m_GILState(PyGILState_Ensure()) {}
51 ~PyGILRAII() { PyGILState_Release(m_GILState); }
52};
53} // namespace Internal
54} // namespace TMVA
55
56REGISTER_METHOD(PyRandomForest)
57
59
60//_______________________________________________________________________
62 const TString &methodTitle,
63 DataSetInfo &dsi,
64 const TString &theOption) :
65 PyMethodBase(jobName, Types::kPyRandomForest, methodTitle, dsi, theOption),
66 fNestimators(10),
67 fCriterion("gini"),
68 fMaxDepth("None"),
69 fMinSamplesSplit(2),
70 fMinSamplesLeaf(1),
71 fMinWeightFractionLeaf(0),
72 fMaxFeatures("'sqrt'"),
73 fMaxLeafNodes("None"),
74 fBootstrap(kTRUE),
75 fOobScore(kFALSE),
76 fNjobs(1),
77 fRandomState("None"),
78 fVerbose(0),
79 fWarmStart(kFALSE),
80 fClassWeight("None")
81{
82}
83
84//_______________________________________________________________________
86 : PyMethodBase(Types::kPyRandomForest, theData, theWeightFile),
87 fNestimators(10),
88 fCriterion("gini"),
89 fMaxDepth("None"),
90 fMinSamplesSplit(2),
91 fMinSamplesLeaf(1),
92 fMinWeightFractionLeaf(0),
93 fMaxFeatures("'sqrt'"),
94 fMaxLeafNodes("None"),
95 fBootstrap(kTRUE),
96 fOobScore(kFALSE),
97 fNjobs(1),
98 fRandomState("None"),
99 fVerbose(0),
100 fWarmStart(kFALSE),
101 fClassWeight("None")
102{
103}
104
105
106//_______________________________________________________________________
108{
109}
110
111//_______________________________________________________________________
113{
114 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
115 if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
116 return kFALSE;
117}
118
119//_______________________________________________________________________
121{
123
124 DeclareOptionRef(fNestimators, "NEstimators", "Integer, optional (default=10). The number of trees in the forest.");
125 DeclareOptionRef(fCriterion, "Criterion", "String, optional (default='gini') \
126 The function to measure the quality of a split. Supported criteria are \
127 'gini' for the Gini impurity and 'entropy' for the information gain. \
128 Note: this parameter is tree-specific.");
129
130 DeclareOptionRef(fMaxDepth, "MaxDepth", "integer or None, optional (default=None) \
131 The maximum depth of the tree. If None, then nodes are expanded until \
132 all leaves are pure or until all leaves contain less than \
133 min_samples_split samples. \
134 Ignored if ``max_leaf_nodes`` is not None.");
135
136 DeclareOptionRef(fMinSamplesSplit, "MinSamplesSplit", "integer, optional (default=2)\
137 The minimum number of samples required to split an internal node.");
138
139 DeclareOptionRef(fMinSamplesLeaf, "MinSamplesLeaf", "integer, optional (default=1) \
140 The minimum number of samples in newly created leaves. A split is \
141 discarded if after the split, one of the leaves would contain less then \
142 ``min_samples_leaf`` samples.");
143 DeclareOptionRef(fMinWeightFractionLeaf, "MinWeightFractionLeaf", "//float, optional (default=0.) \
144 The minimum weighted fraction of the input samples required to be at a \
145 leaf node.");
146 DeclareOptionRef(fMaxFeatures, "MaxFeatures", "The number of features to consider when looking for the best split");
147
148 DeclareOptionRef(fMaxLeafNodes, "MaxLeafNodes", "int or None, optional (default=None)\
149 Grow trees with ``max_leaf_nodes`` in best-first fashion.\
150 Best nodes are defined as relative reduction in impurity.\
151 If None then unlimited number of leaf nodes.\
152 If not None then ``max_depth`` will be ignored.");
153
154 DeclareOptionRef(fBootstrap, "Bootstrap", "boolean, optional (default=True) \
155 Whether bootstrap samples are used when building trees.");
156
157 DeclareOptionRef(fOobScore, "OoBScore", " bool Whether to use out-of-bag samples to estimate\
158 the generalization error.");
159
160 DeclareOptionRef(fNjobs, "NJobs", " integer, optional (default=1) \
161 The number of jobs to run in parallel for both `fit` and `predict`. \
162 If -1, then the number of jobs is set to the number of cores.");
163
164 DeclareOptionRef(fRandomState, "RandomState", "int, RandomState instance or None, optional (default=None)\
165 If int, random_state is the seed used by the random number generator;\
166 If RandomState instance, random_state is the random number generator;\
167 If None, the random number generator is the RandomState instance used\
168 by `np.random`.");
169
170 DeclareOptionRef(fVerbose, "Verbose", "int, optional (default=0)\
171 Controls the verbosity of the tree building process.");
172
173 DeclareOptionRef(fWarmStart, "WarmStart", "bool, optional (default=False)\
174 When set to ``True``, reuse the solution of the previous call to fit\
175 and add more estimators to the ensemble, otherwise, just fit a whole\
176 new forest.");
177
178 DeclareOptionRef(fClassWeight, "ClassWeight", "dict, list of dicts, \"auto\", \"subsample\" or None, optional\
179 Weights associated with classes in the form ``{class_label: weight}``.\
180 If not given, all classes are supposed to have weight one. For\
181 multi-output problems, a list of dicts can be provided in the same\
182 order as the columns of y.\
183 The \"auto\" mode uses the values of y to automatically adjust\
184 weights inversely proportional to class frequencies in the input data.\
185 The \"subsample\" mode is the same as \"auto\" except that weights are\
186 computed based on the bootstrap sample for every tree grown.\
187 For multi-output, the weights of each column of y will be multiplied.\
188 Note that these weights will be multiplied with sample_weight (passed\
189 through the fit method) if sample_weight is specified.");
190
191 DeclareOptionRef(fFilenameClassifier, "FilenameClassifier",
192 "Store trained classifier in this file");
193}
194
195//_______________________________________________________________________
196// Check options and load them to local python namespace
198{
199 if (fNestimators <= 0) {
200 Log() << kFATAL << " NEstimators <=0... that does not work !! " << Endl;
201 }
203 PyDict_SetItemString(fLocalNS, "nEstimators", pNestimators);
204
205 if (fCriterion != "gini" && fCriterion != "entropy") {
206 Log() << kFATAL << Form(" Criterion = %s... that does not work !! ", fCriterion.Data())
207 << " The options are `gini` or `entropy`." << Endl;
208 }
209 pCriterion = Eval(Form("'%s'", fCriterion.Data()));
210 PyDict_SetItemString(fLocalNS, "criterion", pCriterion);
211
213 PyDict_SetItemString(fLocalNS, "maxDepth", pMaxDepth);
214 if (!pMaxDepth) {
215 Log() << kFATAL << Form(" MaxDepth = %s... that does not work !! ", fMaxDepth.Data())
216 << " The options are None or integer." << Endl;
217 }
218
219 if (fMinSamplesSplit < 0) {
220 Log() << kFATAL << " MinSamplesSplit < 0... that does not work !! " << Endl;
221 }
223 PyDict_SetItemString(fLocalNS, "minSamplesSplit", pMinSamplesSplit);
224
225 if (fMinSamplesLeaf < 0) {
226 Log() << kFATAL << " MinSamplesLeaf < 0... that does not work !! " << Endl;
227 }
229 PyDict_SetItemString(fLocalNS, "minSamplesLeaf", pMinSamplesLeaf);
230
231 if (fMinWeightFractionLeaf < 0) {
232 Log() << kERROR << " MinWeightFractionLeaf < 0... that does not work !! " << Endl;
233 }
235 PyDict_SetItemString(fLocalNS, "minWeightFractionLeaf", pMinWeightFractionLeaf);
236
237 if (fMaxFeatures == "auto") fMaxFeatures = "sqrt"; // change in API from v 1.11
238 if (fMaxFeatures == "sqrt" || fMaxFeatures == "log2"){
239 fMaxFeatures = Form("'%s'", fMaxFeatures.Data());
240 }
242 PyDict_SetItemString(fLocalNS, "maxFeatures", pMaxFeatures);
243
244 if (!pMaxFeatures) {
245 Log() << kFATAL << Form(" MaxFeatures = %s... that does not work !! ", fMaxFeatures.Data())
246 << "int, float, string or None, optional (default='auto')"
247 << "The number of features to consider when looking for the best split:"
248 << "If int, then consider `max_features` features at each split."
249 << "If float, then `max_features` is a percentage and"
250 << "`int(max_features * n_features)` features are considered at each split."
251 << "If 'auto', then `max_features=sqrt(n_features)`."
252 << "If 'sqrt', then `max_features=sqrt(n_features)`."
253 << "If 'log2', then `max_features=log2(n_features)`."
254 << "If None, then `max_features=n_features`." << Endl;
255 }
256
258 if (!pMaxLeafNodes) {
259 Log() << kFATAL << Form(" MaxLeafNodes = %s... that does not work !! ", fMaxLeafNodes.Data())
260 << " The options are None or integer." << Endl;
261 }
262 PyDict_SetItemString(fLocalNS, "maxLeafNodes", pMaxLeafNodes);
263
265 if (!pRandomState) {
266 Log() << kFATAL << Form(" RandomState = %s... that does not work !! ", fRandomState.Data())
267 << "If int, random_state is the seed used by the random number generator;"
268 << "If RandomState instance, random_state is the random number generator;"
269 << "If None, the random number generator is the RandomState instance used by `np.random`." << Endl;
270 }
271 PyDict_SetItemString(fLocalNS, "randomState", pRandomState);
272
274 if (!pClassWeight) {
275 Log() << kFATAL << Form(" ClassWeight = %s... that does not work !! ", fClassWeight.Data())
276 << "dict, list of dicts, 'auto', 'subsample' or None, optional" << Endl;
277 }
278 PyDict_SetItemString(fLocalNS, "classWeight", pClassWeight);
279
280 if(fNjobs < 1) {
281 Log() << kFATAL << Form(" NJobs = %i... that does not work !! ", fNjobs)
282 << "Value has to be greater than zero." << Endl;
283 }
284 pNjobs = Eval(Form("%i", fNjobs));
285 PyDict_SetItemString(fLocalNS, "nJobs", pNjobs);
286
287 pBootstrap = (fBootstrap) ? Eval("True") : Eval("False");
288 PyDict_SetItemString(fLocalNS, "bootstrap", pBootstrap);
289 pOobScore = (fOobScore) ? Eval("True") : Eval("False");
290 PyDict_SetItemString(fLocalNS, "oobScore", pOobScore);
291 pVerbose = Eval(Form("%i", fVerbose));
292 PyDict_SetItemString(fLocalNS, "verbose", pVerbose);
293 pWarmStart = (fWarmStart) ? Eval("True") : Eval("False");
294 PyDict_SetItemString(fLocalNS, "warmStart", pWarmStart);
295
296 // If no filename is given, set default
298 {
299 fFilenameClassifier = GetWeightFileDir() + "/PyRFModel_" + GetName() + ".PyData";
300 }
301}
302
303//_______________________________________________________________________
305{
307 _import_array(); //require to use numpy arrays
308
309 // Check options and load them to local python namespace
311
312 // Import module for random forest classifier
313 PyRunString("import sklearn.ensemble");
314
315 // Get data properties
318}
319
320//_______________________________________________________________________
322{
323 // Load training data (data, classes, weights) to python arrays
324 int fNrowsTraining = Data()->GetNTrainingEvents(); //every row is an event, a class type and a weight
325 npy_intp dimsData[2];
326 dimsData[0] = fNrowsTraining;
327 dimsData[1] = fNvars;
328 PyArrayObject * fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
329 PyDict_SetItemString(fLocalNS, "trainData", (PyObject*)fTrainData);
330 float *TrainData = (float *)(PyArray_DATA(fTrainData));
331
332 npy_intp dimsClasses = (npy_intp) fNrowsTraining;
333 PyArrayObject * fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
334 PyDict_SetItemString(fLocalNS, "trainDataClasses", (PyObject*)fTrainDataClasses);
335 float *TrainDataClasses = (float *)(PyArray_DATA(fTrainDataClasses));
336
337 PyArrayObject * fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
338 PyDict_SetItemString(fLocalNS, "trainDataWeights", (PyObject*)fTrainDataWeights);
339 float *TrainDataWeights = (float *)(PyArray_DATA(fTrainDataWeights));
340
341 for (int i = 0; i < fNrowsTraining; i++) {
342 // Fill training data matrix
343 const TMVA::Event *e = Data()->GetTrainingEvent(i);
344 for (UInt_t j = 0; j < fNvars; j++) {
345 TrainData[j + i * fNvars] = e->GetValue(j);
346 }
347
348 // Fill target classes
349 TrainDataClasses[i] = e->GetClass();
350
351 // Get event weight
352 TrainDataWeights[i] = e->GetWeight();
353 }
354
355 // Create classifier object
356 PyRunString("classifier = sklearn.ensemble.RandomForestClassifier(bootstrap=bootstrap, class_weight=classWeight, criterion=criterion, max_depth=maxDepth, max_features=maxFeatures, max_leaf_nodes=maxLeafNodes, min_samples_leaf=minSamplesLeaf, min_samples_split=minSamplesSplit, min_weight_fraction_leaf=minWeightFractionLeaf, n_estimators=nEstimators, n_jobs=nJobs, oob_score=oobScore, random_state=randomState, verbose=verbose, warm_start=warmStart)",
357 "Failed to setup classifier");
358
359 // Fit classifier
360 // NOTE: We dump the output to a variable so that the call does not pollute stdout
361 PyRunString("dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)", "Failed to train classifier");
362
363 // Store classifier
364 fClassifier = PyDict_GetItemString(fLocalNS, "classifier");
365 if(fClassifier == 0) {
366 Log() << kFATAL << "Can't create classifier object from RandomForestClassifier" << Endl;
367 Log() << Endl;
368 }
369
370 if (IsModelPersistence()) {
371 Log() << Endl;
372 Log() << gTools().Color("bold") << "Saving state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
373 Log() << Endl;
375 }
376}
377
378//_______________________________________________________________________
380{
382}
383
384//_______________________________________________________________________
// Evaluate the classifier on a range of events of the current dataset and
// return mvaValues (by value), resized to one entry per event in the range.
//
// firstEvt / lastEvt : requested event range. lastEvt is reset to the total
//                      number of events when firstEvt > lastEvt or when it
//                      exceeds that total; a negative firstEvt is set to 0.
// logProgress        : when true, a header line and the elapsed evaluation
//                      time are written to the logger.
//
// The classifier is loaded through ReadModelFromFile() if fClassifier is
// still null when the function is entered.
385std::vector<Double_t> MethodPyRandomForest::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
386{
387 // Load model if not already done
388 if (fClassifier == 0) ReadModelFromFile();
389
390 // Determine number of events
// After the two range checks below, nEvents is the length of the requested
// range (lastEvt - firstEvt), no longer the size of the dataset.
391 Long64_t nEvents = Data()->GetNEvents();
392 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
393 if (firstEvt < 0) firstEvt = 0;
394 nEvents = lastEvt-firstEvt;
395
396 // use timer
397 Timer timer( nEvents, GetName(), kTRUE );
398
399 if (logProgress)
400 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
401 << "Evaluation of " << GetMethodName() << " on "
402 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
403 << " sample (" << nEvents << " events)" << Endl;
404
405 // Get data
// A nEvents x fNvars NumPy array of NPY_FLOAT is allocated and filled
// row by row through its raw data pointer (one row per event).
406 npy_intp dims[2];
407 dims[0] = nEvents;
408 dims[1] = fNvars;
409 PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
410 float *pValue = (float *)(PyArray_DATA(pEvent));
411
// NOTE(review): SetCurrentEvent() receives ievt, which starts at 0, so
// firstEvt only influences how many events are read, not which ones.
// Confirm whether an offset of firstEvt was intended here.
412 for (Int_t ievt=0; ievt<nEvents; ievt++) {
413 Data()->SetCurrentEvent(ievt);
414 const TMVA::Event *e = Data()->GetEvent();
415 for (UInt_t i = 0; i < fNvars; i++) {
416 pValue[ievt * fNvars + i] = e->GetValue(i);
417 }
418 }
419
420 // Get prediction from classifier
// NOTE(review): result is passed to PyArray_DATA without a null check;
// PyObject_CallMethod returns NULL when the call fails.
// The cast to double* assumes predict_proba hands back a contiguous array
// of doubles - TODO confirm.
421 PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
422 double *proba = (double *)(PyArray_DATA(result));
423
424 // Return signal probabilities
425 if(Long64_t(mvaValues.size()) != nEvents) mvaValues.resize(nEvents);
// NOTE(review): the statement inside this loop (listing line 427) is absent
// from this view; presumably it stores the signal probability of event i in
// mvaValues[i] - verify against the original file.
426 for (int i = 0; i < nEvents; ++i) {
428 }
429
// Drop the references held on the input array and on the prediction result.
430 Py_DECREF(pEvent);
431 Py_DECREF(result);
432
433 if (logProgress) {
434 Log() << kINFO
435 << "Elapsed time for evaluation of " << nEvents << " events: "
436 << timer.GetElapsedTime() << " " << Endl;
437 }
438
439 return mvaValues;
440}
441
442//_______________________________________________________________________
444{
445 // cannot determine error
446 NoErrorCalc(errLower, errUpper);
447
448 // Load model if not already done
449 if (fClassifier == 0) ReadModelFromFile();
450
451 // Get current event and load to python array
452 const TMVA::Event *e = Data()->GetEvent();
453 npy_intp dims[2];
454 dims[0] = 1;
455 dims[1] = fNvars;
456 PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
457 float *pValue = (float *)(PyArray_DATA(pEvent));
458 for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);
459
460 // Get prediction from classifier
461 PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
462 double *proba = (double *)(PyArray_DATA(result));
463
464 // Return MVA value
465 Double_t mvaValue;
466 mvaValue = proba[TMVA::Types::kSignal]; // getting signal probability
467
468 Py_DECREF(result);
469 Py_DECREF(pEvent);
470
471 return mvaValue;
472}
473
474//_______________________________________________________________________
476{
477 // Load model if not already done
478 if (fClassifier == 0) ReadModelFromFile();
479
480 // Get current event and load to python array
481 const TMVA::Event *e = Data()->GetEvent();
482 npy_intp dims[2];
483 dims[0] = 1;
484 dims[1] = fNvars;
485 PyArrayObject *pEvent= (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
486 float *pValue = (float *)(PyArray_DATA(pEvent));
487 for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);
488
489 // Get prediction from classifier
490 PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
491 double *proba = (double *)(PyArray_DATA(result));
492
493 // Return MVA values
494 if(UInt_t(classValues.size()) != fNoutputs) classValues.resize(fNoutputs);
495 for(UInt_t i = 0; i < fNoutputs; i++) classValues[i] = proba[i];
496
497 Py_DECREF(pEvent);
498 Py_DECREF(result);
499
500 return classValues;
501}
502
503//_______________________________________________________________________
505{
506 if (!PyIsInitialized()) {
507 PyInitialize();
508 }
509
510 Log() << Endl;
511 Log() << gTools().Color("bold") << "Loading state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
512 Log() << Endl;
513
514 // Load classifier from file
516 if(err != 0)
517 {
518 Log() << kFATAL << Form("Failed to load classifier from file (error code: %i): %s", err, fFilenameClassifier.Data()) << Endl;
519 }
520
521 // Book classifier object in python dict
522 PyDict_SetItemString(fLocalNS, "classifier", fClassifier);
523
524 // Load data properties
525 // NOTE: This has to be repeated here for the reader application
528}
529
530//_______________________________________________________________________
532{
533 // Get feature importance from classifier as an array with length equal
534 // number of variables, higher value signals a higher importance
535 PyArrayObject* pRanking = (PyArrayObject*) PyObject_GetAttrString(fClassifier, "feature_importances_");
536 if(pRanking == 0) Log() << kFATAL << "Failed to get ranking from classifier" << Endl;
537
538 // Fill ranking object and return it
539 fRanking = new Ranking(GetName(), "Variable Importance");
540 Double_t* rankingData = (Double_t*) PyArray_DATA(pRanking);
541 for(UInt_t iVar=0; iVar<fNvars; iVar++){
542 fRanking->AddRank(Rank(GetInputLabel(iVar), rankingData[iVar]));
543 }
544
545 Py_DECREF(pRanking);
546
547 return fRanking;
548}
549
550//_______________________________________________________________________
552{
553 // typical length of text line:
554 // "|--------------------------------------------------------------|"
555 Log() << "A random forest is a meta estimator that fits a number of decision" << Endl;
556 Log() << "tree classifiers on various sub-samples of the dataset and use" << Endl;
557 Log() << "averaging to improve the predictive accuracy and control over-fitting." << Endl;
558 Log() << Endl;
559 Log() << "Check out the scikit-learn documentation for more information." << Endl;
560}
#define REGISTER_METHOD(CLASS)
for example
_object PyObject
#define e(i)
Definition RSha256.hxx:103
unsigned int UInt_t
Definition RtypesCore.h:46
constexpr Bool_t kFALSE
Definition RtypesCore.h:101
long long Long64_t
Definition RtypesCore.h:80
constexpr Bool_t kTRUE
Definition RtypesCore.h:100
#define ClassImp(name)
Definition Rtypes.h:377
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2467
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
MsgLogger & Log() const
Class that contains all the data information.
Definition DataSetInfo.h:62
UInt_t GetNClasses() const
const Event * GetEvent() const
returns event without transformations
Definition DataSet.cxx:202
Types::ETreeType GetCurrentType() const
Definition DataSet.h:194
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition DataSet.h:206
Long64_t GetNTrainingEvents() const
Definition DataSet.h:68
void SetCurrentEvent(Long64_t ievt) const
Definition DataSet.h:88
const Event * GetTrainingEvent(Long64_t ievt) const
Definition DataSet.h:74
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition Event.cxx:236
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
const char * GetName() const
Definition MethodBase.h:334
Bool_t IsModelPersistence() const
Definition MethodBase.h:383
const TString & GetWeightFileDir() const
Definition MethodBase.h:492
const TString & GetMethodName() const
Definition MethodBase.h:331
DataSetInfo & DataInfo() const
Definition MethodBase.h:410
virtual void TestClassification()
initialization
UInt_t GetNVariables() const
Definition MethodBase.h:345
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
const TString & GetInputLabel(Int_t i) const
Definition MethodBase.h:350
Ranking * fRanking
Definition MethodBase.h:587
DataSet * Data() const
Definition MethodBase.h:409
MethodPyRandomForest(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
std::vector< Float_t > & GetMulticlassValues()
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
std::vector< Float_t > classValues
std::vector< Double_t > mvaValues
Double_t GetMvaValue(Double_t *errLower=nullptr, Double_t *errUpper=nullptr)
virtual void TestClassification()
initialization
static int PyIsInitialized()
Check Python interpreter initialization status.
PyObject * Eval(TString code)
Evaluate Python code.
static void PyInitialize()
Initialize Python interpreter.
static void Serialize(TString file, PyObject *classifier)
Serialize Python object.
static Int_t UnSerialize(TString file, PyObject **obj)
Unserialize Python object.
PyObject * fClassifier
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=256)
Execute Python code from string.
Ranking for variables in method (implementation)
Definition Ranking.h:48
virtual void AddRank(const Rank &rank)
Add a new rank take ownership of it.
Definition Ranking.cxx:86
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition Timer.cxx:146
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:828
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Definition Types.h:135
@ kMulticlass
Definition Types.h:129
@ kClassification
Definition Types.h:127
@ kTraining
Definition Types.h:143
Basic string class.
Definition TString.h:139
const char * Data() const
Definition TString.h:380
Bool_t IsNull() const
Definition TString.h:418
create variable transformations
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148