ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
MethodPyGTB.h
Go to the documentation of this file.
1 // @(#)root/tmva/pymva $Id$
2 // Authors: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer 2015
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodPyGTB *
8  * Web : http://oproject.org *
9  * *
10  * Description: *
11  * scikit-learn Package GradientBoostingClassifier method based on python *
12  * *
13  **********************************************************************************/
14 
15 #ifndef ROOT_TMVA_MethodPyGTB
16 #define ROOT_TMVA_MethodPyGTB
17 
18 //////////////////////////////////////////////////////////////////////////
19 // //
20 // MethodPyGTB //
21 // //
22 // //
23 //////////////////////////////////////////////////////////////////////////
24 
25 #ifndef ROOT_TMVA_PyMethodBase
26 #include "TMVA/PyMethodBase.h"
27 #endif
28 
29 namespace TMVA {
30 
31  class Factory; // DSMTEST
32  class Reader; // DSMTEST
33  class DataSetManager; // DSMTEST
34  class Types;
35  class MethodPyGTB : public PyMethodBase {
36 
37  public :
38 
39  // constructors
40  MethodPyGTB(const TString &jobName,
41  const TString &methodTitle,
42  DataSetInfo &theData,
43  const TString &theOption = "",
44  TDirectory *theTargetDir = NULL);
45 
47  const TString &theWeightFile,
48  TDirectory *theTargetDir = NULL);
49 
50 
51  ~MethodPyGTB(void);
52  void Train();
53  // options treatment
54  void Init();
55  void DeclareOptions();
56  void ProcessOptions();
57  // create ranking
59  {
60  return NULL; // = 0;
61  }
62 
63 
64  Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets);
65 
66  // performs classifier testing
67  virtual void TestClassification();
68 
69 
70  Double_t GetMvaValue(Double_t *errLower = 0, Double_t *errUpper = 0);
71 
73  // the actual "weights"
74  virtual void AddWeightsXMLTo(void * /* parent */ ) const {} // = 0;
75  virtual void ReadWeightsFromXML(void * /*wghtnode*/) {} // = 0;
76  virtual void ReadWeightsFromStream(std::istream &) {} //= 0; // backward compatibility
77  void ReadStateFromFile();
78  private :
80  friend class Factory; // DSMTEST
81  friend class Reader; // DSMTEST
82  protected:
83  //GTB options
84  TString loss;// {'deviance', 'exponential'}, optional (default='deviance')
85  //loss function to be optimized. 'deviance' refers to
86  //deviance (= logistic regression) for classification
87  //with probabilistic outputs. For loss 'exponential' gradient
88  //boosting recovers the AdaBoost algorithm.
89  Double_t learning_rate;//float, optional (default=0.1)
90  //learning rate shrinks the contribution of each tree by `learning_rate`.
91  //There is a trade-off between learning_rate and n_estimators.
92 
93  Int_t n_estimators;//integer, optional (default=10)
94  //The number of boosting stages (trees) to perform.
95  Double_t subsample;//float, optional (default=1.0)
96  //The fraction of samples to be used for fitting the individual base
97  //learners. If smaller than 1.0 this results in Stochastic Gradient
98  //Boosting. `subsample` interacts with the parameter `n_estimators`.
99  //Choosing `subsample < 1.0` leads to a reduction of variance
100  //and an increase in bias.
101  Int_t min_samples_split;// integer, optional (default=2)
102  //The minimum number of samples required to split an internal node.
103  Int_t min_samples_leaf;//integer, optional (default=1)
104  //The minimum number of samples required to be at a leaf node.
105  Double_t min_weight_fraction_leaf;//float, optional (default=0.)
106  //The minimum weighted fraction of the input samples required to be at a leaf node.
107  Int_t max_depth;//integer, optional (default=3)
108  //maximum depth of the individual regression estimators. The maximum
109  //depth limits the number of nodes in the tree. Tune this parameter
110  //for best performance; the best value depends on the interaction
111  //of the input variables.
112  //Ignored if ``max_leaf_nodes`` is not None.
113 
114  TString init;//BaseEstimator, None, optional (default=None)
115  //An estimator object that is used to compute the initial
116  //predictions. ``init`` has to provide ``fit`` and ``predict``.
117  //If None it uses ``loss.init_estimator``.
118  TString random_state;//int, RandomState instance or None, optional (default=None)
119  //If int, random_state is the seed used by the random number generator;
120  //If RandomState instance, random_state is the random number generator;
121  //If None, the random number generator is the RandomState instance used
122  //by `np.random`.
123  TString max_features;//int, float, string or None, optional (default="auto")
124  //The number of features to consider when looking for the best split:
125  //- If int, then consider `max_features` features at each split.
126  //- If float, then `max_features` is a fraction and
127  //`int(max_features * n_features)` features are considered at each split.
128  //- If "auto", then `max_features=sqrt(n_features)`.
129  //- If "sqrt", then `max_features=sqrt(n_features)`.
130  //- If "log2", then `max_features=log2(n_features)`.
131  //- If None, then `max_features=n_features`.
132  // Note: the search for a split does not stop until at least one
133  // valid partition of the node samples is found, even if that requires
134  // effectively inspecting more than ``max_features`` features.
135  // Note: this parameter is tree-specific.
136  Int_t verbose;//Controls the verbosity of the tree building process.
137  TString max_leaf_nodes;//int or None, optional (default=None)
138  //Grow trees with ``max_leaf_nodes`` in best-first fashion.
139  //Best nodes are defined as relative reduction in impurity.
140  //If None then unlimited number of leaf nodes.
141  //If not None then ``max_depth`` will be ignored.
142 
143  Bool_t warm_start;//bool, optional (default=False)
144  //When set to ``True``, reuse the solution of the previous call to fit
145  //and add more estimators to the ensemble; otherwise, just fit a whole
146  //new ensemble.
147  // get help message text
148  void GetHelpMessage() const;
149 
150 
152  };
153 } // namespace TMVA
154 #endif
virtual void ReadWeightsFromStream(std::istream &)
Definition: MethodPyGTB.h:76
Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)
EAnalysisType
Definition: Types.h:124
Basic string class.
Definition: TString.h:137
const Ranking * CreateRanking()
Definition: MethodPyGTB.h:58
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
Double_t min_weight_fraction_leaf
Definition: MethodPyGTB.h:105
TString max_leaf_nodes
Definition: MethodPyGTB.h:137
#define ClassDef(name, id)
Definition: Rtypes.h:254
Double_t subsample
Definition: MethodPyGTB.h:95
virtual void AddWeightsXMLTo(void *) const
Definition: MethodPyGTB.h:74
void GetHelpMessage() const
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
unsigned int UInt_t
Definition: RtypesCore.h:42
TString random_state
Definition: MethodPyGTB.h:118
MethodPyGTB(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="", TDirectory *theTargetDir=NULL)
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:44
DataSetManager * fDataSetManager
Definition: MethodPyGTB.h:79
int type
Definition: TGX11.cxx:120
Double_t learning_rate
Definition: MethodPyGTB.h:89
TString max_features
Definition: MethodPyGTB.h:123
virtual void ReadWeightsFromXML(void *)
Definition: MethodPyGTB.h:75
#define NULL
Definition: Rtypes.h:82
virtual void ReadWeightsFromStream(std::istream &)=0
virtual void TestClassification()
initialization