138   , fNodePurityLimit(0)
 
  144   , fRandomisedTrees(
kFALSE)
 
  146   , fUsePoissonNvars(0)  
 
  147   , fDeltaPruneStrength(0)
 
  156                          const TString& theWeightFile) :
 
  164   , fNodePurityLimit(0)
 
  170   , fRandomisedTrees(
kFALSE)
 
  172   , fDeltaPruneStrength(0)
 
  214   DeclareOptionRef(fRandomisedTrees,
"UseRandomisedTrees",
"Choose at each node splitting a random set of variables and *bagging*");
 
  215   DeclareOptionRef(fUseNvars,
"UseNvars",
"Number of variables used if randomised Tree option is chosen");
 
  216   DeclareOptionRef(fUsePoissonNvars,
"UsePoissonNvars", 
"Interpret \"UseNvars\" not as fixed number but as mean of a Poisson distribution in each split with RandomisedTree option");
 
  217   DeclareOptionRef(fUseYesNoLeaf=
kTRUE, 
"UseYesNoLeaf",
 
  218                    "Use Sig or Bkg node type or the ratio S/B as classification in the leaf node");
 
  219   DeclareOptionRef(fNodePurityLimit=0.5, 
"NodePurityLimit", 
"In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");
 
  220   DeclareOptionRef(fSepTypeS=
"GiniIndex", 
"SeparationType", 
"Separation criterion for node splitting");
 
  221   AddPreDefVal(
TString(
"MisClassificationError"));
 
  222   AddPreDefVal(
TString(
"GiniIndex"));
 
  223   AddPreDefVal(
TString(
"CrossEntropy"));
 
  224   AddPreDefVal(
TString(
"SDivSqrtSPlusB"));
 
  225   DeclareOptionRef(fMinNodeEvents=-1, 
"nEventsMin", 
"deprecated !!! Minimum number of events required in a leaf node");
 
  226   DeclareOptionRef(fMinNodeSizeS, 
"MinNodeSize", 
"Minimum percentage of training events required in a leaf node (default: Classification: 10%, Regression: 1%)");
 
  227   DeclareOptionRef(fNCuts, 
"nCuts", 
"Number of steps during node cut optimisation");
 
  228   DeclareOptionRef(fPruneStrength, 
"PruneStrength", 
"Pruning strength (negative value == automatic adjustment)");
 
  229   DeclareOptionRef(fPruneMethodS=
"NoPruning", 
"PruneMethod", 
"Pruning method: NoPruning (switched off), ExpectedError or CostComplexity");
 
  231   AddPreDefVal(
TString(
"NoPruning"));
 
  232   AddPreDefVal(
TString(
"ExpectedError"));
 
  233   AddPreDefVal(
TString(
"CostComplexity"));
 
  235   if (DoRegression()) {
 
  236      DeclareOptionRef(fMaxDepth=50,
"MaxDepth",
"Max depth of the decision tree allowed");
 
  238      DeclareOptionRef(fMaxDepth=3,
"MaxDepth",
"Max depth of the decision tree allowed");
 
  249   DeclareOptionRef(fPruneBeforeBoost=
kFALSE, 
"PruneBeforeBoost",
 
  250                    "--> removed option .. only kept for reader backward compatibility");
 
  260   else if (fSepTypeS == 
"giniindex")              fSepType = 
new GiniIndex();
 
  261   else if (fSepTypeS == 
"crossentropy")           fSepType = 
new CrossEntropy();
 
  262   else if (fSepTypeS == 
"sdivsqrtsplusb")         fSepType = 
new SdivSqrtSplusB();
 
  264      Log() << kINFO << GetOptions() << 
Endl;
 
  265      Log() << kFATAL << 
"<ProcessOptions> unknown Separation Index option called" << 
Endl;
 
  270   fPruneMethodS.ToLower();
 
  275      Log() << kINFO << GetOptions() << 
Endl;
 
  276      Log() << kFATAL << 
"<ProcessOptions> unknown PruneMethod option:" << fPruneMethodS <<
" called" << 
Endl;
 
  279   if (fPruneStrength < 0) fAutomatic = 
kTRUE;
 
  283            <<  
"Sorry automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << 
Endl;
 
  287   if (this->Data()->HasNegativeEventWeights()){
 
  288      Log() << kINFO << 
" You are using a Monte Carlo that has also negative weights. " 
  289            << 
"That should in principle be fine as long as on average you end up with " 
  290            << 
"something positive. For this you have to make sure that the minimal number " 
  291            << 
"of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize=" 
  293            <<
", (or the deprecated equivalent nEventsMin) you can set this via the " 
  294            <<
"MethodDT option string when booking the " 
  295            << 
"classifier) is large enough to allow for reasonable averaging!!! " 
  296            << 
" If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining  " 
  297            << 
"which ignores events with negative weight in the training. " << 
Endl 
  298            << 
Endl << 
"Note: You'll get a WARNING message during the training if that should ever happen" << 
Endl;
 
  301   if (fRandomisedTrees){
 
  302      Log() << kINFO << 
" Randomised trees should use *bagging* as *boost* method. Did you set this in the *MethodBoost* ? . Here I can enforce only the *no pruning*" << 
Endl;
 
  307   if (fMinNodeEvents > 0){
 
  308      fMinNodeSize = fMinNodeEvents / Data()->GetNTrainingEvents() * 100;
 
  309      Log() << kWARNING << 
"You have explicitly set *nEventsMin*, the min absolute number \n" 
  310            << 
"of events in a leaf node. This is DEPRECATED, please use the option \n" 
  311            << 
"*MinNodeSize* giving the relative number as percentage of training \n" 
  312            << 
"events instead. \n" 
  313            << 
"nEventsMin="<<fMinNodeEvents<< 
"--> MinNodeSize="<<fMinNodeSize<<
"%" 
  316      SetMinNodeSize(fMinNodeSizeS);
 
  321   if (sizeInPercent > 0 && sizeInPercent < 50){
 
  322      fMinNodeSize=sizeInPercent;
 
  325      Log() << kERROR << 
"you have demanded a minimal node size of " 
  326            << sizeInPercent << 
"% of the training events.. \n" 
  327            << 
" that somehow does not make sense "<<
Endl;
 
  333   if (sizeInPercent.
IsAlnum()) SetMinNodeSize(sizeInPercent.
Atof());
 
  335      Log() << kERROR << 
"I had problems reading the option MinNodeEvents, which\n" 
  336            << 
"after removing a possible % sign now reads " << sizeInPercent << 
Endl;
 
  347   fMinNodeSizeS   = 
"5%";
 
  351   fDeltaPruneStrength=0.1;
 
  353   fUseNvars       = GetNvar();
 
  354   fUsePoissonNvars = 
kTRUE;
 
  357   SetSignalReferenceCut( 0 );
 
  378   fTree = 
new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), 0,
 
  379                             fRandomisedTrees, fUseNvars, fUsePoissonNvars,fMaxDepth,0 );
 
  380   fTree->SetNVars(GetNvar());
 
  381   if (fRandomisedTrees) Log()<<kWARNING<<
" randomised Trees do not work yet in this framework," 
  382                              << 
" as I do not know how to give each tree a new random seed, now they" 
  383                              << 
" will be all the same and that is not good " << 
Endl;
 
  384   fTree->SetAnalysisType( GetAnalysisType() );
 
  388   UInt_t nevents = Data()->GetNTrainingEvents();
 
  389   std::vector<const TMVA::Event*> tmp;
 
  390   for (
Long64_t ievt=0; ievt<nevents; ievt++) {
 
  391      const Event *
event = GetEvent(ievt);
 
  392      tmp.push_back(
event);
 
  394   fTree->BuildTree(tmp);
 
  416      for(
UInt_t i = 0; i < nodes.size(); i++)
 
  417         fTree->PruneNode(nodes[i]);
 
  501   return fPruneStrength;
 
  511   for (
Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++)
 
  513         const Event * ev = Data()->GetEvent(ievt);
 
  518   return  SumCorrect / (SumCorrect + SumWrong);
 
  525   fTree->AddXMLTo(parent);
 
  536   fTree->ReadXML(wghtnode,GetTrainingTMVAVersionCode());
 
  554   NoErrorCalc(err, errUpper);
 
  556   return fTree->CheckEvent(GetEvent(),fUseYesNoLeaf);
 
#define REGISTER_METHOD(CLASS)
for example
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
 
A helper class to prune a decision tree using the Cost Complexity method (see Classification and Regr...
 
void SetPruneStrength(Float_t alpha=-1.0)
 
void Optimize()
determine the pruning sequence
 
std::vector< TMVA::DecisionTreeNode * > GetOptimalPruneSequence() const
return the prune strength (=alpha) corresponding to the prune sequence
 
Float_t GetOptimalPruneStrength() const
 
Implementation of the CrossEntropy as separation criterion.
 
Class that contains all the data information.
 
static void SetIsTraining(bool on)
 
Implementation of a Decision Tree.
 
Double_t GetNodePurityLimit() const
 
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
 
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
 
Implementation of the GiniIndex as separation criterion.
 
Virtual base class for all MVA methods.
 
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
 
Analysis of Boosted Decision Trees.
 
virtual ~MethodDT(void)
destructor
 
MethodDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
the standard constructor for just an ordinary "decision tree"
 
Double_t TestTreeQuality(DecisionTree *dt)
 
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
 
const Ranking * CreateRanking()
 
void ReadWeightsFromXML(void *wghtnode)
 
void GetHelpMessage() const
 
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr)
returns MVA value
 
void AddWeightsXMLTo(void *parent) const
 
Double_t PruneTree()
prune the decision tree if requested (good for individual trees that are best grown out,...
 
void ReadWeightsFromStream(std::istream &istr)
 
void DeclareOptions()
Define the options (their key words) that can be set in the option string.
 
Bool_t fPruneBeforeBoost
ancient variable, only needed for "CompatibilityOptions"
 
void Init(void)
common initialisation with defaults for the DT-Method
 
void SetMinNodeSize(Double_t sizeInPercent)
 
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
 
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
 
Implementation of the MisClassificationError as separation criterion.
 
Ranking for variables in method (implementation)
 
Implementation of the SdivSqrtSplusB as separation criterion.
 
Singleton class for Global types used by TMVA.
 
@ kValidation
these are placeholders... currently not used, but could be moved "forward" if
 
Double_t Atof() const
Return floating-point value contained in string.
 
TString & ReplaceAll(const TString &s1, const TString &s2)
 
Bool_t IsAlnum() const
Returns true if all characters in string are alphanumeric.
 
create variable transformations
 
MsgLogger & Endl(MsgLogger &ml)