TMVA::MethodDT::MethodDT( const TString& jobName,
                          const TString& methodTitle,
                          DataSetInfo& theData,
                          const TString& theOption,
                          TDirectory* theTargetDir ) :
   TMVA::MethodBase( jobName, Types::kDT, methodTitle, theData, theOption, theTargetDir )
   , fNodePurityLimit(0)
   , fPruneMethod(DecisionTree::kNoPruning)
   , fRandomisedTrees(kFALSE)
   , fUsePoissonNvars(0)
   , fDeltaPruneStrength(0)
{
   // the standard constructor for an ordinary "decision tree"
}
TMVA::MethodDT::MethodDT( DataSetInfo& theData,
                          const TString& theWeightFile,
                          TDirectory* theTargetDir ) :
   TMVA::MethodBase( Types::kDT, theData, theWeightFile, theTargetDir )
   , fNodePurityLimit(0)
   , fRandomisedTrees(kFALSE)
   , fDeltaPruneStrength(0)
{
   // constructor used when reading the method from a weight file
}
void TMVA::MethodDT::DeclareOptions()
{
   // define the options (their key words) that can be set in the option string
   DeclareOptionRef(fRandomisedTrees, "UseRandomisedTrees",
                    "Choose at each node splitting a random set of variables and *bagging*");
   DeclareOptionRef(fUseNvars, "UseNvars",
                    "Number of variables used if the randomised tree option is chosen");
   DeclareOptionRef(fUsePoissonNvars, "UsePoissonNvars",
                    "Interpret \"UseNvars\" not as a fixed number but as the mean of a Poisson distribution in each split, with the RandomisedTree option");
   DeclareOptionRef(fUseYesNoLeaf=kTRUE, "UseYesNoLeaf",
                    "Use Sig or Bkg node type or the ratio S/B as classification in the leaf node");
   DeclareOptionRef(fNodePurityLimit=0.5, "NodePurityLimit",
                    "In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");
   DeclareOptionRef(fSepTypeS="GiniIndex", "SeparationType",
                    "Separation criterion for node splitting");
   AddPreDefVal(TString("MisClassificationError"));
   AddPreDefVal(TString("GiniIndex"));
   AddPreDefVal(TString("CrossEntropy"));
   AddPreDefVal(TString("SDivSqrtSPlusB"));
   DeclareOptionRef(fMinNodeEvents=-1, "nEventsMin",
                    "Deprecated: minimum number of events required in a leaf node");
   DeclareOptionRef(fMinNodeSizeS, "MinNodeSize",
                    "Minimum percentage of training events required in a leaf node (default: classification: 10%, regression: 1%)");
   DeclareOptionRef(fNCuts, "nCuts",
                    "Number of steps during node cut optimisation");
   DeclareOptionRef(fPruneStrength, "PruneStrength",
                    "Pruning strength (negative value == automatic adjustment)");
   DeclareOptionRef(fPruneMethodS="NoPruning", "PruneMethod",
                    "Pruning method: NoPruning (switched off), ExpectedError or CostComplexity");
   AddPreDefVal(TString("NoPruning"));
   AddPreDefVal(TString("ExpectedError"));
   AddPreDefVal(TString("CostComplexity"));
   if (DoRegression()) {
      DeclareOptionRef(fMaxDepth=50, "MaxDepth", "Max depth of the decision tree allowed");
   }
   else {
      DeclareOptionRef(fMaxDepth=3, "MaxDepth", "Max depth of the decision tree allowed");
   }
}
void TMVA::MethodDT::DeclareCompatibilityOptions()
{
   // options that are used ONLY for the READER to ensure backward compatibility
   MethodBase::DeclareCompatibilityOptions();
   DeclareOptionRef(fPruneBeforeBoost=kFALSE, "PruneBeforeBoost",
                    "--> removed option, kept only for reader backward compatibility");
}
void TMVA::MethodDT::ProcessOptions()
{
   // the option string is decoded; for available options see "DeclareOptions"
   fSepTypeS.ToLower();
   if      (fSepTypeS == "misclassificationerror") fSepType = new MisClassificationError();
   else if (fSepTypeS == "giniindex")              fSepType = new GiniIndex();
   else if (fSepTypeS == "crossentropy")           fSepType = new CrossEntropy();
   else if (fSepTypeS == "sdivsqrtsplusb")         fSepType = new SdivSqrtSplusB();
   else {
      Log() << kFATAL << "<ProcessOptions> unknown Separation Index option requested" << Endl;
   }
   fPruneMethodS.ToLower();
   if      (fPruneMethodS == "expectederror")  fPruneMethod = DecisionTree::kExpectedErrorPruning;
   else if (fPruneMethodS == "costcomplexity") fPruneMethod = DecisionTree::kCostComplexityPruning;
   else if (fPruneMethodS == "nopruning")      fPruneMethod = DecisionTree::kNoPruning;
   else {
      Log() << kFATAL << "<ProcessOptions> unknown PruneMethod option: " << fPruneMethodS << Endl;
   }
   if (fPruneStrength < 0) fAutomatic = kTRUE;
   else                    fAutomatic = kFALSE;

   if (fAutomatic && fPruneMethod == DecisionTree::kExpectedErrorPruning) {
      Log() << kFATAL
            << "Sorry, automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << Endl;
   }
   if (this->Data()->HasNegativeEventWeights()) {
      Log() << kINFO << "You are using a Monte Carlo sample that also contains negative weights. "
            << "That should in principle be fine, as long as on average you end up with "
            << "something positive. For this you have to make sure that the minimal number "
            << "of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize="
            << fMinNodeSizeS << ", or the deprecated equivalent nEventsMin; you can set this via the "
            << "MethodDT option string when booking the classifier) is large enough to allow for "
            << "reasonable averaging! "
            << "If this does not help, you may want to try the option IgnoreNegWeightsInTraining, "
            << "which ignores events with negative weight in the training. " << Endl
            << Endl << "Note: you'll get a WARNING message during the training if that should ever happen" << Endl;
   }
   if (fRandomisedTrees) {
      Log() << kINFO << "Randomised trees should use *bagging* as the *boost* method. Did you set this in *MethodBoost*? Here, only *no pruning* can be enforced." << Endl;
      fPruneMethod = DecisionTree::kNoPruning;
   }
   if (fMinNodeEvents > 0) {
      fMinNodeSize = 100.0 * fMinNodeEvents / Data()->GetNTrainingEvents();
      Log() << kWARNING << "You have explicitly set *nEventsMin*, the minimal absolute number \n"
            << "of events in a leaf node. This is DEPRECATED, please use the option \n"
            << "*MinNodeSize* giving the relative number as percentage of training \n"
            << "events instead. \n"
            << "nEventsMin=" << fMinNodeEvents << " --> MinNodeSize=" << fMinNodeSize << "%"
            << Endl;
   }
   else SetMinNodeSize(fMinNodeSizeS);
}
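// Worked example of the nEventsMin --> MinNodeSize conversion above:
// nEventsMin=100 with 10000 training events gives
// MinNodeSize = 100.0 * 100 / 10000 = 1, i.e. 1% of the training events.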
void TMVA::MethodDT::SetMinNodeSize(Double_t sizeInPercent)
{
   if (sizeInPercent > 0 && sizeInPercent < 50) {
      fMinNodeSize = sizeInPercent;
   }
   else {
      Log() << kERROR << "you have demanded a minimal node size of "
            << sizeInPercent << "% of the training events, "
            << "which does not make sense" << Endl;
   }
}
void TMVA::MethodDT::SetMinNodeSize(TString sizeInPercent)
{
   sizeInPercent.ReplaceAll("%", "");
   if (sizeInPercent.IsAlnum()) SetMinNodeSize(sizeInPercent.Atof());
   else {
      Log() << kERROR << "I had problems reading the option MinNodeSize, which\n"
            << "after removing a possible % sign now reads " << sizeInPercent << Endl;
   }
}
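// Usage note: booking with "MinNodeSize=5%" stores the string in fMinNodeSizeS;
// ProcessOptions() then calls SetMinNodeSize("5%"), which strips the '%' sign
// and forwards 5.0 to the Double_t overload above.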
void TMVA::MethodDT::Init( void )
{
   // common initialisation with defaults for the DT-Method
   fMinNodeSizeS       = "5%";
   fDeltaPruneStrength = 0.1;
   fUseNvars           = GetNvar();
   fUsePoissonNvars    = kTRUE;

   // reference cut value to distinguish signal-like from background-like events
   SetSignalReferenceCut( 0 );
}
void TMVA::MethodDT::Train( void )
{
   fTree = new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), 0,
                             fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth, 0 );
   fTree->SetNVars(GetNvar());
   if (fRandomisedTrees)
      Log() << kWARNING << "randomised trees do not work yet in this framework,"
            << " as there is no way yet to give each tree a new random seed; they"
            << " will all be the same, and that is not good" << Endl;
   fTree->SetAnalysisType( GetAnalysisType() );

   // collect the training events and grow the tree on them
   UInt_t nevents = Data()->GetNTrainingEvents();
   std::vector<const TMVA::Event*> tmp;
   for (UInt_t ievt=0; ievt<nevents; ievt++) {
      const Event *event = GetEvent(ievt);
      tmp.push_back(event);
   }
   fTree->BuildTree(tmp);
}
   // prune away the nodes in the optimal prune sequence
   for (UInt_t i = 0; i < nodes.size(); i++)
      fTree->PruneNode(nodes[i]);
   return fPruneStrength;
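// Background note (standard CART cost-complexity pruning; not code from this
// file): the prune strength acts as the regularisation parameter alpha.
// A candidate node t is pruned while
//
//    g(t) = ( R(t) - R(T_t) ) / ( |T_t| - 1 )  <=  alpha,
//
// where R(t) is the (weighted) misclassification cost of node t when made a
// leaf, R(T_t) the cost of the subtree T_t below it, and |T_t| its number of
// terminal nodes.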
Double_t TMVA::MethodDT::TestTreeQuality( DecisionTree *dt )
{
   // test the tree quality in terms of misclassification
   Double_t SumCorrect = 0, SumWrong = 0;
   for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
      const Event * ev = Data()->GetEvent(ievt);
      if ((dt->CheckEvent(ev) > dt->GetNodePurityLimit()) == DataInfo().IsSignal(ev))
         SumCorrect += ev->GetWeight();
      else
         SumWrong += ev->GetWeight();
   }
   return SumCorrect / (SumCorrect + SumWrong);
}
void TMVA::MethodDT::AddWeightsXMLTo( void* parent ) const
{
   fTree->AddXMLTo(parent);
}
void TMVA::MethodDT::ReadWeightsFromXML( void* wghtnode )
{
   if (fTree) delete fTree;
   fTree = new DecisionTree();
   fTree->ReadXML(wghtnode, GetTrainingTMVAVersionCode());
}
Double_t TMVA::MethodDT::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   // returns the MVA value of the current event
   NoErrorCalc(err, errUpper);
   return fTree->CheckEvent(GetEvent(), fUseYesNoLeaf);
}
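// Illustrative application sketch (assumes a trained "DT" method and a weight
// file at the usual default location; variable names are examples only):
//
//    TMVA::Reader reader;
//    Float_t var1, var2;
//    reader.AddVariable("var1", &var1);
//    reader.AddVariable("var2", &var2);
//    reader.BookMVA("DT", "weights/TMVAClassification_DT.weights.xml");
//    // ... fill var1, var2 for an event, then:
//    Double_t mva = reader.EvaluateMVA("DT");   // calls GetMvaValue() above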