155 const TString& theWeightFile) :
215 DeclareOptionRef(
fUsePoissonNvars,
"UsePoissonNvars",
"Interpret \"UseNvars\" not as fixed number but as mean of a Poisson distribution in each split with RandomisedTree option");
217 "Use Sig or Bkg node type or the ratio S/B as classification in the leaf node");
225 DeclareOptionRef(
fMinNodeSizeS,
"MinNodeSize",
"Minimum percentage of training events required in a leaf node (default: Classification: 10%, Regression: 1%)");
249 "--> removed option .. only kept for reader backward compatibility");
264 Log() << kFATAL <<
"<ProcessOptions> unknown Separation Index option called" <<
Endl;
275 Log() << kFATAL <<
"<ProcessOptions> unknown PruneMethod option:" <<
fPruneMethodS <<
" called" <<
Endl;
282 <<
"Sorry automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" <<
Endl;
286 if (this->
Data()->HasNegativeEventWeights()){
287 Log() << kINFO <<
" You are using a Monte Carlo that has also negative weights. "
288 <<
"That should in principle be fine as long as on average you end up with "
289 <<
"something positive. For this you have to make sure that the minimal number "
290 <<
"of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize="
292 <<
", (or the deprecated equivalent nEventsMin) you can set this via the "
293 <<
"MethodDT option string when booking the "
294 <<
"classifier) is large enough to allow for reasonable averaging!!! "
295 <<
" If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining "
296 <<
"which ignores events with negative weight in the training. " <<
Endl
297 <<
Endl <<
"Note: You'll get a WARNING message during the training if that should ever happen" <<
Endl;
301 Log() << kINFO <<
" Randomised trees should use *bagging* as *boost* method. Did you set this in the *MethodBoost* ? . Here I can enforce only the *no pruning*" <<
Endl;
308 Log() << kWARNING <<
"You have explicitly set *nEventsMin*, the min absolute number \n"
309 <<
"of events in a leaf node. This is DEPRECATED, please use the option \n"
310 <<
"*MinNodeSize* giving the relative number as percentage of training \n"
311 <<
"events instead. \n"
320 if (sizeInPercent > 0 && sizeInPercent < 50){
324 Log() << kERROR <<
"you have demanded a minimal node size of "
325 << sizeInPercent <<
"% of the training events.. \n"
326 <<
" that somehow does not make sense "<<
Endl;
334 Log() << kERROR <<
"I had problems reading the option MinNodeEvents, which\n"
335 <<
"after removing a possible % sign now reads " << sizeInPercent <<
Endl;
380 if (
fRandomisedTrees)
Log()<<kWARNING<<
" randomised Trees do not work yet in this framework,"
381 <<
" as I do not know how to give each tree a new random seed, now they"
382 <<
" will be all the same and that is not good " <<
Endl;
387 UInt_t nevents =
Data()->GetNTrainingEvents();
388 std::vector<const TMVA::Event*> tmp;
389 for (
Long64_t ievt=0; ievt<nevents; ievt++) {
391 tmp.push_back(event);
393 fTree->BuildTree(tmp);
415 for(
UInt_t i = 0; i < nodes.size(); i++)
416 fTree->PruneNode(nodes[i]);
510 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++)
512 const Event * ev =
Data()->GetEvent(ievt);
517 return SumCorrect / (SumCorrect + SumWrong);
524 fTree->AddXMLTo(parent);
#define REGISTER_METHOD(CLASS)
for example
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
bool Bool_t
Boolean (0=false, 1=true) (bool).
double Double_t
Double 8 bytes.
long long Long64_t
Portable signed long integer 8 bytes.
A helper class to prune a decision tree using the Cost Complexity method (see Classification and Regr...
void Optimize()
determine the pruning sequence
std::vector< TMVA::DecisionTreeNode * > GetOptimalPruneSequence() const
return the sequence of nodes to prune corresponding to the optimal prune strength (=alpha)
Float_t GetOptimalPruneStrength() const
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
const TString & GetOptions() const
Implementation of the CrossEntropy as separation criterion.
Class that contains all the data information.
static void SetIsTraining(bool on)
Implementation of a Decision Tree.
Double_t GetNodePurityLimit() const
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Implementation of the GiniIndex as separation criterion.
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Types::EAnalysisType GetAnalysisType() const
UInt_t GetTrainingTMVAVersionCode() const
Bool_t DoRegression() const
const Event * GetEvent() const
DataSetInfo & DataInfo() const
Types::EAnalysisType fAnalysisType
void SetSignalReferenceCut(Double_t cut)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
UInt_t fMaxDepth
max depth
Bool_t fAutomatic
use user given prune strength or automatically determined one using a validation sample
Float_t fMinNodeSize
min percentage of training events in node
virtual ~MethodDT(void)
destructor
MethodDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
the standard constructor for just an ordinary "decision tree"
void DeclareOptions() override
Define the options (their key words) that can be set in the option string.
Bool_t fUsePoissonNvars
fUseNvars is used as a Poisson mean, and the actual value of useNvars is at each step drawn from that...
Int_t fUseNvars
the number of variables used in the randomised tree splitting
void ProcessOptions() override
the option string is decoded, for available options see "DeclareOptions"
void Init(void) override
common initialisation with defaults for the DT-Method
Double_t TestTreeQuality(DecisionTree *dt)
SeparationBase * fSepType
the separation used in node splitting
DecisionTree::EPruneMethod fPruneMethod
method used for pruning
Double_t fErrorFraction
ntuple var: misclassification error fraction
Double_t fDeltaPruneStrength
step size in pruning, is adjusted according to experience of previous trees
Bool_t fUseYesNoLeaf
use sig or bkg classification in leaf nodes or sig/bkg
void ReadWeightsFromXML(void *wghtnode) override
const Ranking * CreateRanking() override
Double_t fNodePurityLimit
purity limit for sig/bkg nodes
TString fSepTypeS
the separation (option string) used in node splitting
TString fMinNodeSizeS
string containing min percentage of training events in node
void GetHelpMessage() const override
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
returns MVA value
Double_t PruneTree()
prune the decision tree if requested (good for individual trees that are best grown out,...
void DeclareCompatibilityOptions() override
options that are used ONLY for the READER to ensure backward compatibility
void ReadWeightsFromStream(std::istream &istr) override
Bool_t fRandomisedTrees
choose a random subset of possible cut variables at each node during training
DecisionTree * fTree
the decision tree
void AddWeightsXMLTo(void *parent) const override
void Train(void) override
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
DT can handle classification with 2 classes and regression with one regression-target.
Bool_t fPruneBeforeBoost
ancient variable, only needed for "CompatibilityOptions"
void SetMinNodeSize(Double_t sizeInPercent)
Int_t fNCuts
grid used in cut applied in node splitting
Double_t fPruneStrength
a parameter to set the "amount" of pruning..needs to be adjusted
TString fPruneMethodS
prune method option String
Int_t fMinNodeEvents
min number of events in node
Implementation of the MisClassificationError as separation criterion.
Ranking for variables in method (implementation).
Implementation of the SdivSqrtSplusB as separation criterion.
Singleton class for Global types used by TMVA.
@ kValidation
these are placeholders... currently not used, but could be moved "forward" if
Double_t Atof() const
Return floating-point value contained in string.
TString & ReplaceAll(const TString &s1, const TString &s2)
Bool_t IsAlnum() const
Returns true if all characters in string are alphanumeric.
create variable transformations
MsgLogger & Endl(MsgLogger &ml)