72 fValidationSample(validationSample),
73 fValidationDataSet(NULL),
78 if(qualityIndex == NULL) {
101 if(qualityIndex == NULL) {
135 std::ofstream outfile;
136 if (
fDebug) outfile.open(
"costcomplexity.log");
138 if (
fDebug) outfile <<
"ERROR: no validation sample, so cannot optimize pruning!" << std::endl;
140 if (
fDebug) outfile.close();
149 if(HaveStopCondition && alpha >
fAlpha)
break;
162 if (
fDebug) outfile << std::endl <<
"Caught trying to prune the root node!" << std::endl;
169 outfile <<
"===========================" << std::endl
170 <<
"Pruning branch listed below" << std::endl
171 <<
"===========================" << std::endl;
190 if(!HaveStopCondition) {
204 if(!HaveStopCondition) {
218 outfile << std::endl <<
"************ Summary **************" << std::endl
219 <<
"Number of trees in the sequence: " <<
fPruneSequence.size() << std::endl;
221 outfile <<
"Pruning strength parameters: [";
226 outfile <<
"Misclassification rates: [";
231 outfile <<
"Optimal index: " <<
fOptimalK+1 << std::endl;
242 std::vector<DecisionTreeNode*> optimalSequence;
248 return optimalSequence;
void Optimize()
determine the pruning sequence
std::vector< Float_t > fQualityIndexList
map of R(T) -> pruning index
virtual void PrintRec(std::ostream &os) const
recursive printout of the node and its daughters
CCTreeNode * GetLeftDaughter()
const DataSet * fValidationDataSet
the event sample to select the optimally-pruned tree
Int_t fOptimalK
index of the optimal tree in the pruned tree sequence
void SetResubstitutionEstimate(Double_t R)
DecisionTree * fTree
(pruned) decision tree
Bool_t fDebug
debug flag; when set, diagnostic output is written to costcomplexity.log
Double_t GetResubstitutionEstimate() const
SeparationBase * fQualityIndex
the quality index used to calculate R(t), R(T) = sum[t in ~T]{ R(t) }
Implementation of the MisClassificationError as separation criterion.
Class that contains all the data information.
CCTreeNode * GetRightDaughter()
Int_t GetNLeafDaughters() const
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
Double_t TestTreeQuality(const EventList *validationSample)
return the misclassification rate of a pruned tree for a validation event sample using an EventList ...
Bool_t fOwnQIndex
flag indicating whether fQualityIndex is owned by this object
std::vector< TMVA::DecisionTreeNode * > fPruneSequence
map of weakest links (i.e., branches to prune) -> pruning index
std::vector< Event * > EventList
Implementation of a Decision Tree.
void SetNLeafDaughters(Int_t N)
An interface to calculate the "SeparationGain" for different separation criteria used in various training algorithms.
void SetMinAlphaC(Double_t alpha)
const EventList * fValidationSample
the event sample to select the optimally-pruned tree
std::vector< TMVA::DecisionTreeNode * > GetOptimalPruneSequence() const
return the sequence of nodes (weakest links) to prune in order to obtain the optimally-pruned tree
DecisionTreeNode * GetDTNode() const
void SetAlphaC(Double_t alpha)
Abstract ClassifierFactory template that handles arbitrary types.
Double_t GetAlphaC() const
CCPruner(DecisionTree *t_max, const EventList *validationSample, SeparationBase *qualityIndex=NULL)
constructor
Double_t GetMinAlphaC() const
void PruneNode(CCTreeNode *t)
remove the branch rooted at node t
std::vector< Float_t > fPruneStrengthList
map of alpha -> pruning index
Double_t GetNodeResubstitutionEstimate() const