72 fValidationSample(validationSample),
73 fValidationDataSet(NULL),
78 if(qualityIndex == NULL) {
95 fValidationSample(NULL),
96 fValidationDataSet(validationSample),
101 if(qualityIndex == NULL) {
132 Double_t epsilon = std::numeric_limits<double>::epsilon();
135 std::ofstream outfile;
136 if (
fDebug) outfile.open(
"costcomplexity.log");
138 if (
fDebug) outfile <<
"ERROR: no validation sample, so cannot optimize pruning!" << std::endl;
140 if (
fDebug) outfile.close();
145 while(
R->GetNLeafDaughters() > 1) {
146 if(
R->GetMinAlphaC() > alpha)
147 alpha =
R->GetMinAlphaC();
149 if(HaveStopCondition && alpha >
fAlpha)
break;
162 if (
fDebug) outfile << std::endl <<
"Caught trying to prune the root node!" << std::endl;
169 outfile <<
"===========================" << std::endl
170 <<
"Pruning branch listed below" << std::endl
171 <<
"===========================" << std::endl;
190 if(!HaveStopCondition) {
204 if(!HaveStopCondition) {
218 outfile << std::endl <<
"************ Summary **************" << std::endl
219 <<
"Number of trees in the sequence: " <<
fPruneSequence.size() << std::endl;
221 outfile <<
"Pruning strength parameters: [";
226 outfile <<
"Misclassification rates: [";
231 outfile <<
"Optimal index: " <<
fOptimalK+1 << std::endl;
242 std::vector<DecisionTreeNode*> optimalSequence;
248 return optimalSequence;
#define R(a, b, c, d, e, f, g, h, i)
CCPruner(DecisionTree *t_max, const EventList *validationSample, SeparationBase *qualityIndex=nullptr)
constructor
Float_t fAlpha
! regularization parameter in CC pruning
std::vector< Float_t > fQualityIndexList
! map of R(T) -> pruning index
void Optimize()
determine the pruning sequence
Bool_t fDebug
! debug flag
Bool_t fOwnQIndex
! flag indicating whether fQualityIndex is owned by this object
std::vector< Event * > EventList
std::vector< TMVA::DecisionTreeNode * > fPruneSequence
! map of weakest links (i.e., branches to prune) -> pruning index
const EventList * fValidationSample
! the event sample to select the optimally-pruned tree
std::vector< TMVA::DecisionTreeNode * > GetOptimalPruneSequence() const
return the sequence of nodes to prune in order to obtain the optimally-pruned tree
Int_t fOptimalK
! index of the optimal tree in the pruned tree sequence
const DataSet * fValidationDataSet
! the validation data set used to select the optimally-pruned tree
std::vector< Float_t > fPruneStrengthList
! map of alpha -> pruning index
SeparationBase * fQualityIndex
! the quality index used to calculate R(t), R(T) = sum[t in ~T]{ R(t) }
DecisionTree * fTree
! (pruned) decision tree
Int_t GetNLeafDaughters() const
void SetNLeafDaughters(Int_t N)
Double_t GetMinAlphaC() const
Double_t GetNodeResubstitutionEstimate() const
void SetMinAlphaC(Double_t alpha)
void SetResubstitutionEstimate(Double_t R)
CCTreeNode * GetLeftDaughter()
Double_t GetResubstitutionEstimate() const
virtual void PrintRec(std::ostream &os) const
recursive printout of the node and its daughters
Double_t GetAlphaC() const
void SetAlphaC(Double_t alpha)
CCTreeNode * GetRightDaughter()
Double_t TestTreeQuality(const EventList *validationSample)
return the misclassification rate of a pruned tree for a validation event sample using an EventList
void PruneNode(CCTreeNode *t)
remove the branch rooted at node t
Class that contains all the data information.
Implementation of a Decision Tree.
Implementation of the MisClassificationError as separation criterion.
An interface to calculate the "SeparationGain" for different separation criteria used in various training algorithms.
create variable transformations