69CostComplexityPruneTool::CostComplexityPruneTool(
SeparationBase* qualityIndex ) :
71 fLogger(new
MsgLogger(
"CostComplexityPruneTool") )
105 if( dt == NULL || (
IsAutomatic() && validationSample == NULL) ) {
123 Log() << kDEBUG <<
"Sum of weights in pruning validation sample: " << W <<
Endl;
124 Log() << kDEBUG <<
"Quality of tree prior to any pruning is " <<
Q/W <<
Endl;
131 catch(
const std::string &error) {
132 Log() << kERROR <<
"Couldn't initialize the tree meta data because of error ("
133 << error <<
")" <<
Endl;
137 Log() << kDEBUG <<
"Automatic cost complexity pruning is " << (
IsAutomatic()?
"on":
"off") <<
"." <<
Endl;
142 catch(
const std::string &error) {
143 Log() << kERROR <<
"Error optimizing pruning sequence ("
144 << error <<
")" <<
Endl;
148 Log() << kDEBUG <<
"Index of pruning sequence to stop at: " <<
fOptimalK <<
Endl;
158 Log() << kINFO <<
"no proper pruning could be calculated. Tree "
159 << dt->
GetTreeID() <<
" will not be pruned. Do not worry if this "
160 <<
" happens for a few trees " <<
Endl;
183 if(
n == NULL )
return;
189 else n->SetNodeR( (
s+
b)*
n->GetSeparationIndex() );
191 if(
n->GetLeft() != NULL &&
n->GetRight() != NULL) {
197 n->SetNTerminal(
n->GetLeft()->GetNTerminal() +
198 n->GetRight()->GetNTerminal());
200 n->SetSubTreeR( (
n->GetLeft()->GetSubTreeR() +
201 n->GetRight()->GetSubTreeR()));
203 n->SetAlpha( ((
n->GetNodeR() -
n->GetSubTreeR()) /
204 (
n->GetNTerminal() - 1)));
208 n->SetAlphaMinSubtree( std::min(
n->GetAlpha(), std::min(
n->GetLeft()->GetAlphaMinSubtree(),
209 n->GetRight()->GetAlphaMinSubtree())));
210 n->SetCC(
n->GetAlpha());
213 n->SetNTerminal( 1 );
n->SetTerminal( );
215 else n->SetSubTreeR( (
s+
b)*
n->GetSeparationIndex() );
216 n->SetAlpha(std::numeric_limits<double>::infinity( ));
217 n->SetAlphaMinSubtree(std::numeric_limits<double>::infinity( ));
218 n->SetCC(
n->GetAlpha());
263 while(
R->GetNTerminal() > 1) {
266 alpha =
TMath::Max(
R->GetAlphaMinSubtree(), alpha);
268 if(
R->GetAlphaMinSubtree() >=
R->GetAlpha() ) {
269 Log() << kDEBUG <<
"\nCaught trying to prune the root node!" <<
Endl;
290 Log() << kDEBUG <<
"\nCaught trying to prune the root node!" <<
Endl;
316 Log() << kDEBUG <<
"after this pruning step I would have " <<
R->GetNTerminal() <<
" remaining terminal nodes " <<
Endl;
352 Log() << kDEBUG <<
"\n************ Summary for Tree " << dt->
GetTreeID() <<
" *******" <<
Endl
355 Log() << kDEBUG <<
"Pruning strength parameters: [";
360 Log() << kDEBUG <<
"Misclassification rates: [";
#define R(a, b, c, d, e, f, g, h, i)
Double_t GetSubTreeR() const
void SetAlphaMinSubtree(Double_t g)
Double_t GetAlphaMinSubtree() const
void SetSubTreeR(Double_t r)
virtual DecisionTreeNode * GetLeft() const
Double_t GetNodeR() const
Double_t GetAlpha() const
Int_t GetNTerminal() const
void SetAlpha(Double_t alpha)
virtual DecisionTreeNode * GetParent() const
void SetNTerminal(Int_t n)
virtual DecisionTreeNode * GetRight() const
Implementation of a Decision Tree.
Double_t GetNodePurityLimit() const
void ApplyValidationSample(const EventConstList *validationSample) const
run the validation sample through the (pruned) tree and fill in the nodes the variables NSValidation ...
virtual DecisionTreeNode * GetRoot() const
void PruneNodeInPlace(TMVA::DecisionTreeNode *node)
prune a node temporarily (without actually deleting its descendants which allows testing the pruned t...
Double_t TestPrunedTreeQuality(const DecisionTreeNode *dt=NULL, Int_t mode=0) const
return the misclassification rate of a pruned tree a "pruned tree" may have set the variable "IsTermi...
Double_t GetSumWeights(const EventConstList *validationSample) const
calculate the normalization factor for a pruning validation sample
ostringstream derivative to redirect and format output
void SetMinType(EMsgType minType)
std::vector< DecisionTreeNode * > PruneSequence
the regularization parameter for pruning
Double_t PruneStrength
quality measure for a pruned subtree T of T_max
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
static constexpr double s
Abstract ClassifierFactory template that handles arbitrary types.
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)