return std::abs(x - y) < std::numeric_limits<float>::epsilon() * std::abs(x + y) * ulp
    || std::abs(x - y) < std::numeric_limits<float>::min();
return std::abs(x - y) < std::numeric_limits<double>::epsilon() * std::abs(x + y) * ulp
    || std::abs(x - y) < std::numeric_limits<double>::min();
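The two helpers above implement the standard ULP-scaled floating-point comparison: the epsilon term scales with the magnitude of the operands, while the std::numeric_limits<T>::min() term catches differences near zero, where the scaled epsilon underflows. A minimal self-contained sketch of the same technique; the wrapper name is hypothetical:

#include <cmath>
#include <iostream>
#include <limits>

// ULP-scaled comparison, same structure as the helpers above (illustrative).
bool almostEqual(double x, double y, int ulp = 4)
{
   return std::abs(x - y) < std::numeric_limits<double>::epsilon() * std::abs(x + y) * ulp
       || std::abs(x - y) < std::numeric_limits<double>::min(); // near-zero guard
}

int main()
{
   double a = 0.1 + 0.2;                      // 0.30000000000000004...
   std::cout << (a == 0.3) << '\n';           // 0: exact comparison fails
   std::cout << almostEqual(a, 0.3) << '\n';  // 1: ULP-scaled comparison succeeds
}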
fMinLinCorrForFisher (1),
fUseExclusiveVars (kTRUE),
fPruneMethod (kNoPruning),
fNNodesBeforePruning(0),
fNodePurityLimit(0.5),
fAnalysisType (Types::kClassification),
fMinLinCorrForFisher (1),
fUseExclusiveVars (kTRUE),
fPruneMethod (kNoPruning),
fNNodesBeforePruning(0),
fAnalysisType (Types::kClassification),
Log() << kWARNING << " You had chosen the training mode using optimal cuts, not\n"
      << " based on a grid of " << fNCuts << " by setting the option NCuts < 0\n"
      << " as this doesn't exist yet, I set it to " << fNCuts << " and use the grid"
fUseFisherCuts (d.fUseFisherCuts),
fMinLinCorrForFisher (d.fMinLinCorrForFisher),
fUseExclusiveVars (d.fUseExclusiveVars),
fSepType (d.fSepType),
fRegType (d.fRegType),
fMinSize (d.fMinSize),
fMinNodeSize(d.fMinNodeSize),
fMinSepGain (d.fMinSepGain),
fUseSearchTree (d.fUseSearchTree),
fPruneStrength (d.fPruneStrength),
fPruneMethod (d.fPruneMethod),
fNodePurityLimit(d.fNodePurityLimit),
fRandomisedTree (d.fRandomisedTree),
fUseNvars (d.fUseNvars),
fUsePoissonNvars(d.fUsePoissonNvars),
fMaxDepth (d.fMaxDepth),
fSigClass (d.fSigClass),
fAnalysisType(d.fAnalysisType),
fDataSetInfo (d.fDataSetInfo)
if (fMyTrandom) delete fMyTrandom;
if (fRegType) delete fRegType;
Log() << kFATAL << "SetParentTreeNodes: started with undefined ROOT node" << Endl;
if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) != NULL) ) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
}
else if ((this->GetLeftDaughter(n) != NULL) && (this->GetRightDaughter(n) == NULL) ) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
if (this->GetLeftDaughter(n) != NULL) {
   this->SetParentTreeInNodes( this->GetLeftDaughter(n) );
if (this->GetRightDaughter(n) != NULL) {
   this->SetParentTreeInNodes( this->GetRightDaughter(n) );
n->SetParentTree(this);
if (n->GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(n->GetDepth());
std::string type("");
xmin = std::vector<Float_t>(nvars);
xmax = std::vector<Float_t>(nvars);
xmin = std::vector<Float_t>(nvars);
xmax = std::vector<Float_t>(nvars);
if (nvars != other.nvars)
   std::cout << "!!! ERROR BuildNodeInfo1+BuildNodeInfo2 failure. Nvars1 != Nvars2." << std::endl;
ret.target2 = target2 + other.target2;
for (Int_t i=0; i<nvars; i++)
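The fragments above belong to BuildNodeInfo::operator+ (documented below), which merges partial per-chunk statistics so that node building can be parallelised as a map-reduce over event chunks. A minimal sketch of that merge pattern, assuming the struct carries a variable count, a running target sum, and per-variable bounds; all names here are hypothetical:

#include <algorithm>
#include <iostream>
#include <vector>

// Illustrative merge of two per-chunk statistics blocks (names hypothetical).
struct PartialStats {
   int nvars = 0;                  // number of input variables covered
   double target2 = 0;             // running sum of squared targets
   std::vector<float> xmin, xmax;  // per-variable sample bounds

   PartialStats operator+(const PartialStats& other) const {
      PartialStats ret = *this;
      if (nvars != other.nvars)    // partial results must match in shape
         std::cout << "ERROR: merging stats with different nvars" << std::endl;
      ret.target2 = target2 + other.target2;
      for (int i = 0; i < nvars; i++) {
         ret.xmin[i] = std::min(xmin[i], other.xmin[i]); // widest range wins
         ret.xmax[i] = std::max(xmax[i], other.xmax[i]);
      }
      return ret;
   }
};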
this->GetRoot()->SetPos('s');
this->GetRoot()->SetDepth(0);
this->GetRoot()->SetParentTree(this);
Log() << kDEBUG << "\tThe minimal node size MinNodeSize=" << fMinNodeSize
      << "% is translated to an actual number of events = " << fMinSize
      << " for the training sample size of " << eventSample.size() << Endl;
Log() << kDEBUG << "\tNote: This number will be taken as absolute minimum in the node, " << Endl;
Log() << kDEBUG << " \tin terms of 'weighted events' and unweighted ones !! " << Endl;
if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
fVariableImportance.resize(fNvars);
else Log() << kFATAL << ":<BuildTree> eventsample Size == 0 " << Endl;
if (evt->GetClass() == fSigClass) {
if ( DoRegression() ) {
Log() << kWARNING << " One of the Decision Tree nodes has negative total number of signal or background events. "
      << "(Nsig=" << nodeInfo.s << " Nbkg=" << nodeInfo.b
      << " Probably you use a Monte Carlo with negative weights. That should in principle "
      << "be fine as long as on average you end up with something positive. For this you have to make sure that the "
      << "minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize=" << fMinNodeSize
      << "% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
      << "to allow for reasonable averaging!!!" << Endl
      << " If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
      << "with negative weight in the training." << Endl;
Log() << kDEBUG << "Event " << i << " has (original) weight: "
      << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
Log() << kDEBUG << " that gives in total: " << nBkg << Endl;
if (node == this->GetRoot()) {
if (DoRegression()) {
Log() << kERROR << "<TrainNode> all events went to the same branch" << Endl
      << "--- Hence new node == old node ... check" << Endl
      << "\n when cutting on variable " << node->GetSelector()
      << kFATAL << "--- this should never happen, please write a bug report to Helge.Voss@cern.ch" << Endl;
if (DoRegression()) {
this->GetRoot()->SetPos('s');
this->GetRoot()->SetDepth(0);
this->GetRoot()->SetParentTree(this);
Log() << kDEBUG << "\tThe minimal node size MinNodeSize=" << fMinNodeSize
      << "% is translated to an actual number of events = " << fMinSize
      << " for the training sample size of " << eventSample.size() << Endl;
Log() << kDEBUG << "\tNote: This number will be taken as absolute minimum in the node, " << Endl;
Log() << kDEBUG << " \tin terms of 'weighted events' and unweighted ones !! " << Endl;
if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
fVariableImportance.resize(fNvars);
else Log() << kFATAL << ":<BuildTree> eventsample Size == 0 " << Endl;
if (evt->GetClass() == fSigClass) {
if ( DoRegression() ) {
Log() << kWARNING << " One of the Decision Tree nodes has negative total number of signal or background events. "
      << "(Nsig=" << s << " Nbkg=" << b
      << " Probably you use a Monte Carlo with negative weights. That should in principle "
      << "be fine as long as on average you end up with something positive. For this you have to make sure that the "
      << "minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize=" << fMinNodeSize
      << "% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
      << "to allow for reasonable averaging!!!" << Endl
      << " If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
      << "with negative weight in the training." << Endl;
if (node == this->GetRoot()) {
&& ( ( s!=0 && b!=0 && !DoRegression()) || ( (s+b)!=0 && DoRegression()) ) ) {
if (DoRegression()) {
Log() << kERROR << "<TrainNode> all events went to the same branch" << Endl
      << "--- Hence new node == old node ... check" << Endl
      << "\n when cutting on variable " << node->GetSelector()
      << kFATAL << "--- this should never happen, please write a bug report to Helge.Voss@cern.ch" << Endl;
if (DoRegression()) {
node = this->GetRoot();
if (event.GetClass() == fSigClass) {
this->FillEvent(event, node->GetRight());
this->FillEvent(event, node->GetLeft());
if (this->GetRoot() != NULL) this->GetRoot()->ClearNodeAndAllDaughters();
node = this->GetRoot();
if (l->GetNodeType() * r->GetNodeType() > 0) {
   this->PruneNode(node);
return this->CountNodes();
if (fPruneMethod == kNoPruning) return 0.0;
if (fPruneMethod == kExpectedErrorPruning)
else if (fPruneMethod == kCostComplexityPruning)
Log() << kFATAL << "Selected pruning method not yet implemented "
if (!tool) return 0.0;
tool->SetPruneStrength(GetPruneStrength());
if (tool->IsAutomatic()) {
   Log() << kFATAL << "Cannot automate the pruning algorithm without an "
         << "independent validation sample!" << Endl;
   Log() << kFATAL << "Cannot automate the pruning algorithm with "
         << "independent validation sample of ZERO events!" << Endl;
Log() << kFATAL << "Error pruning tree! Check prune.log for more information."
for (UInt_t i = 0; i < info->PruneSequence.size(); ++i) {
   PruneNode(info->PruneSequence[i]);
GetRoot()->ResetValidationData();
n = this->GetRoot();
Log() << kFATAL << "TestPrunedTreeQuality: started with undefined ROOT node" << Endl;
if (n->GetLeft() != NULL && n->GetRight() != NULL && !n->IsTerminal() ) {
   return (TestPrunedTreeQuality( n->GetLeft(), mode ) +
           TestPrunedTreeQuality( n->GetRight(), mode ));
if (DoRegression()) {
   return n->GetSumTarget2() - 2*n->GetSumTarget()*n->GetResponse() + sumw*n->GetResponse()*n->GetResponse();
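This return value is the node's weighted sum of squared regression residuals in expanded form. Assuming GetSumTarget() and GetSumTarget2() store weighted target sums (consistent with the sumw factor above), with leaf response r, validation weights w_i, and targets t_i:

\sum_i w_i (t_i - r)^2 \;=\; \sum_i w_i t_i^2 \;-\; 2r \sum_i w_i t_i \;+\; r^2 \sum_i w_i

so the quality test needs only three accumulated scalars per node rather than the individual validation targets.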
if (n->GetPurity() > this->GetNodePurityLimit())
   return n->GetNBValidation();
   return n->GetNSValidation();
else if (mode == 1) {
   return (n->GetPurity() * n->GetNBValidation() + (1.0 - n->GetPurity()) * n->GetNSValidation());
throw std::string("Unknown ValidationQualityMode");
if (current == NULL) {
   Log() << kFATAL << "CheckEventWithPrunedTree: started with undefined ROOT node" << Endl;
while (current != NULL) {
if (e->GetClass() == fSigClass)
if (e->GetNTargets() > 0) {
n = this->GetRoot();
Log() << kFATAL << "CountLeafNodes: started with undefined ROOT node" << Endl;
if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) == NULL) ) {
if (this->GetLeftDaughter(n) != NULL) {
   countLeafs += this->CountLeafNodes( this->GetLeftDaughter(n) );
if (this->GetRightDaughter(n) != NULL) {
   countLeafs += this->CountLeafNodes( this->GetRightDaughter(n) );
n = this->GetRoot();
Log() << kFATAL << "DescendTree: started with undefined ROOT node" << Endl;
if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) == NULL) ) {
else if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) != NULL) ) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
else if ((this->GetLeftDaughter(n) != NULL) && (this->GetRightDaughter(n) == NULL) ) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
if (this->GetLeftDaughter(n) != NULL) {
   this->DescendTree( this->GetLeftDaughter(n) );
if (this->GetRightDaughter(n) != NULL) {
   this->DescendTree( this->GetRightDaughter(n) );
this->DeleteNode(l);
this->DeleteNode(r);
if (node == NULL) return;
node->SetAlpha( std::numeric_limits<double>::infinity( ) );
Node* current = this->GetRoot();
if ( tmp & sequence) current = this->GetRightDaughter(current);
else current = this->GetLeftDaughter(current);
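This is the path-decoding loop of GetNode(ULong_t sequence, UInt_t depth) (documented in the member list below): bit i of sequence selects the branch at level i, with a set bit descending right. A self-contained sketch of the same addressing scheme on a hypothetical node type:

// Walk 'depth' steps from the root; bit i of 'sequence' picks the branch
// at level i (1 = right, 0 = left). Purely illustrative node type.
struct BNode { BNode *left = nullptr, *right = nullptr; };

BNode* getNode(BNode* root, unsigned long sequence, unsigned int depth)
{
   BNode* current = root;
   for (unsigned int i = 0; i < depth && current; ++i) {
      unsigned long tmp = 1UL << i;                     // mask for level i
      current = (tmp & sequence) ? current->right : current->left;
   }
   return current; // nullptr if the path leaves the tree early
}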
Double_t bla = fMyTrandom->Rndm()*fNvars;
if (nSelectedVars != useNvars) { std::cout << "Bug in TrainNode - GetRandomisedVariables()... sorry" << std::endl; std::exit(1); }
nSelS = std::vector< std::vector<Double_t> >(cNvars);
nSelB = std::vector< std::vector<Double_t> >(cNvars);
nSelS_unWeighted = std::vector< std::vector<Double_t> >(cNvars);
nSelB_unWeighted = std::vector< std::vector<Double_t> >(cNvars);
target = std::vector< std::vector<Double_t> >(cNvars);
target2 = std::vector< std::vector<Double_t> >(cNvars);
nSelS[ivar] = std::vector<Double_t>(nBins[ivar], 0);
nSelB[ivar] = std::vector<Double_t>(nBins[ivar], 0);
nSelS_unWeighted[ivar] = std::vector<Double_t>(nBins[ivar], 0);
nSelB_unWeighted[ivar] = std::vector<Double_t>(nBins[ivar], 0);
target2[ivar] = std::vector<Double_t>(nBins[ivar], 0);
std::vector< std::vector<Double_t> > nSelS;
std::vector< std::vector<Double_t> > nSelB;
if (cNvars != other.cNvars)
   std::cout << "!!! ERROR TrainNodeInfo1+TrainNodeInfo2 failure. cNvars1 != cNvars2." << std::endl;
ret.nTotS_unWeighted = nTotS_unWeighted + other.nTotS_unWeighted;
ret.nTotB_unWeighted = nTotB_unWeighted + other.nTotB_unWeighted;
if (fRandomisedTree) {
if (fUseFisherCuts) {
Log() << kWARNING << " in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" << Endl;
if (fUseFisherCuts && fisherOK) cNvars++;
nBins[ivar] = fNCuts+1;
if (ivar < fNvars) {
   if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() == 'I') {
if (ivar < fNvars) {
if (DoRegression()) {
if (DoRegression()) {
if (DoRegression()) {
Log() << kFATAL << "Helge, you have a bug ....nodeInfo.nSelS_unw..+nodeInfo.nSelB_unw..= "
      << " while eventsample size = " << eventSample.size()
Log() << kFATAL << "Helge, you have another bug ....nodeInfo.nSelS+nodeInfo.nSelB= "
      << " while total number of events = " << totalSum
if ( ((sl+bl)>=fMinSize && (sr+br)>=fMinSize)
if (DoRegression()) {
if (DoRegression()) {
for (UInt_t i=0; i<cNvars; i++) {
Int_t nTotS_unWeighted, nTotB_unWeighted;
if (fRandomisedTree) {
if (fUseFisherCuts) {
Log() << kWARNING << " in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" << Endl;
if (fUseFisherCuts && fisherOK) cNvars++;
nBins[ivar] = fNCuts+1;
if (ivar < fNvars) {
   if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() == 'I') {
if (ivar < fNvars) {
nTotS_unWeighted=0; nTotB_unWeighted=0;
nTotS_unWeighted++; }
if (DoRegression()) {
if (DoRegression()) {
Log() << kFATAL << "Helge, you have a bug ....nSelS_unw..+nSelB_unw..= "
      << nSelS_unWeighted[ivar][nBins[ivar]-1] + nSelB_unWeighted[ivar][nBins[ivar]-1]
      << " while eventsample size = " << eventSample.size()
Log() << kFATAL << "Helge, you have another bug ....nSelS+nSelB= "
      << " while total number of events = " << totalSum
if ( ((sl+bl)>=fMinSize && (sr+br)>=fMinSize)
if (DoRegression()) {
if (DoRegression()) {
for (UInt_t i=0; i<cNvars; i++) {
   delete [] nSelS_unWeighted[i];
   delete [] nSelB_unWeighted[i];
   delete [] target2[i];
delete [] nSelS_unWeighted;
delete [] nSelB_unWeighted;
if ( ev->GetClass() == fSigClass )
   sum2Sig[k] += ( (xval[x] - (*meanMatx)(x, 0))*(xval[y] - (*meanMatx)(y, 0)) )*weight;
else sum2Bgd[k] += ( (xval[x] - (*meanMatx)(x, 1))*(xval[y] - (*meanMatx)(y, 1)) )*weight;
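These sums accumulate the weighted covariance of each variable pair separately for signal (column 0 of meanMatx) and background (column 1), which GetFisherCoefficients (see the member list below) then combines. As a textbook statement of the Fisher criterion, not a line-by-line transcription of the TMVA code, the coefficient vector comes from the pooled within-class matrix W and the class means:

W_{xy} = \sum_{i \in S} w_i\,(x_i - \bar{x}_S)(y_i - \bar{y}_S) + \sum_{i \in B} w_i\,(x_i - \bar{x}_B)(y_i - \bar{y}_B), \qquad \vec{c} \;\propto\; W^{-1}(\vec{\mu}_S - \vec{\mu}_B).

The singularity warnings just below are exactly the failure mode of inverting W when input variables are linear combinations of one another.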
Log() << kWARNING << "FisherCoeff matrix is almost singular with determinant="
      << " did you use variables that are linear combinations or highly correlated?"
Log() << kFATAL << "FisherCoeff matrix is singular with determinant="
      << " did you use variables that are linear combinations?"
Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;
std::vector<Double_t> lCutValue( fNvars, 0.0 );
std::vector<Double_t> lSepGain( fNvars, -1.0e6 );
std::vector<Char_t> lCutType( fNvars );
if ((*it)->GetClass() == fSigClass) {
nTotS += (*it)->GetWeight();
nTotB += (*it)->GetWeight();
if (fRandomisedTree) {
if (fUseNvars == 0) {
Double_t bla = fMyTrandom->Rndm()*fNvars;
for ( ; it != it_end; ++it ) {
if ((**it)->GetClass() == fSigClass )
if ( *(*it) == NULL ) {
   Log() << kFATAL << "In TrainNodeFull(): have a null event! Where index="
Double_t nSelS = it->GetCumulativeWeight(true);
Double_t nSelB = it->GetCumulativeWeight(false);
Log() << kFATAL << "CheckEvent: started with undefined ROOT node" << Endl;
Log() << kFATAL << "DT::CheckEvent: inconsistent tree structure" << Endl;
if (DoRegression()) {
Log() << kFATAL << "<SamplePurity> sumtot != sumsig+sumbkg"
for (UInt_t i=0; i< fNvars; i++) {
   sum += fVariableImportance[i];
for (UInt_t i=0; i< fNvars; i++) {
   if (sum > std::numeric_limits<double>::epsilon())
Log() << kFATAL << "<GetVariableImportance>" << Endl
      << "--- ivar = " << ivar << " is out of range " << Endl;
bool almost_equal_double(double x, double y, int ulp=4)
bool almost_equal_float(float x, float y, int ulp=4)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
TMatrixT< Double_t > TMatrixD
const_iterator begin() const
const_iterator end() const
static void SetVarIndex(Int_t iVar)
Base class for BinarySearch and Decision Trees.
UInt_t fNNodes
total number of nodes in the tree (counted)
static Config & Instance()
static function: returns TMVA instance
Class that contains all the data information.
void SetNEvents_unweighted(Float_t nev)
set the number of unweighted events that entered the node (during training), if traininfo defined
void SetNodeType(Int_t t)
set node type: 1 signal node, -1 bkg leaf, 0 intermediate node
void SetSeparationGain(Float_t sep)
set the separation, or information gained BY this node's selection, if traininfo defined
void SetNBkgEvents(Float_t b)
set the sum of the backgr weights in the node, if traininfo defined
void SetCutType(Bool_t t)
set true: if event variable > cutValue ==> signal, false otherwise
Double_t GetNSValidation() const
return number of signal events from the pruning validation sample, or -1 if traininfo undefined
void IncrementNEvents_unweighted()
increment the number of events that entered the node (during training), if traininfo defined
void SetFisherCoeff(Int_t ivar, Double_t coeff)
set fisher coefficients
void SetNSigEvents_unboosted(Float_t s)
set the sum of the unboosted signal events in the node, if traininfo defined
void SetAlphaMinSubtree(Double_t g)
set the minimum alpha in the tree rooted at this node, if traininfo defined
void IncrementNBkgEvents(Float_t b)
increment the sum of the backgr weights in the node, if traininfo defined
void SetNEvents_unboosted(Float_t nev)
set the number of unboosted events that entered the node (during training), if traininfo defined
Float_t GetNSigEvents(void) const
return the sum of the signal weights in the node, or -1 if traininfo undefined
virtual void SetLeft(Node *l)
void SetTerminal(Bool_t s=kTRUE)
void SetResponse(Float_t r)
set the response of the node (for regression)
void SetSampleMax(UInt_t ivar, Float_t xmax)
set the maximum of variable ivar from the training sample that pass/end up in this node,...
void SetNBValidation(Double_t b)
set number of background events from the pruning validation sample, if traininfo defined
void IncrementNEvents(Float_t nev)
void SetPurity(void)
set the S/(S+B) (purity) for the node REM: even if nodes with purity 0.01 are very PURE background...
void SetSubTreeR(Double_t r)
set the resubstitution estimate, R(T_t), of the tree rooted at this node, if traininfo defined
void AddToSumTarget2(Float_t t2)
add to sum target 2, if traininfo defined
virtual DecisionTreeNode * GetLeft() const
Double_t GetNodeR() const
return the node resubstitution estimate, R(t), for Cost Complexity pruning, or -1 if traininfo undefi...
virtual Bool_t GoesRight(const Event &) const
test event if it descends the tree at this node to the right
void SetNFisherCoeff(Int_t nvars)
Short_t GetSelector() const
return index of variable used for discrimination at this node
void SetNSigEvents(Float_t s)
set the sum of the signal weights in the node, if traininfo defined
Float_t GetResponse(void) const
return the response of the node (for regression)
Float_t GetCutValue(void) const
return the cut value applied at this node
Int_t GetNodeType(void) const
return node type: 1 signal node, -1 bkg leaf, 0 intermediate node
void IncrementNBkgEvents_unweighted()
increment the sum of the backgr weights in the node, if traininfo defined
void SetNSigEvents_unweighted(Float_t s)
set the sum of the unweighted signal events in the node, if traininfo defined
Double_t GetNBValidation() const
return number of background events from the pruning validation sample, or -1 if traininfo undefined
void SetAlpha(Double_t alpha)
set the critical point alpha, if traininfo defined
void SetSeparationIndex(Float_t sep)
set the chosen index, measure of "purity" (separation between S and B) AT this node,...
virtual void SetRight(Node *r)
void SetRMS(Float_t r)
set the RMS of the response of the node (for regression)
void IncrementNSigEvents_unweighted()
increment the sum of the signal weights in the node, if traininfo defined
void SetNBkgEvents_unboosted(Float_t b)
set the sum of the unboosted backgr events in the node, if traininfo defined
Float_t GetPurity(void) const
return S/(S+B) (purity) at this node (from training)
void IncrementNSigEvents(Float_t s)
increment the sum of the signal weights in the node, if traininfo defined
Float_t GetSampleMax(UInt_t ivar) const
return the maximum of variable ivar from the training sample that pass/end up in this node,...
void SetCutValue(Float_t c)
set the cut value applied at this node
Float_t GetNBkgEvents(void) const
return the sum of the backgr weights in the node, or -1 if traininfo undefined
Float_t GetSampleMin(UInt_t ivar) const
return the minimum of variable ivar from the training sample that pass/end up in this node,...
void SetSampleMin(UInt_t ivar, Float_t xmin)
set the minimum of variable ivar from the training sample that pass/end up in this node,...
void SetSelector(Short_t i)
set index of variable used for discrimination at this node
virtual DecisionTreeNode * GetParent() const
void SetNBkgEvents_unweighted(Float_t b)
set the sum of the unweighted backgr events in the node, if traininfo defined
void SetNSValidation(Double_t s)
set number of signal events from the pruning validation sample, if traininfo defined
void AddToSumTarget(Float_t t)
add to sum target, if traininfo defined
void SetNTerminal(Int_t n)
set number of terminal nodes in the subtree rooted here, if traininfo defined
void SetNEvents(Float_t nev)
set the number of events that entered the node (during training), if traininfo defined
virtual DecisionTreeNode * GetRight() const
Implementation of a Decision Tree.
UInt_t BuildTree(const EventConstList &eventSample, DecisionTreeNode *node=nullptr)
building the decision tree by recursively calling the splitting of one (root-) node into two daughter...
void FillTree(const EventList &eventSample)
fill the existing decision tree structure by filling events in from the top node and see where the...
void PruneNode(TMVA::DecisionTreeNode *node)
prune away the subtree below the node
void ApplyValidationSample(const EventConstList *validationSample) const
run the validation sample through the (pruned) tree and fill in the nodes the variables NSValidation ...
Double_t TrainNodeFull(const EventConstList &eventSample, DecisionTreeNode *node)
train a node by finding the single optimal cut for a single variable that best separates signal and b...
TMVA::DecisionTreeNode * GetEventNode(const TMVA::Event &e) const
get the pointer to the leaf node where a particular event ends up in... (used in gradient boosting)
void GetRandomisedVariables(Bool_t *useVariable, UInt_t *variableMap, UInt_t &nVars)
void SetParentTreeInNodes(Node *n=nullptr)
descend a tree to find all its leaf nodes, fill max depth reached in the tree at the same time.
void DescendTree(Node *n=nullptr)
descend a tree to find all its leaf nodes
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=262657)
re-create a new tree (decision tree or search tree) from XML
std::vector< const TMVA::Event * > EventConstList
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
static const Int_t fgRandomSeed
Int_t fNCuts
number of grid point in variable cut scans
UInt_t CleanTree(DecisionTreeNode *node=nullptr)
remove those last splits that result in two leaf nodes that are both of the type (i....
virtual ~DecisionTree(void)
destructor
Types::EAnalysisType fAnalysisType
kClassification(=0=false) or kRegression(=1=true)
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
void CheckEventWithPrunedTree(const TMVA::Event *) const
pass a single validation event through a pruned decision tree on the way down the tree,...
void PruneNodeInPlace(TMVA::DecisionTreeNode *node)
prune a node temporarily (without actually deleting its descendants which allows testing the pruned t...
Double_t TestPrunedTreeQuality(const DecisionTreeNode *dt=nullptr, Int_t mode=0) const
return the misclassification rate of a pruned tree; a "pruned tree" may have set the variable "IsTermi...
Double_t PruneTree(const EventConstList *validationSample=nullptr)
prune (get rid of internal nodes) the Decision tree to avoid overtraining; several different pruning m...
void FillEvent(const TMVA::Event &event, TMVA::DecisionTreeNode *node)
fill the existing decision tree structure by filling events in from the top node and see where the...
void ClearTree()
clear the tree nodes (their S/N, Nevents etc), just keep the structure of the tree
Double_t SamplePurity(EventList eventSample)
calculates the purity S/(S+B) of a given event sample
Node * GetNode(ULong_t sequence, UInt_t depth)
retrieve node from the tree.
std::vector< Double_t > GetFisherCoefficients(const EventConstList &eventSample, UInt_t nFisherVars, UInt_t *mapVarInFisher)
calculate the fisher coefficients for the event sample and the variables used
UInt_t CountLeafNodes(TMVA::Node *n=nullptr)
return the number of terminal nodes in the sub-tree below Node n
Double_t TrainNodeFast(const EventConstList &eventSample, DecisionTreeNode *node)
Decide how to split a node using one of the variables that gives the best separation of signal/backgr...
RegressionVariance * fRegType
the separation criteria used in Regression
DecisionTree(void)
default constructor using the GiniIndex as separation criterion, no restrictions on minimum number of ...
Double_t GetSumWeights(const EventConstList *validationSample) const
calculate the normalization factor for a pruning validation sample
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Node for the BinarySearch or Decision Trees.
Calculate the "SeparationGain" for Regression analysis separation criteria used in various training a...
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
Singleton class for Global types used by TMVA.
Random number generator class based on M.
TSeq< unsigned int > TSeqU
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Double_t Sqrt(Double_t x)
Returns the square root of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
BuildNodeInfo(Int_t fNvars, std::vector< Float_t > &inxmin, std::vector< Float_t > &inxmax)
std::vector< Float_t > xmin
BuildNodeInfo operator+(const BuildNodeInfo &other)
std::vector< Float_t > xmax
BuildNodeInfo(Int_t fNvars, const TMVA::Event *evt)
std::vector< std::vector< Double_t > > target2
std::vector< std::vector< Double_t > > nSelB_unWeighted
std::vector< std::vector< Double_t > > nSelB
std::vector< std::vector< Double_t > > target
std::vector< std::vector< Double_t > > nSelS_unWeighted
TrainNodeInfo operator+(const TrainNodeInfo &other)
std::vector< std::vector< Double_t > > nSelS
TrainNodeInfo(Int_t cNvars_, UInt_t *nBins_)
static uint64_t sum(uint64_t i)
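Drawing on the members documented above (the default constructor, BuildTree, PruneTree, CheckEvent, and the EventConstList typedef), a minimal end-to-end usage sketch; the sample vectors are assumed to be filled elsewhere with valid event pointers, and any further tree configuration is elided:

#include "TMVA/DecisionTree.h"
#include "TMVA/Event.h"

// Sketch only: 'trainSample' and 'valSample' are assumed to be prepared
// elsewhere; the default ctor uses the GiniIndex separation criterion.
void trainPruneEvaluate(const TMVA::DecisionTree::EventConstList& trainSample,
                        const TMVA::DecisionTree::EventConstList& valSample)
{
   TMVA::DecisionTree tree;      // default-constructed tree
   tree.BuildTree(trainSample);  // recursive node splitting on the training sample
   tree.PruneTree(&valSample);   // prune against an independent validation sample
   Double_t out = tree.CheckEvent(trainSample.front(), /*UseYesNoLeaf=*/kFALSE);
   (void)out;                    // leaf purity (or regression response)
}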