96 return std::abs(
x-
y) < std::numeric_limits<float>::epsilon() * std::abs(
x+
y) * ulp
98 || std::abs(
x-
y) < std::numeric_limits<float>::min();
104 return std::abs(
x-
y) < std::numeric_limits<double>::epsilon() * std::abs(
x+
y) * ulp
106 || std::abs(
x-
y) < std::numeric_limits<double>::min();
178 if (sepType == NULL) {
185 Log() << kWARNING <<
" You had chosen the training mode using optimal cuts, not\n"
186 <<
" based on a grid of " <<
fNCuts <<
" by setting the option NCuts < 0\n"
187 <<
" as this doesn't exist yet, I set it to " <<
fNCuts <<
" and use the grid"
252 Log() << kFATAL <<
"SetParentTreeNodes: started with undefined ROOT node" <<
Endl;
258 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
261 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
272 n->SetParentTree(
this);
281 std::string type(
"");
285 dt->
ReadXML( node, tmva_Version_Code );
305 for (
Int_t ivar=0; ivar<fNvars; ivar++) {
319 for (
Int_t ivar=0; ivar<fNvars; ivar++) {
320 xmin[ivar]=inxmin[ivar];
321 xmax[ivar]=inxmax[ivar];
345 std::cout <<
"!!! ERROR BuildNodeInfo1+BuildNodeInfo2 failure. Nvars1 != Nvars2." << std::endl;
387 this->
GetRoot()->SetParentTree(
this);
390 Log() << kDEBUG <<
"\tThe minimal node size MinNodeSize=" <<
fMinNodeSize <<
" fMinNodeSize="<<
fMinNodeSize<<
"% is translated to an actual number of events = "<<
fMinSize<<
" for the training sample size of " << eventSample.size() <<
Endl;
391 Log() << kDEBUG <<
"\tNote: This number will be taken as absolute minimum in the node, " <<
Endl;
392 Log() << kDEBUG <<
" \tin terms of 'weighted events' and unweighted ones !! " <<
Endl;
396 UInt_t nevents = eventSample.size();
399 if (
fNvars==0)
fNvars = eventSample[0]->GetNVariables();
402 else Log() << kFATAL <<
":<BuildTree> eventsample Size == 0 " <<
Endl;
413 auto f = [
this, &eventSample, &nPartitions](
UInt_t partition = 0){
415 Int_t start = 1.0*partition/nPartitions*eventSample.size();
416 Int_t end = (partition+1.0)/nPartitions*eventSample.size();
425 nodeInfof.
s += weight;
427 nodeInfof.
sub += orgWeight;
430 nodeInfof.
b += weight;
432 nodeInfof.
bub += orgWeight;
436 nodeInfof.
target +=weight*tgt;
437 nodeInfof.
target2+=weight*tgt*tgt;
444 nodeInfof.
xmin[ivar]=val;
445 nodeInfof.
xmax[ivar]=val;
447 if (val < nodeInfof.
xmin[ivar]) nodeInfof.
xmin[ivar]=val;
448 if (val > nodeInfof.
xmax[ivar]) nodeInfof.
xmax[ivar]=val;
458 auto redfunc = [nodeInfoInit](std::vector<BuildNodeInfo>
v) ->
BuildNodeInfo {
return std::accumulate(
v.begin(),
v.end(), nodeInfoInit); };
462 if (nodeInfo.
s+nodeInfo.
b < 0) {
463 Log() << kWARNING <<
" One of the Decision Tree nodes has negative total number of signal or background events. "
464 <<
"(Nsig="<<nodeInfo.
s<<
" Nbkg="<<nodeInfo.
b<<
" Probaby you use a Monte Carlo with negative weights. That should in principle "
465 <<
"be fine as long as on average you end up with something positive. For this you have to make sure that the "
466 <<
"minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize="<<
fMinNodeSize
467 <<
"% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
468 <<
"to allow for reasonable averaging!!!" <<
Endl
469 <<
" If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
470 <<
"with negative weight in the training." <<
Endl;
472 for (
UInt_t i=0; i<eventSample.size(); i++) {
473 if (eventSample[i]->GetClass() !=
fSigClass) {
474 nBkg += eventSample[i]->GetWeight();
475 Log() << kDEBUG <<
"Event "<< i<<
" has (original) weight: " << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
476 <<
" boostWeight: " << eventSample[i]->GetBoostWeight() <<
Endl;
479 Log() << kDEBUG <<
" that gives in total: " << nBkg<<
Endl;
525 if (separationGain < std::numeric_limits<double>::epsilon()) {
547 std::vector<const TMVA::Event*> leftSample; leftSample.reserve(nevents);
548 std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);
551 Double_t nRightUnBoosted=0, nLeftUnBoosted=0;
553 for (
UInt_t ie=0; ie< nevents ; ie++) {
555 rightSample.push_back(eventSample[ie]);
556 nRight += eventSample[ie]->GetWeight();
557 nRightUnBoosted += eventSample[ie]->GetOriginalWeight();
560 leftSample.push_back(eventSample[ie]);
561 nLeft += eventSample[ie]->GetWeight();
562 nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();
566 if (leftSample.empty() || rightSample.empty()) {
568 Log() << kERROR <<
"<TrainNode> all events went to the same branch" <<
Endl
569 <<
"--- Hence new node == old node ... check" <<
Endl
570 <<
"--- left:" << leftSample.size()
571 <<
" right:" << rightSample.size() <<
Endl
572 <<
" while the separation is thought to be " << separationGain
573 <<
"\n when cutting on variable " << node->
GetSelector()
575 << kFATAL <<
"--- this should never happen, please write a bug report to Helge.Voss@cern.ch" <<
Endl;
642 this->GetRoot()->SetPos(
's');
643 this->GetRoot()->SetDepth(0);
644 this->GetRoot()->SetParentTree(
this);
645 fMinSize = fMinNodeSize/100. * eventSample.size();
647 Log() << kDEBUG <<
"\tThe minimal node size MinNodeSize=" << fMinNodeSize <<
" fMinNodeSize="<<fMinNodeSize<<
"% is translated to an actual number of events = "<< fMinSize<<
" for the training sample size of " << eventSample.size() <<
Endl;
648 Log() << kDEBUG <<
"\tNote: This number will be taken as absolute minimum in the node, " <<
Endl;
649 Log() << kDEBUG <<
" \tin terms of 'weighted events' and unweighted ones !! " <<
Endl;
653 UInt_t nevents = eventSample.size();
656 if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
657 fVariableImportance.resize(fNvars);
659 else Log() <<
kFATAL <<
":<BuildTree> eventsample Size == 0 " <<
Endl;
669 for (
UInt_t ivar=0; ivar<fNvars; ivar++) {
675 for (
UInt_t iev=0; iev<eventSample.size(); iev++) {
676 const TMVA::Event* evt = eventSample[iev];
689 if ( DoRegression() ) {
692 target2+=weight*tgt*tgt;
696 for (
UInt_t ivar=0; ivar<fNvars; ivar++) {
698 if (iev==0)
xmin[ivar]=
xmax[ivar]=val;
699 if (val <
xmin[ivar])
xmin[ivar]=val;
700 if (val >
xmax[ivar])
xmax[ivar]=val;
706 Log() <<
kWARNING <<
" One of the Decision Tree nodes has negative total number of signal or background events. "
707 <<
"(Nsig="<<s<<
" Nbkg="<<
b<<
" Probaby you use a Monte Carlo with negative weights. That should in principle "
708 <<
"be fine as long as on average you end up with something positive. For this you have to make sure that the "
709 <<
"minimul number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize="<<fMinNodeSize
710 <<
"% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
711 <<
"to allow for reasonable averaging!!!" <<
Endl
712 <<
" If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
713 <<
"with negative weight in the training." <<
Endl;
715 for (
UInt_t i=0; i<eventSample.size(); i++) {
716 if (eventSample[i]->
GetClass() != fSigClass) {
717 nBkg += eventSample[i]->GetWeight();
718 Log() <<
kDEBUG <<
"Event "<< i<<
" has (original) weight: " << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
719 <<
" boostWeight: " << eventSample[i]->GetBoostWeight() <<
Endl;
732 if (node == this->GetRoot()) {
739 for (
UInt_t ivar=0; ivar<fNvars; ivar++) {
758 if ((eventSample.size() >= 2*fMinSize && s+
b >= 2*fMinSize) && node->
GetDepth() < fMaxDepth
759 && ( ( s!=0 &&
b !=0 && !DoRegression()) || ( (s+
b)!=0 && DoRegression()) ) ) {
762 separationGain = this->TrainNodeFast(eventSample, node);
764 separationGain = this->TrainNodeFull(eventSample, node);
766 if (separationGain < std::numeric_limits<double>::epsilon()) {
770 if (DoRegression()) {
785 if (node->
GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(node->
GetDepth());
789 std::vector<const TMVA::Event*> leftSample; leftSample.reserve(nevents);
790 std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);
793 Double_t nRightUnBoosted=0, nLeftUnBoosted=0;
795 for (
UInt_t ie=0; ie< nevents ; ie++) {
797 rightSample.push_back(eventSample[ie]);
798 nRight += eventSample[ie]->GetWeight();
799 nRightUnBoosted += eventSample[ie]->GetOriginalWeight();
802 leftSample.push_back(eventSample[ie]);
803 nLeft += eventSample[ie]->GetWeight();
804 nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();
809 if (leftSample.empty() || rightSample.empty()) {
811 Log() <<
kERROR <<
"<TrainNode> all events went to the same branch" <<
Endl
812 <<
"--- Hence new node == old node ... check" <<
Endl
813 <<
"--- left:" << leftSample.size()
814 <<
" right:" << rightSample.size() <<
Endl
815 <<
" while the separation is thought to be " << separationGain
816 <<
"\n when cutting on variable " << node->
GetSelector()
818 <<
kFATAL <<
"--- this should never happen, please write a bug report to Helge.Voss@cern.ch" <<
Endl;
822 TMVA::DecisionTreeNode *rightNode =
new TMVA::DecisionTreeNode(node,
'r');
828 TMVA::DecisionTreeNode *leftNode =
new TMVA::DecisionTreeNode(node,
'l');
839 this->BuildTree(rightSample, rightNode);
840 this->BuildTree(leftSample, leftNode );
845 if (DoRegression()) {
866 if (node->
GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(node->
GetDepth());
881 for (
UInt_t i=0; i<eventSample.size(); i++) {
924 if (this->
GetRoot()!=NULL) this->
GetRoot()->ClearNodeAndAllDaughters();
948 if (
l->GetNodeType() *
r->GetNodeType() > 0) {
978 Log() << kFATAL <<
"Selected pruning method not yet implemented "
982 if(!tool)
return 0.0;
986 if(validationSample == NULL){
987 Log() << kFATAL <<
"Cannot automate the pruning algorithm without an "
988 <<
"independent validation sample!" <<
Endl;
989 }
else if(validationSample->size() == 0) {
990 Log() << kFATAL <<
"Cannot automate the pruning algorithm with "
991 <<
"independent validation sample of ZERO events!" <<
Endl;
998 Log() << kFATAL <<
"Error pruning tree! Check prune.log for more information."
1018 return pruneStrength;
1030 GetRoot()->ResetValidationData();
1031 for (
UInt_t ievt=0; ievt < validationSample->size(); ievt++) {
1047 Log() << kFATAL <<
"TestPrunedTreeQuality: started with undefined ROOT node" <<
Endl;
1052 if(
n->GetLeft() != NULL &&
n->GetRight() != NULL && !
n->IsTerminal() ) {
1058 Double_t sumw =
n->GetNSValidation() +
n->GetNBValidation();
1059 return n->GetSumTarget2() - 2*
n->GetSumTarget()*
n->GetResponse() + sumw*
n->GetResponse()*
n->GetResponse();
1063 if (
n->GetPurity() > this->GetNodePurityLimit())
1064 return n->GetNBValidation();
1066 return n->GetNSValidation();
1068 else if ( mode == 1 ) {
1070 return (
n->GetPurity() *
n->GetNBValidation() + (1.0 -
n->GetPurity()) *
n->GetNSValidation());
1073 throw std::string(
"Unknown ValidationQualityMode");
1087 if (current == NULL) {
1088 Log() << kFATAL <<
"CheckEventWithPrunedTree: started with undefined ROOT node" <<
Endl;
1091 while(current != NULL) {
1097 if (
e->GetNTargets() > 0) {
1120 for( EventConstList::const_iterator it = validationSample->begin();
1121 it != validationSample->end(); ++it ) {
1122 sumWeights += (*it)->GetWeight();
1135 Log() << kFATAL <<
"CountLeafNodes: started with undefined ROOT node" <<
Endl;
1164 Log() << kFATAL <<
"DescendTree: started with undefined ROOT node" <<
Endl;
1173 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
1177 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
1217 if(node == NULL)
return;
1220 node->
SetAlpha( std::numeric_limits<double>::infinity( ) );
1234 for (
UInt_t i =0; i < depth; i++) {
1255 UInt_t nSelectedVars = 0;
1256 while (nSelectedVars < useNvars) {
1261 if (useVariable[ivar] ==
kTRUE) {
1262 mapVariable[nSelectedVars] = ivar;
1267 if (nSelectedVars != useNvars) { std::cout <<
"Bug in TrainNode - GetRandisedVariables()... sorry" << std::endl; std::exit(1);}
1284 nSelS = std::vector< std::vector<Double_t> >(
cNvars);
1285 nSelB = std::vector< std::vector<Double_t> >(
cNvars);
1292 nSelS[ivar] = std::vector<Double_t>(
nBins[ivar], 0);
1293 nSelB[ivar] = std::vector<Double_t>(
nBins[ivar], 0);
1296 target[ivar] = std::vector<Double_t>(
nBins[ivar], 0);
1319 std::vector< std::vector<Double_t> >
nSelS;
1320 std::vector< std::vector<Double_t> >
nSelB;
1335 std::cout <<
"!!! ERROR TrainNodeInfo1+TrainNodeInfo2 failure. cNvars1 != cNvars2." << std::endl;
1342 ret.nSelS[ivar][ibin] =
nSelS[ivar][ibin] + other.
nSelS[ivar][ibin];
1343 ret.nSelB[ivar][ibin] =
nSelB[ivar][ibin] + other.
nSelB[ivar][ibin];
1346 ret.target[ivar][ibin] =
target[ivar][ibin] + other.
target[ivar][ibin];
1383 separationGain[ivar]=-1;
1389 UInt_t nevents = eventSample.size();
1397 std::vector<Double_t> fisherCoeff;
1406 useVariable[ivar] =
kTRUE;
1407 mapVariable[ivar] = ivar;
1423 useVarInFisher[ivar] =
kFALSE;
1424 mapVarInFisher[ivar] = ivar;
1427 std::vector<TMatrixDSym*>* covMatrices;
1430 Log() << kWARNING <<
" in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" <<
Endl;
1442 useVarInFisher[ivar] =
kTRUE;
1443 useVarInFisher[jvar] =
kTRUE;
1454 if (useVarInFisher[ivar] && useVariable[ivar]) {
1455 mapVarInFisher[nFisherVars++]=ivar;
1466 delete [] useVarInFisher;
1467 delete [] mapVarInFisher;
1489 for (
UInt_t ivar=0; ivar<cNvars; ivar++) {
1493 if (
fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') {
1498 cutValues[ivar] =
new Double_t [nBins[ivar]];
1502 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1509 useVariable[ivar]=
kFALSE;
1517 for (
UInt_t iev=0; iev<nevents; iev++) {
1521 result += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
1522 if (result >
xmax[ivar])
xmax[ivar]=result;
1523 if (result <
xmin[ivar])
xmin[ivar]=result;
1527 for (
UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
1528 cutValues[ivar][ibin]=0;
1542 auto fvarInitCuts = [
this, &useVariable, &cutValues, &invBinWidth, &binWidth, &nBins, &
xmin, &
xmax](
UInt_t ivar = 0){
1544 if ( useVariable[ivar] ) {
1558 invBinWidth[ivar] = 1./binWidth[ivar];
1560 if (
fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') { invBinWidth[ivar] = 1; binWidth[ivar] = 1; }
1568 for (
UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
1569 cutValues[ivar][icut]=
xmin[ivar]+(
Double_t(icut+1))*binWidth[ivar];
1586 if(eventSample.size() >= cNvars*
fNCuts*nPartitions*2)
1591 auto f = [
this, &eventSample, &fisherCoeff, &useVariable, &invBinWidth,
1592 &nBins, &
xmin, &cNvars, &nPartitions](
UInt_t partition = 0){
1594 UInt_t start = 1.0*partition/nPartitions*eventSample.size();
1595 UInt_t end = (partition+1.0)/nPartitions*eventSample.size();
1601 Double_t eventWeight = eventSample[iev]->GetWeight();
1602 if (eventSample[iev]->GetClass() ==
fSigClass) {
1603 nodeInfof.
nTotS+=eventWeight;
1606 nodeInfof.
nTotB+=eventWeight;
1612 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1615 if ( useVariable[ivar] ) {
1617 if (ivar <
fNvars) eventData = eventSample[iev]->GetValueFast(ivar);
1619 eventData = fisherCoeff[
fNvars];
1621 eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
1627 if (eventSample[iev]->GetClass() ==
fSigClass) {
1628 nodeInfof.
nSelS[ivar][iBin]+=eventWeight;
1632 nodeInfof.
nSelB[ivar][iBin]+=eventWeight;
1636 nodeInfof.
target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
1637 nodeInfof.
target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
1649 auto redfunc = [nodeInfoInit](std::vector<TrainNodeInfo>
v) ->
TrainNodeInfo {
return std::accumulate(
v.begin(),
v.end(), nodeInfoInit); };
1658 auto fvarFillNodeInfo = [
this, &nodeInfo, &eventSample, &fisherCoeff, &useVariable, &invBinWidth, &nBins, &
xmin](
UInt_t ivar = 0){
1660 for(
UInt_t iev=0; iev<eventSample.size(); iev++) {
1663 Double_t eventWeight = eventSample[iev]->GetWeight();
1667 if (eventSample[iev]->GetClass() ==
fSigClass) {
1668 nodeInfo.
nTotS+=eventWeight;
1671 nodeInfo.
nTotB+=eventWeight;
1677 if ( useVariable[ivar] ) {
1679 if (ivar <
fNvars) eventData = eventSample[iev]->GetValueFast(ivar);
1681 eventData = fisherCoeff[
fNvars];
1683 eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
1689 if (eventSample[iev]->GetClass() ==
fSigClass) {
1690 nodeInfo.
nSelS[ivar][iBin]+=eventWeight;
1694 nodeInfo.
nSelB[ivar][iBin]+=eventWeight;
1698 nodeInfo.
target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
1699 nodeInfo.
target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
1712 auto fvarCumulative = [&nodeInfo, &useVariable, &nBins,
this, &eventSample](
UInt_t ivar = 0){
1713 if (useVariable[ivar]) {
1714 for (
UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
1715 nodeInfo.
nSelS[ivar][ibin]+=nodeInfo.
nSelS[ivar][ibin-1];
1717 nodeInfo.
nSelB[ivar][ibin]+=nodeInfo.
nSelB[ivar][ibin-1];
1720 nodeInfo.
target[ivar][ibin] +=nodeInfo.
target[ivar][ibin-1] ;
1725 Log() << kFATAL <<
"Helge, you have a bug ....nodeInfo.nSelS_unw..+nodeInfo.nSelB_unw..= "
1727 <<
" while eventsample size = " << eventSample.size()
1730 double lastBins=nodeInfo.
nSelS[ivar][nBins[ivar]-1] +nodeInfo.
nSelB[ivar][nBins[ivar]-1];
1731 double totalSum=nodeInfo.
nTotS+nodeInfo.
nTotB;
1732 if (
TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
1733 Log() << kFATAL <<
"Helge, you have another bug ....nodeInfo.nSelS+nodeInfo.nSelB= "
1735 <<
" while total number of events = " << totalSum
1746 auto fvarMaxSep = [&nodeInfo, &useVariable,
this, &separationGain, &cutIndex, &nBins] (
UInt_t ivar = 0){
1747 if (useVariable[ivar]) {
1749 for (
UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {
1779 sepTmp =
fRegType->GetSeparationGain(nodeInfo.
nSelS[ivar][iBin]+nodeInfo.
nSelB[ivar][iBin],
1782 nodeInfo.
target[ivar][nBins[ivar]-1],nodeInfo.
target2[ivar][nBins[ivar]-1]);
1786 if (separationGain[ivar] < sepTmp) {
1787 separationGain[ivar] = sepTmp;
1788 cutIndex[ivar] = iBin;
1798 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1799 if (useVariable[ivar] ) {
1800 if (separationGainTotal < separationGain[ivar]) {
1801 separationGainTotal = separationGain[ivar];
1821 if (nodeInfo.
nSelS[mxVar][cutIndex[mxVar]]/nodeInfo.
nTotS > nodeInfo.
nSelB[mxVar][cutIndex[mxVar]]/nodeInfo.
nTotB) cutType=
kTRUE;
1826 node->
SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);
1848 separationGainTotal = 0;
1854 for (
UInt_t i=0; i<cNvars; i++) {
1861 delete [] cutValues[i];
1872 delete [] cutValues;
1877 delete [] useVariable;
1878 delete [] mapVariable;
1880 delete [] separationGain;
1885 delete [] invBinWidth;
1887 return separationGainTotal;
1896 Double_t separationGainTotal = -1, sepTmp;
1901 for (
UInt_t ivar=0; ivar <= fNvars; ivar++) {
1902 separationGain[ivar]=-1;
1909 Int_t nTotS_unWeighted, nTotB_unWeighted;
1910 UInt_t nevents = eventSample.size();
1918 std::vector<Double_t> fisherCoeff;
1921 if (fRandomisedTree) {
1923 GetRandomisedVariables(useVariable,mapVariable,tmp);
1926 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
1927 useVariable[ivar] =
kTRUE;
1928 mapVariable[ivar] = ivar;
1932 useVariable[fNvars] =
kFALSE;
1936 if (fUseFisherCuts) {
1937 useVariable[fNvars] =
kTRUE;
1943 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
1944 useVarInFisher[ivar] =
kFALSE;
1945 mapVarInFisher[ivar] = ivar;
1948 std::vector<TMatrixDSym*>* covMatrices;
1951 Log() <<
kWARNING <<
" in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" <<
Endl;
1959 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
1960 for (
UInt_t jvar=ivar+1; jvar < fNvars; jvar++) {
1961 if ( (
TMath::Abs( (*s)(ivar, jvar)) > fMinLinCorrForFisher) ||
1962 (
TMath::Abs( (*
b)(ivar, jvar)) > fMinLinCorrForFisher) ){
1963 useVarInFisher[ivar] =
kTRUE;
1964 useVarInFisher[jvar] =
kTRUE;
1972 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
1975 if (useVarInFisher[ivar] && useVariable[ivar]) {
1976 mapVarInFisher[nFisherVars++]=ivar;
1979 if (fUseExclusiveVars) useVariable[ivar] =
kFALSE;
1984 fisherCoeff = this->GetFisherCoefficients(eventSample, nFisherVars, mapVarInFisher);
1987 delete [] useVarInFisher;
1988 delete [] mapVarInFisher;
1995 if (fUseFisherCuts && fisherOK) cNvars++;
2012 for (
UInt_t ivar=0; ivar<cNvars; ivar++) {
2014 nBins[ivar] = fNCuts+1;
2015 if (ivar < fNvars) {
2016 if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') {
2023 nSelS[ivar] =
new Double_t [nBins[ivar]];
2024 nSelB[ivar] =
new Double_t [nBins[ivar]];
2025 nSelS_unWeighted[ivar] =
new Double_t [nBins[ivar]];
2026 nSelB_unWeighted[ivar] =
new Double_t [nBins[ivar]];
2027 target[ivar] =
new Double_t [nBins[ivar]];
2028 target2[ivar] =
new Double_t [nBins[ivar]];
2029 cutValues[ivar] =
new Double_t [nBins[ivar]];
2038 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2045 useVariable[ivar]=
kFALSE;
2053 for (
UInt_t iev=0; iev<nevents; iev++) {
2055 Double_t result = fisherCoeff[fNvars];
2056 for (
UInt_t jvar=0; jvar<fNvars; jvar++)
2057 result += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
2058 if (result >
xmax[ivar])
xmax[ivar]=result;
2059 if (result <
xmin[ivar])
xmin[ivar]=result;
2062 for (
UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
2063 nSelS[ivar][ibin]=0;
2064 nSelB[ivar][ibin]=0;
2065 nSelS_unWeighted[ivar][ibin]=0;
2066 nSelB_unWeighted[ivar][ibin]=0;
2067 target[ivar][ibin]=0;
2068 target2[ivar][ibin]=0;
2069 cutValues[ivar][ibin]=0;
2076 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2078 if ( useVariable[ivar] ) {
2092 invBinWidth[ivar] = 1./binWidth[ivar];
2093 if (ivar < fNvars) {
2094 if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') { invBinWidth[ivar] = 1; binWidth[ivar] = 1; }
2102 for (
UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
2103 cutValues[ivar][icut]=
xmin[ivar]+(
Double_t(icut+1))*binWidth[ivar];
2111 nTotS_unWeighted=0; nTotB_unWeighted=0;
2112 for (
UInt_t iev=0; iev<nevents; iev++) {
2114 Double_t eventWeight = eventSample[iev]->GetWeight();
2115 if (eventSample[iev]->
GetClass() == fSigClass) {
2117 nTotS_unWeighted++; }
2125 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2128 if ( useVariable[ivar] ) {
2130 if (ivar < fNvars) eventData = eventSample[iev]->GetValueFast(ivar);
2132 eventData = fisherCoeff[fNvars];
2133 for (
UInt_t jvar=0; jvar<fNvars; jvar++)
2134 eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
2140 if (eventSample[iev]->
GetClass() == fSigClass) {
2141 nSelS[ivar][iBin]+=eventWeight;
2142 nSelS_unWeighted[ivar][iBin]++;
2145 nSelB[ivar][iBin]+=eventWeight;
2146 nSelB_unWeighted[ivar][iBin]++;
2148 if (DoRegression()) {
2149 target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
2150 target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
2157 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2158 if (useVariable[ivar]) {
2159 for (
UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
2160 nSelS[ivar][ibin]+=nSelS[ivar][ibin-1];
2161 nSelS_unWeighted[ivar][ibin]+=nSelS_unWeighted[ivar][ibin-1];
2162 nSelB[ivar][ibin]+=nSelB[ivar][ibin-1];
2163 nSelB_unWeighted[ivar][ibin]+=nSelB_unWeighted[ivar][ibin-1];
2164 if (DoRegression()) {
2165 target[ivar][ibin] +=target[ivar][ibin-1] ;
2166 target2[ivar][ibin]+=target2[ivar][ibin-1];
2169 if (nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1] != eventSample.size()) {
2170 Log() <<
kFATAL <<
"Helge, you have a bug ....nSelS_unw..+nSelB_unw..= "
2171 << nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1]
2172 <<
" while eventsample size = " << eventSample.size()
2175 double lastBins=nSelS[ivar][nBins[ivar]-1] +nSelB[ivar][nBins[ivar]-1];
2176 double totalSum=nTotS+nTotB;
2177 if (
TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
2178 Log() <<
kFATAL <<
"Helge, you have another bug ....nSelS+nSelB= "
2180 <<
" while total number of events = " << totalSum
2188 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2189 if (useVariable[ivar]) {
2190 for (
UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {
2202 Double_t sl = nSelS_unWeighted[ivar][iBin];
2203 Double_t bl = nSelB_unWeighted[ivar][iBin];
2215 if ( ((sl+bl)>=fMinSize && (sr+br)>=fMinSize)
2216 && ((slW+blW)>=fMinSize && (srW+brW)>=fMinSize)
2219 if (DoRegression()) {
2220 sepTmp = fRegType->GetSeparationGain(nSelS[ivar][iBin]+nSelB[ivar][iBin],
2221 target[ivar][iBin],target2[ivar][iBin],
2223 target[ivar][nBins[ivar]-1],target2[ivar][nBins[ivar]-1]);
2225 sepTmp = fSepType->GetSeparationGain(nSelS[ivar][iBin], nSelB[ivar][iBin], nTotS, nTotB);
2227 if (separationGain[ivar] < sepTmp) {
2228 separationGain[ivar] = sepTmp;
2229 cutIndex[ivar] = iBin;
2237 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
2238 if (useVariable[ivar] ) {
2239 if (separationGainTotal < separationGain[ivar]) {
2240 separationGainTotal = separationGain[ivar];
2247 if (DoRegression()) {
2248 node->
SetSeparationIndex(fRegType->GetSeparationIndex(nTotS+nTotB,target[0][nBins[mxVar]-1],target2[0][nBins[mxVar]-1]));
2249 node->
SetResponse(target[0][nBins[mxVar]-1]/(nTotS+nTotB));
2250 if (
almost_equal_double(target2[0][nBins[mxVar]-1]/(nTotS+nTotB), target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB))) {
2253 node->
SetRMS(
TMath::Sqrt(target2[0][nBins[mxVar]-1]/(nTotS+nTotB) - target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB)));
2259 if (nSelS[mxVar][cutIndex[mxVar]]/nTotS > nSelB[mxVar][cutIndex[mxVar]]/nTotB) cutType=
kTRUE;
2264 node->
SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);
2267 if (mxVar < (
Int_t) fNvars){
2269 fVariableImportance[mxVar] += separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;
2276 for (
UInt_t ivar=0; ivar<=fNvars; ivar++) {
2280 fVariableImportance[ivar] += fisherCoeff[ivar]*fisherCoeff[ivar]*separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;
2286 separationGainTotal = 0;
2302 for (
UInt_t i=0; i<cNvars; i++) {
2305 delete [] nSelS_unWeighted[i];
2306 delete [] nSelB_unWeighted[i];
2307 delete [] target[i];
2308 delete [] target2[i];
2309 delete [] cutValues[i];
2313 delete [] nSelS_unWeighted;
2314 delete [] nSelB_unWeighted;
2317 delete [] cutValues;
2322 delete [] useVariable;
2323 delete [] mapVariable;
2325 delete [] separationGain;
2330 delete [] invBinWidth;
2332 return separationGainTotal;
2342 std::vector<Double_t> fisherCoeff(
fNvars+1);
2365 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) { sumS[ivar] = sumB[ivar] = 0; }
2367 UInt_t nevents = eventSample.size();
2369 for (
UInt_t ievt=0; ievt<nevents; ievt++) {
2372 const Event * ev = eventSample[ievt];
2377 else sumOfWeightsB += weight;
2380 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
2384 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
2385 (*meanMatx)( ivar, 2 ) = sumS[ivar];
2386 (*meanMatx)( ivar, 0 ) = sumS[ivar]/sumOfWeightsS;
2388 (*meanMatx)( ivar, 2 ) += sumB[ivar];
2389 (*meanMatx)( ivar, 1 ) = sumB[ivar]/sumOfWeightsB;
2392 (*meanMatx)( ivar, 2 ) /= (sumOfWeightsS + sumOfWeightsB);
2404 assert( sumOfWeightsS > 0 && sumOfWeightsB > 0 );
2408 const Int_t nFisherVars2 = nFisherVars*nFisherVars;
2412 memset(sum2Sig,0,nFisherVars2*
sizeof(
Double_t));
2413 memset(sum2Bgd,0,nFisherVars2*
sizeof(
Double_t));
2416 for (
UInt_t ievt=0; ievt<nevents; ievt++) {
2420 const Event* ev = eventSample.at(ievt);
2430 if ( ev->
GetClass() ==
fSigClass ) sum2Sig[k] += ( (xval[
x] - (*meanMatx)(
x, 0))*(xval[
y] - (*meanMatx)(
y, 0)) )*weight;
2431 else sum2Bgd[k] += ( (xval[
x] - (*meanMatx)(
x, 1))*(xval[
y] - (*meanMatx)(
y, 1)) )*weight;
2439 (*with)(
x,
y) = sum2Sig[k]/sumOfWeightsS + sum2Bgd[k]/sumOfWeightsB;
2459 prodSig = ( ((*meanMatx)(
x, 0) - (*meanMatx)(
x, 2))*
2460 ((*meanMatx)(
y, 0) - (*meanMatx)(
y, 2)) );
2461 prodBgd = ( ((*meanMatx)(
x, 1) - (*meanMatx)(
x, 2))*
2462 ((*meanMatx)(
y, 1) - (*meanMatx)(
y, 2)) );
2464 (*betw)(
x,
y) = (sumOfWeightsS*prodSig + sumOfWeightsB*prodBgd) / (sumOfWeightsS + sumOfWeightsB);
2473 (*cov)(
x,
y) = (*with)(
x,
y) + (*betw)(
x,
y);
2488 Log() << kWARNING <<
"FisherCoeff matrix is almost singular with determinant="
2490 <<
" did you use the variables that are linear combinations or highly correlated?"
2494 Log() << kFATAL <<
"FisherCoeff matrix is singular with determinant="
2496 <<
" did you use the variables that are linear combinations?"
2503 Double_t xfact =
TMath::Sqrt( sumOfWeightsS*sumOfWeightsB ) / (sumOfWeightsS + sumOfWeightsB);
2506 std::vector<Double_t> diffMeans( nFisherVars );
2508 for (
UInt_t ivar=0; ivar<=
fNvars; ivar++) fisherCoeff[ivar] = 0;
2509 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
2510 for (
UInt_t jvar=0; jvar<nFisherVars; jvar++) {
2511 Double_t d = (*meanMatx)(jvar, 0) - (*meanMatx)(jvar, 1);
2512 fisherCoeff[mapVarInFisher[ivar]] += invCov(ivar, jvar)*
d;
2516 fisherCoeff[mapVarInFisher[ivar]] *= xfact;
2521 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++){
2522 f0 += fisherCoeff[mapVarInFisher[ivar]]*((*meanMatx)(ivar, 0) + (*meanMatx)(ivar, 1));
2526 fisherCoeff[
fNvars] = f0;
2539 Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;
2541 std::vector<TMVA::BDTEventWrapper> bdtEventSample;
2545 std::vector<Double_t> lCutValue(
fNvars, 0.0 );
2546 std::vector<Double_t> lSepGain(
fNvars, -1.0e6 );
2547 std::vector<Char_t> lCutType(
fNvars );
2552 for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {
2554 nTotS += (*it)->GetWeight();
2558 nTotB += (*it)->GetWeight();
2564 std::vector<Char_t> useVariable(
fNvars);
2573 Int_t nSelectedVars = 0;
2579 if(useVariable[ivar] ==
Char_t(
kTRUE)) nSelectedVars++;
2587 if(!useVariable[ivar])
continue;
2591 std::sort( bdtEventSample.begin(),bdtEventSample.end() );
2594 Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;
2596 std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
2597 for( ; it != it_end; ++it ) {
2599 sigWeightCtr += (**it)->GetWeight();
2601 bkgWeightCtr += (**it)->GetWeight();
2603 it->SetCumulativeWeight(
false,bkgWeightCtr);
2604 it->SetCumulativeWeight(
true,sigWeightCtr);
2610 Double_t separationGain = -1.0, sepTmp = 0.0, cutValue = 0.0, dVal = 0.0, norm = 0.0;
2613 for( it = bdtEventSample.begin(); it != it_end; ++it ) {
2614 if( index == 0 ) { ++index;
continue; }
2615 if( *(*it) == NULL ) {
2616 Log() << kFATAL <<
"In TrainNodeFull(): have a null event! Where index="
2617 << index <<
", and parent node=" << node->
GetParent() <<
Endl;
2620 dVal = bdtEventSample[index].GetVal() - bdtEventSample[index-1].GetVal();
2621 norm =
TMath::Abs(bdtEventSample[index].GetVal() + bdtEventSample[index-1].GetVal());
2624 if( index >=
fMinSize && (nTotS_unWeighted + nTotB_unWeighted) - index >=
fMinSize &&
TMath::Abs(dVal/(0.5*norm + 1)) > fPMin ) {
2626 sepTmp =
fSepType->GetSeparationGain( it->GetCumulativeWeight(
true), it->GetCumulativeWeight(
false), sigWeightCtr, bkgWeightCtr );
2627 if( sepTmp > separationGain ) {
2628 separationGain = sepTmp;
2629 cutValue = it->GetVal() - 0.5*dVal;
2630 Double_t nSelS = it->GetCumulativeWeight(
true);
2631 Double_t nSelB = it->GetCumulativeWeight(
false);
2634 if( nSelS/sigWeightCtr > nSelB/bkgWeightCtr ) cutType =
kTRUE;
2640 lCutType[ivar] =
Char_t(cutType);
2641 lCutValue[ivar] = cutValue;
2642 lSepGain[ivar] = separationGain;
2645 Int_t iVarIndex = -1;
2647 if( lSepGain[ivar] > separationGain ) {
2649 separationGain = lSepGain[ivar];
2654 if(iVarIndex >= 0) {
2659 fVariableImportance[iVarIndex] += separationGain*separationGain * (nTotS+nTotB) * (nTotS+nTotB);
2662 separationGain = 0.0;
2665 return separationGain;
2693 Log() << kFATAL <<
"CheckEvent: started with undefined ROOT node" <<
Endl;
2702 Log() << kFATAL <<
"DT::CheckEvent: inconsistent tree structure" <<
Endl;
2723 Double_t sumsig=0, sumbkg=0, sumtot=0;
2724 for (
UInt_t ievt=0; ievt<eventSample.size(); ievt++) {
2725 if (eventSample[ievt]->GetClass() !=
fSigClass) sumbkg+=eventSample[ievt]->GetWeight();
2726 else sumsig+=eventSample[ievt]->GetWeight();
2727 sumtot+=eventSample[ievt]->GetWeight();
2730 if (sumtot!= (sumsig+sumbkg)){
2731 Log() << kFATAL <<
"<SamplePurity> sumtot != sumsig+sumbkg"
2732 << sumtot <<
" " << sumsig <<
" " << sumbkg <<
Endl;
2734 if (sumtot>0)
return sumsig/(sumsig + sumbkg);
2746 std::vector<Double_t> relativeImportance(
fNvars);
2754 if (
sum > std::numeric_limits<double>::epsilon())
2755 relativeImportance[i] /=
sum;
2757 relativeImportance[i] = 0;
2759 return relativeImportance;
2768 if (ivar <
fNvars)
return relativeImportance[ivar];
2770 Log() << kFATAL <<
"<GetVariableImportance>" <<
Endl
2771 <<
"--- ivar = " << ivar <<
" is out of range " <<
Endl;
bool almost_equal_double(double x, double y, int ulp=4)
bool almost_equal_float(float x, float y, int ulp=4)
int Int_t
Signed integer 4 bytes (int).
char Char_t
Character 1 byte (char).
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
unsigned long ULong_t
Unsigned long integer, 4 or 8 bytes (unsigned long). Size depends on architecture.
bool Bool_t
Boolean (0=false, 1=true) (bool).
double Double_t
Double 8 bytes.
long long Long64_t
Portable signed long integer 8 bytes.
float Float_t
Float 4 bytes (float).
TMatrixT< Double_t > TMatrixD
static void SetVarIndex(Int_t iVar)
UInt_t CountNodes(Node *n=nullptr)
return the number of nodes in the tree. (make a new count --> takes time)
Node * GetLeftDaughter(Node *n)
get left daughter node of current node "n"
Node * GetRightDaughter(Node *n)
get right daughter node of current node "n"
BinaryTree(void)
constructor for a yet "empty" tree. Needs to be filled afterwards
void DeleteNode(Node *)
protected, recursive, function used by the class destructor and when Pruning
UInt_t fNNodes
total number of nodes in the tree (counted)
void SetTotalTreeDepth(Int_t depth)
virtual void ReadXML(void *node, UInt_t tmva_Version_Code=262657)
read attributes from XML
Executor & GetThreadExecutor()
Get executor class for multi-thread usage. In case MT is not enabled, a serial executor is returned.
static Config & Instance()
static function: returns TMVA instance
Class that contains all the data information.
void SetNEvents_unweighted(Float_t nev)
set the number of unweighted events that entered the node (during training), if traininfo defined
void SetNodeType(Int_t t)
set node type: 1 signal node, -1 bkg leaf, 0 intermediate Node
void SetSeparationGain(Float_t sep)
set the separation, or information gained BY this node's selection, if traininfo defined
void SetNBkgEvents(Float_t b)
set the sum of the backgr weights in the node, if traininfo defined
void SetCutType(Bool_t t)
set true: if event variable > cutValue ==> signal , false otherwise
Double_t GetNSValidation() const
return number of signal events from the pruning validation sample, or -1 if traininfo undefined
void IncrementNEvents_unweighted()
increment the number of events that entered the node (during training), if traininfo defined
void SetFisherCoeff(Int_t ivar, Double_t coeff)
set fisher coefficients
void SetNSigEvents_unboosted(Float_t s)
set the sum of the unboosted signal events in the node, if traininfo defined
void SetAlphaMinSubtree(Double_t g)
set the minimum alpha in the tree rooted at this node, if traininfo defined
void IncrementNBkgEvents(Float_t b)
increment the sum of the backgr weights in the node, if traininfo defined
void SetNEvents_unboosted(Float_t nev)
set the number of unboosted events that entered the node (during training), if traininfo defined
Float_t GetNSigEvents(void) const
return the sum of the signal weights in the node, or -1 if traininfo undefined
DecisionTreeNode * GetLeft() const override
void SetTerminal(Bool_t s=kTRUE)
void SetLeft(Node *l) override
DecisionTreeNode * GetRight() const override
void SetResponse(Float_t r)
set the response of the node (for regression)
void SetSampleMax(UInt_t ivar, Float_t xmax)
set the maximum of variable ivar from the training sample that pass/end up in this node,...
void SetNBValidation(Double_t b)
set number of background events from the pruning validation sample, if traininfo defined
void IncrementNEvents(Float_t nev)
void SetPurity(void)
return the S/(S+B) (purity) for the node REM: even if nodes with purity 0.01 are very PURE background...
void SetSubTreeR(Double_t r)
set the resubstitution estimate, R(T_t), of the tree rooted at this node, if traininfo defined
void AddToSumTarget2(Float_t t2)
add to sum target 2, if traininfo defined
DecisionTreeNode * GetParent() const override
Double_t GetNodeR() const
return the node resubstitution estimate, R(t), for Cost Complexity pruning, or -1 if traininfo undefi...
void SetNFisherCoeff(Int_t nvars)
Short_t GetSelector() const
return index of variable used for discrimination at this node
void SetNSigEvents(Float_t s)
set the sum of the signal weights in the node, if traininfo defined
Float_t GetResponse(void) const
return the response of the node (for regression)
Float_t GetCutValue(void) const
return the cut value applied at this node
Bool_t GoesRight(const Event &) const override
test event if it descends the tree at this node to the right
Int_t GetNodeType(void) const
return node type: 1 signal node, -1 bkg leaf, 0 intermediate Node
void IncrementNBkgEvents_unweighted()
increment the sum of the backgr weights in the node, if traininfo defined
void SetNSigEvents_unweighted(Float_t s)
set the sum of the unweighted signal events in the node, if traininfo defined
void SetRight(Node *r) override
Double_t GetNBValidation() const
return number of background events from the pruning validation sample, or -1 if traininfo undefined
void SetAlpha(Double_t alpha)
set the critical point alpha, if traininfo defined
void SetSeparationIndex(Float_t sep)
set the chosen index, measure of "purity" (separation between S and B) AT this node,...
void SetRMS(Float_t r)
set the RMS of the response of the node (for regression)
void IncrementNSigEvents_unweighted()
increment the sum of the signal weights in the node, if traininfo defined
void SetNBkgEvents_unboosted(Float_t b)
set the sum of the unboosted backgr events in the node, if traininfo defined
Float_t GetPurity(void) const
return S/(S+B) (purity) at this node (from training)
void IncrementNSigEvents(Float_t s)
increment the sum of the signal weights in the node, if traininfo defined
Float_t GetSampleMax(UInt_t ivar) const
return the maximum of variable ivar from the training sample that pass/end up in this node,...
void SetCutValue(Float_t c)
set the cut value applied at this node
Float_t GetNBkgEvents(void) const
return the sum of the backgr weights in the node, or -1 if traininfo undefined
Float_t GetSampleMin(UInt_t ivar) const
return the minimum of variable ivar from the training sample that pass/end up in this node,...
void SetSampleMin(UInt_t ivar, Float_t xmin)
set the minimum of variable ivar from the training sample that pass/end up in this node,...
void SetSelector(Short_t i)
set index of variable used for discrimination at this node
void SetNBkgEvents_unweighted(Float_t b)
set the sum of the unweighted backgr events in the node, if traininfo defined
void SetNSValidation(Double_t s)
set number of signal events from the pruning validation sample, if traininfo defined
void AddToSumTarget(Float_t t)
add to sum target, if traininfo defined
void SetNTerminal(Int_t n)
set number of terminal nodes in the subtree rooted here, if traininfo defined
void SetNEvents(Float_t nev)
set the number of events that entered the node (during training), if traininfo defined
Implementation of a Decision Tree.
Bool_t DoRegression() const
Int_t fNNodesBeforePruning
remember this one (in case of pruning, it allows to monitor the before/after)
Double_t fMinSize
min number of events in node
UInt_t BuildTree(const EventConstList &eventSample, DecisionTreeNode *node=nullptr)
building the decision tree by recursively calling the splitting of one (root-) node into two daughter...
void FillTree(const EventList &eventSample)
fill the existing the decision tree structure by filling event in from the top node and see where the...
Double_t fMinNodeSize
min fraction of training events in node
void PruneNode(TMVA::DecisionTreeNode *node)
prune away the subtree below the node
TRandom3 * fMyTrandom
random number generator for randomised trees
DecisionTreeNode * GetRoot() const override
Int_t fTreeID
just an ID number given to the tree.. makes debugging easier as the tree knows who it is.
void ApplyValidationSample(const EventConstList *validationSample) const
run the validation sample through the (pruned) tree and fill in the nodes the variables NSValidation ...
Double_t TrainNodeFull(const EventConstList &eventSample, DecisionTreeNode *node)
train a node by finding the single optimal cut for a single variable that best separates signal and b...
EPruneMethod fPruneMethod
method used for pruning
Bool_t fUseSearchTree
cut scan done with binary trees or simple event loop.
TMVA::DecisionTreeNode * GetEventNode(const TMVA::Event &e) const
get the pointer to the leaf node where a particular event ends up in... (used in gradient boosting)
SeparationBase * fSepType
the separation criteria
UInt_t fMaxDepth
max depth
void GetRandomisedVariables(Bool_t *useVariable, UInt_t *variableMap, UInt_t &nVars)
Int_t fUseNvars
the number of variables used in randomised trees;
void SetParentTreeInNodes(Node *n=nullptr)
descend a tree to find all its leaf nodes, fill max depth reached in the tree at the same time.
void DescendTree(Node *n=nullptr)
descend a tree to find all its leaf nodes
Double_t fPruneStrength
a parameter to set the "amount" of pruning..needs to be adjusted
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=262657)
re-create a new tree (decision tree or search tree) from XML
UInt_t fSigClass
class which is treated as signal when building the tree
std::vector< const TMVA::Event * > EventConstList
Bool_t fUseFisherCuts
use multivariate splits using the Fisher criterium
Double_t fNodePurityLimit
purity limit to decide whether a node is signal
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
Bool_t fUseExclusiveVars
individual variables already used in fisher criterium are not anymore analysed individually for node ...
static const Int_t fgRandomSeed
Int_t fNCuts
number of grid point in variable cut scans
UInt_t CleanTree(DecisionTreeNode *node=nullptr)
remove those last splits that result in two leaf nodes that are both of the type (i....
Double_t fMinLinCorrForFisher
the minimum linear correlation between two variables demanded for use in fisher criterium in node spl...
UInt_t fNvars
number of variables used to separate S and B
Bool_t fRandomisedTree
choose at each node splitting a random set of variables
virtual ~DecisionTree(void)
destructor
Types::EAnalysisType fAnalysisType
kClassification(=0=false) or kRegression(=1=true)
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
void CheckEventWithPrunedTree(const TMVA::Event *) const
pass a single validation event through a pruned decision tree on the way down the tree,...
void PruneNodeInPlace(TMVA::DecisionTreeNode *node)
prune a node temporarily (without actually deleting its descendants which allows testing the pruned t...
Double_t fMinSepGain
minimum separation gain required to perform node splitting
Double_t TestPrunedTreeQuality(const DecisionTreeNode *dt=nullptr, Int_t mode=0) const
return the misclassification rate of a pruned tree a "pruned tree" may have set the variable "IsTermi...
std::vector< Double_t > fVariableImportance
the relative importance of the different variables
Double_t PruneTree(const EventConstList *validationSample=nullptr)
prune (get rid of internal nodes) the Decision tree to avoid overtraining several different pruning m...
void FillEvent(const TMVA::Event &event, TMVA::DecisionTreeNode *node)
fill the existing the decision tree structure by filling event in from the top node and see where the...
DataSetInfo * fDataSetInfo
void ClearTree()
clear the tree nodes (their S/N, Nevents etc), just keep the structure of the tree
Double_t SamplePurity(EventList eventSample)
calculates the purity S/(S+B) of a given event sample
Node * GetNode(ULong_t sequence, UInt_t depth)
retrieve node from the tree.
std::vector< Double_t > GetFisherCoefficients(const EventConstList &eventSample, UInt_t nFisherVars, UInt_t *mapVarInFisher)
calculate the fisher coefficients for the event sample and the variables used
UInt_t CountLeafNodes(TMVA::Node *n=nullptr)
return the number of terminal nodes in the sub-tree below Node n
Double_t TrainNodeFast(const EventConstList &eventSample, DecisionTreeNode *node)
Decide how to split a node using one of the variables that gives the best separation of signal/backgr...
Bool_t fUsePoissonNvars
use "fUseNvars" not as fixed number but as mean of a poisson distr. in each split
RegressionVariance * fRegType
the separation criteria used in Regression
DecisionTree(void)
default constructor using the GiniIndex as separation criterion, no restrictions on minimum number of ...
Double_t GetSumWeights(const EventConstList *validationSample) const
calculate the normalization factor for a pruning validation sample
Double_t GetPruneStrength() const
Double_t GetOriginalWeight() const
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Float_t GetValueFast(UInt_t ivar) const
Float_t GetTarget(UInt_t itgt) const
auto Map(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Wrap TExecutor::Map functions.
auto MapReduce(F func, ROOT::TSeq< INTEGER > args, R redfunc) -> InvokeResult_t< F, INTEGER >
Wrap TExecutor::MapReduce functions.
unsigned int GetPoolSize() const
Node for the BinarySearch or Decision Trees.
std::vector< DecisionTreeNode * > PruneSequence
! the sequence of pruning locations in T_max that yields T
Double_t PruneStrength
! the regularization parameter for pruning
Calculate the "SeparationGain" for Regression analysis separation criteria used in various training a...
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
Singleton class for Global types used by TMVA.
Double_t Determinant() const override
Return the matrix determinant.
TMatrixT< Element > & Invert(Double_t *det=nullptr)
Invert the matrix and calculate its determinant.
Random number generator class based on M. Matsumoto's Mersenne Twister generator.
TSeq< unsigned int > TSeqU
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Double_t Sqrt(Double_t x)
Returns the square root of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
BuildNodeInfo(Int_t fNvars, std::vector< Float_t > &inxmin, std::vector< Float_t > &inxmax)
std::vector< Float_t > xmin
BuildNodeInfo operator+(const BuildNodeInfo &other)
std::vector< Float_t > xmax
BuildNodeInfo(Int_t fNvars, const TMVA::Event *evt)
Double_t nTotB_unWeighted
std::vector< std::vector< Double_t > > target2
std::vector< std::vector< Double_t > > nSelB_unWeighted
std::vector< std::vector< Double_t > > nSelB
Double_t nTotS_unWeighted
std::vector< std::vector< Double_t > > target
std::vector< std::vector< Double_t > > nSelS_unWeighted
TrainNodeInfo operator+(const TrainNodeInfo &other)
std::vector< std::vector< Double_t > > nSelS
TrainNodeInfo(Int_t cNvars_, UInt_t *nBins_)
static uint64_t sum(uint64_t i)