   return std::abs(x-y) < std::numeric_limits<float>::epsilon() * std::abs(x+y) * ulp
      || std::abs(x-y) < std::numeric_limits<float>::min();
// ...
   return std::abs(x-y) < std::numeric_limits<double>::epsilon() * std::abs(x+y) * ulp
      || std::abs(x-y) < std::numeric_limits<double>::min();
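// --- Editor's illustrative sketch (not part of DecisionTree.cxx) ---
// The two comparisons above are the tails of this file's ULP-based float
// comparators. A self-contained version of that standard idiom, matching the
// almost_equal_float/almost_equal_double signatures used in this file:
#include <cmath>
#include <limits>

bool almost_equal_float_demo(float x, float y, int ulp = 4)
{
   // equal within `ulp` units in the last place (machine epsilon scaled to the
   // magnitude of the operands), or closer than the smallest normalised float
   return std::abs(x - y) < std::numeric_limits<float>::epsilon() * std::abs(x + y) * ulp
       || std::abs(x - y) < std::numeric_limits<float>::min();
}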
   fMinLinCorrForFisher (1),
   fUseExclusiveVars (kTRUE),
   // ...
   fPruneMethod (kNoPruning),
   fNNodesBeforePruning(0),
   fNodePurityLimit(0.5),
   // ...
   fAnalysisType (Types::kClassification),
if (sepType == NULL) {
// ...
Log() << kWARNING << " You had chosen the training mode using optimal cuts, not\n"
      << " based on a grid of " << fNCuts << " by setting the option NCuts < 0\n"
      << " as this doesn't exist yet, I set it to " << fNCuts << " and use the grid"
      << Endl;
// ...
Log() << kFATAL << "SetParentTreeNodes: started with undefined ROOT node" << Endl;
// ...
Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
// ...
Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
std::string type("");
// ...
dt->ReadXML( node, tmva_Version_Code );
struct BuildNodeInfo{
   // ...
      xmin = std::vector<Float_t>(nvars);
      xmax = std::vector<Float_t>(nvars);
   // ...
   BuildNodeInfo(Int_t fNvars, std::vector<Float_t>& inxmin, std::vector<Float_t>& inxmax){
      // ...
      xmin = std::vector<Float_t>(nvars);
      xmax = std::vector<Float_t>(nvars);
      // ...
      xmin[ivar]=inxmin[ivar];
      xmax[ivar]=inxmax[ivar];
   // ...
   std::vector<Float_t> xmin;
   std::vector<Float_t> xmax;
   BuildNodeInfo operator+(const BuildNodeInfo& other)
   {
      BuildNodeInfo ret(nvars, xmin, xmax);
      if(nvars != other.nvars)
      {
         std::cout << "!!! ERROR BuildNodeInfo1+BuildNodeInfo2 failure. Nvars1 != Nvars2." << std::endl;
      }
      // ...
      ret.suw = suw + other.suw;
      ret.sub = sub + other.sub;
      // ...
      ret.buw = buw + other.buw;
      ret.bub = bub + other.bub;
      ret.target = target + other.target;
      ret.target2 = target2 + other.target2;
      // ...
      for(Int_t i=0; i<nvars; i++)
      {
         ret.xmin[i]=xmin[i]<other.xmin[i]?xmin[i]:other.xmin[i];
         ret.xmax[i]=xmax[i]>other.xmax[i]?xmax[i]:other.xmax[i];
      }
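// --- Editor's illustrative sketch (not part of DecisionTree.cxx) ---
// Giving the per-partition statistics struct an operator+ is what makes the
// parallel partial results foldable with std::accumulate, exactly as the
// redfunc lambdas below do. In miniature (hypothetical Stats struct):
#include <numeric>
#include <vector>

struct Stats {
   double s = 0, b = 0;   // summed signal/background weights of one partition
   Stats operator+(const Stats& o) const { return {s + o.s, b + o.b}; }
};

// fold all per-partition partial sums into one result, starting from identity
Stats combine(const std::vector<Stats>& parts)
{
   return std::accumulate(parts.begin(), parts.end(), Stats{});
}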
Log() << kDEBUG << "\tThe minimal node size MinNodeSize=" << fMinNodeSize
      << "% is translated to an actual number of events = " << fMinSize
      << " for the training sample size of " << eventSample.size() << Endl;
Log() << kDEBUG << "\tNote: This number will be taken as absolute minimum in the node, " << Endl;
Log() << kDEBUG << " \tin terms of 'weighted events' and unweighted ones !! " << Endl;
UInt_t nevents = eventSample.size();
// ...
if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
// ...
else Log() << kFATAL << ":<BuildTree> eventsample Size == 0 " << Endl;
auto f = [this, &eventSample, &nPartitions](UInt_t partition = 0){
   Int_t start = 1.0*partition/nPartitions*eventSample.size();
   Int_t end   = (partition+1.0)/nPartitions*eventSample.size();
   // ...
   BuildNodeInfo nodeInfof(fNvars, eventSample[0]);
   // ...
   for(Int_t iev=start; iev<end; iev++){
      // ...
      nodeInfof.s += weight;
      // ...
      nodeInfof.sub += orgWeight;
      // ...
      nodeInfof.b += weight;
      // ...
      nodeInfof.bub += orgWeight;
      // ...
      nodeInfof.target +=weight*tgt;
      nodeInfof.target2+=weight*tgt*tgt;
      // ...
      nodeInfof.xmin[ivar]=val;
      nodeInfof.xmax[ivar]=val;
      // ...
      if (val < nodeInfof.xmin[ivar]) nodeInfof.xmin[ivar]=val;
      if (val > nodeInfof.xmax[ivar]) nodeInfof.xmax[ivar]=val;
BuildNodeInfo nodeInfoInit(fNvars, eventSample[0]);
// ...
auto redfunc = [nodeInfoInit](std::vector<BuildNodeInfo> v) -> BuildNodeInfo {
   return std::accumulate(v.begin(), v.end(), nodeInfoInit); };
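// --- Editor's note (illustrative, not part of this excerpt) ---
// `f` maps one partition index to a partial BuildNodeInfo and `redfunc` folds
// the vector of partial results via the struct's operator+. Based on the
// GetThreadExecutor/TSeqU/MapReduce names referenced elsewhere in this file,
// the call site plausibly looks like (an assumption, shown for orientation):
//
//    BuildNodeInfo nodeInfo = TMVA::Config::Instance().GetThreadExecutor()
//                                .MapReduce(f, ROOT::TSeqU(nPartitions), redfunc);
//
// i.e. `f` runs once per partition, potentially in parallel, and the partial
// statistics are reduced into a single BuildNodeInfo.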
if (nodeInfo.s+nodeInfo.b < 0) {
   Log() << kWARNING << " One of the Decision Tree nodes has negative total number of signal or background events. "
         << "(Nsig=" << nodeInfo.s << " Nbkg=" << nodeInfo.b
         << " Probably you use a Monte Carlo with negative weights. That should in principle "
         << "be fine as long as on average you end up with something positive. For this you have to make sure that the "
         << "minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize=" << fMinNodeSize
         << "% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
         << "to allow for reasonable averaging!!!" << Endl
         << " If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
         << "with negative weight in the training." << Endl;
   for (UInt_t i=0; i<eventSample.size(); i++) {
      // ...
      nBkg += eventSample[i]->GetWeight();
      Log() << kDEBUG << "Event " << i
            << " has (original) weight: " << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
            << " boostWeight: " << eventSample[i]->GetBoostWeight() << Endl;
   }
   Log() << kDEBUG << " that gives in total: " << nBkg << Endl;
node->SetResponse(nodeInfo.target/(nodeInfo.s+nodeInfo.b));
if ( almost_equal_double(nodeInfo.target2/(nodeInfo.s+nodeInfo.b),
                         nodeInfo.target/(nodeInfo.s+nodeInfo.b)*nodeInfo.target/(nodeInfo.s+nodeInfo.b)) ){
   node->SetRMS(0);
}
else {
   node->SetRMS(TMath::Sqrt(nodeInfo.target2/(nodeInfo.s+nodeInfo.b)
                - nodeInfo.target/(nodeInfo.s+nodeInfo.b)*nodeInfo.target/(nodeInfo.s+nodeInfo.b)));
}
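// --- Editor's illustrative sketch (not part of DecisionTree.cxx) ---
// The response/RMS pair above is just the weighted mean and standard deviation
// of the regression target, recovered from three running sums (hypothetical
// NodeStats names):
#include <cmath>

struct NodeStats { double sumW = 0, sumWT = 0, sumWT2 = 0; };  // sum of w, w*t, w*t^2

double response(const NodeStats& n) { return n.sumWT / n.sumW; }

double rms(const NodeStats& n)
{
   const double mean = n.sumWT / n.sumW;
   return std::sqrt(n.sumWT2 / n.sumW - mean * mean);   // sqrt(E[t^2] - E[t]^2)
}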
std::vector<const TMVA::Event*> leftSample;  leftSample.reserve(nevents);
std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);
// ...
Double_t nRightUnBoosted=0, nLeftUnBoosted=0;
// ...
for (UInt_t ie=0; ie< nevents ; ie++) {
   // ...
   rightSample.push_back(eventSample[ie]);
   nRight += eventSample[ie]->GetWeight();
   nRightUnBoosted += eventSample[ie]->GetOriginalWeight();
   // ...
   leftSample.push_back(eventSample[ie]);
   nLeft += eventSample[ie]->GetWeight();
   nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();
}
// ...
if (leftSample.empty() || rightSample.empty()) {
   Log() << kERROR << "<TrainNode> all events went to the same branch" << Endl
         << "--- Hence new node == old node ... check" << Endl
         << "--- left:" << leftSample.size()
         << " right:" << rightSample.size() << Endl
         << " while the separation is thought to be " << separationGain
         << "\n when cutting on variable " << node->GetSelector()
         // ...
         << kFATAL << "--- this should never happen, please write a bug report to Helge.Voss@cern.ch" << Endl;
node->SetResponse(nodeInfo.target/(nodeInfo.s+nodeInfo.b));
if ( almost_equal_double(nodeInfo.target2/(nodeInfo.s+nodeInfo.b),
                         nodeInfo.target/(nodeInfo.s+nodeInfo.b)*nodeInfo.target/(nodeInfo.s+nodeInfo.b)) ) {
   node->SetRMS(0);
}
else {
   node->SetRMS(TMath::Sqrt(nodeInfo.target2/(nodeInfo.s+nodeInfo.b)
                - nodeInfo.target/(nodeInfo.s+nodeInfo.b)*nodeInfo.target/(nodeInfo.s+nodeInfo.b)));
}
Log() << kDEBUG << "\tThe minimal node size MinNodeSize=" << fMinNodeSize
      << "% is translated to an actual number of events = " << fMinSize
      << " for the training sample size of " << eventSample.size() << Endl;
Log() << kDEBUG << "\tNote: This number will be taken as absolute minimum in the node, " << Endl;
Log() << kDEBUG << " \tin terms of 'weighted events' and unweighted ones !! " << Endl;
UInt_t nevents = eventSample.size();
// ...
if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
// ...
else Log() << kFATAL << ":<BuildTree> eventsample Size == 0 " << Endl;
// ...
xmin[ivar]=xmax[ivar]=0;
// ...
for (UInt_t iev=0; iev<eventSample.size(); iev++) {
   // ...
   target2+=weight*tgt*tgt;
   // ...
   if (iev==0) xmin[ivar]=xmax[ivar]=val;
   if (val < xmin[ivar]) xmin[ivar]=val;
   if (val > xmax[ivar]) xmax[ivar]=val;
Log() << kWARNING << " One of the Decision Tree nodes has negative total number of signal or background events. "
      << "(Nsig=" << s << " Nbkg=" << b
      << " Probably you use a Monte Carlo with negative weights. That should in principle "
      << "be fine as long as on average you end up with something positive. For this you have to make sure that the "
      << "minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize=" << fMinNodeSize
      << "% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
      << "to allow for reasonable averaging!!!" << Endl
      << " If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
      << "with negative weight in the training." << Endl;
for (UInt_t i=0; i<eventSample.size(); i++) {
   // ...
   nBkg += eventSample[i]->GetWeight();
   Log() << kDEBUG << "Event " << i
         << " has (original) weight: " << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
         << " boostWeight: " << eventSample[i]->GetBoostWeight() << Endl;
}
Log() << kDEBUG << " that gives in total: " << nBkg << Endl;
std::vector<const TMVA::Event*> leftSample;  leftSample.reserve(nevents);
std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);
// ...
Double_t nRightUnBoosted=0, nLeftUnBoosted=0;
// ...
for (UInt_t ie=0; ie< nevents ; ie++) {
   // ...
   rightSample.push_back(eventSample[ie]);
   nRight += eventSample[ie]->GetWeight();
   nRightUnBoosted += eventSample[ie]->GetOriginalWeight();
   // ...
   leftSample.push_back(eventSample[ie]);
   nLeft += eventSample[ie]->GetWeight();
   nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();
}
// ...
if (leftSample.empty() || rightSample.empty()) {
   Log() << kERROR << "<TrainNode> all events went to the same branch" << Endl
         << "--- Hence new node == old node ... check" << Endl
         << "--- left:" << leftSample.size()
         << " right:" << rightSample.size() << Endl
         << " while the separation is thought to be " << separationGain
         << "\n when cutting on variable " << node->GetSelector()
         // ...
         << kFATAL << "--- this should never happen, please write a bug report to Helge.Voss@cern.ch" << Endl;
for (UInt_t i=0; i<eventSample.size(); i++) {
Log() << kFATAL << "Selected pruning method not yet implemented " << Endl;
// ...
if(!tool) return 0.0;
if(validationSample == NULL){
   Log() << kFATAL << "Cannot automate the pruning algorithm without an "
         << "independent validation sample!" << Endl;
}
else if(validationSample->size() == 0) {
   Log() << kFATAL << "Cannot automate the pruning algorithm with "
         << "independent validation sample of ZERO events!" << Endl;
}
Log() << kFATAL << "Error pruning tree! Check prune.log for more information." << Endl;
// ...
return pruneStrength;
for (UInt_t ievt=0; ievt < validationSample->size(); ievt++) {
// ...
Log() << kFATAL << "TestPrunedTreeQuality: started with undefined ROOT node" << Endl;
else if ( mode == 1 ) {
// ...
throw std::string("Unknown ValidationQualityMode");
if (current == NULL) {
   Log() << kFATAL << "CheckEventWithPrunedTree: started with undefined ROOT node" << Endl;
}
// ...
while(current != NULL) {
for( EventConstList::const_iterator it = validationSample->begin();
     it != validationSample->end(); ++it ) {
   sumWeights += (*it)->GetWeight();
}
Log() << kFATAL << "CountLeafNodes: started with undefined ROOT node" << Endl;
// ...
Log() << kFATAL << "DescendTree: started with undefined ROOT node" << Endl;
// ...
Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
// ...
Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
if(node == NULL) return;
// ...
node->SetAlpha( std::numeric_limits<double>::infinity( ) );
// ...
for (UInt_t i =0; i < depth; i++) {
UInt_t nSelectedVars = 0;
while (nSelectedVars < useNvars) {
   // ...
   if (useVariable[ivar] == kTRUE) {
      mapVariable[nSelectedVars] = ivar;
      // ...
   }
}
if (nSelectedVars != useNvars) { std::cout << "Bug in TrainNode - GetRandomisedVariables()... sorry" << std::endl; std::exit(1); }
struct TrainNodeInfo{
   // ...
      nSelS            = std::vector< std::vector<Double_t> >(cNvars);
      nSelB            = std::vector< std::vector<Double_t> >(cNvars);
      nSelS_unWeighted = std::vector< std::vector<Double_t> >(cNvars);
      nSelB_unWeighted = std::vector< std::vector<Double_t> >(cNvars);
      target           = std::vector< std::vector<Double_t> >(cNvars);
      target2          = std::vector< std::vector<Double_t> >(cNvars);
      for(Int_t ivar=0; ivar<cNvars; ivar++){
         nSelS[ivar]            = std::vector<Double_t>(nBins[ivar], 0);
         nSelB[ivar]            = std::vector<Double_t>(nBins[ivar], 0);
         nSelS_unWeighted[ivar] = std::vector<Double_t>(nBins[ivar], 0);
         nSelB_unWeighted[ivar] = std::vector<Double_t>(nBins[ivar], 0);
         target[ivar]           = std::vector<Double_t>(nBins[ivar], 0);
         target2[ivar]          = std::vector<Double_t>(nBins[ivar], 0);
      }
   // ...
   std::vector< std::vector<Double_t> > nSelS;
   std::vector< std::vector<Double_t> > nSelB;
   std::vector< std::vector<Double_t> > nSelS_unWeighted;
   std::vector< std::vector<Double_t> > nSelB_unWeighted;
   std::vector< std::vector<Double_t> > target;
   std::vector< std::vector<Double_t> > target2;
   TrainNodeInfo operator+(const TrainNodeInfo& other)
   {
      TrainNodeInfo ret(cNvars, nBins);
      // ...
      if(cNvars != other.cNvars)
      {
         std::cout << "!!! ERROR TrainNodeInfo1+TrainNodeInfo2 failure. cNvars1 != cNvars2." << std::endl;
      }
      // ...
      for (Int_t ivar=0; ivar<cNvars; ivar++) {
         for (UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
            ret.nSelS[ivar][ibin]            = nSelS[ivar][ibin] + other.nSelS[ivar][ibin];
            ret.nSelB[ivar][ibin]            = nSelB[ivar][ibin] + other.nSelB[ivar][ibin];
            ret.nSelS_unWeighted[ivar][ibin] = nSelS_unWeighted[ivar][ibin] + other.nSelS_unWeighted[ivar][ibin];
            ret.nSelB_unWeighted[ivar][ibin] = nSelB_unWeighted[ivar][ibin] + other.nSelB_unWeighted[ivar][ibin];
            ret.target[ivar][ibin]           = target[ivar][ibin] + other.target[ivar][ibin];
            ret.target2[ivar][ibin]          = target2[ivar][ibin] + other.target2[ivar][ibin];
         }
      }
      ret.nTotS            = nTotS + other.nTotS;
      ret.nTotS_unWeighted = nTotS_unWeighted + other.nTotS_unWeighted;
      ret.nTotB            = nTotB + other.nTotB;
      ret.nTotB_unWeighted = nTotB_unWeighted + other.nTotB_unWeighted;
separationGain[ivar]=-1;
// ...
UInt_t nevents = eventSample.size();
// ...
std::vector<Double_t> fisherCoeff;
// ...
useVariable[ivar] = kTRUE;
mapVariable[ivar] = ivar;
// ...
useVarInFisher[ivar] = kFALSE;
mapVarInFisher[ivar] = ivar;
std::vector<TMatrixDSym*>* covMatrices;
// ...
Log() << kWARNING << " in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" << Endl;
// ...
useVarInFisher[ivar] = kTRUE;
useVarInFisher[jvar] = kTRUE;
// ...
if (useVarInFisher[ivar] && useVariable[ivar]) {
   mapVarInFisher[nFisherVars++]=ivar;
}
// ...
delete [] useVarInFisher;
delete [] mapVarInFisher;
for (UInt_t ivar=0; ivar<cNvars; ivar++) {
   // ...
   cutValues[ivar] = new Double_t [nBins[ivar]];
}
// ...
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   // ...
   useVariable[ivar]=kFALSE;
   // ...
   for (UInt_t iev=0; iev<nevents; iev++) {
      // ...
      result += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
      if (result > xmax[ivar]) xmax[ivar]=result;
      if (result < xmin[ivar]) xmin[ivar]=result;
   }
   // ...
   for (UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
      cutValues[ivar][ibin]=0;
   }
}
auto fvarInitCuts = [this, &useVariable, &cutValues, &invBinWidth, &binWidth, &nBins, &xmin, &xmax](UInt_t ivar = 0){
   if ( useVariable[ivar] ) {
      // ...
      binWidth[ivar] = ( xmax[ivar] - xmin[ivar] ) / Double_t(nBins[ivar]);
      invBinWidth[ivar] = 1./binWidth[ivar];
      // ...
      for (UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
         cutValues[ivar][icut]=xmin[ivar]+(Double_t(icut+1))*binWidth[ivar];
      }
   }
};
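// --- Editor's illustrative sketch (not part of DecisionTree.cxx) ---
// The lambda above lays an equidistant grid of candidate cuts over [xmin, xmax]:
// with nBins bins of width (xmax-xmin)/nBins, the nBins-1 interior bin edges are
// the candidate cut values. A standalone version (hypothetical makeCutGrid name):
#include <vector>

std::vector<double> makeCutGrid(double xmin, double xmax, unsigned nBins)
{
   const double binWidth = (xmax - xmin) / nBins;
   std::vector<double> cuts(nBins - 1);
   for (unsigned icut = 0; icut < nBins - 1; ++icut)
      cuts[icut] = xmin + (icut + 1) * binWidth;   // interior bin edges only
   return cuts;
}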
TrainNodeInfo nodeInfo(cNvars, nBins);
// ...
if(eventSample.size() >= cNvars*fNCuts*nPartitions*2)
auto f = [this, &eventSample, &fisherCoeff, &useVariable, &invBinWidth,
          &nBins, &xmin, &cNvars, &nPartitions](UInt_t partition = 0){
   UInt_t start = 1.0*partition/nPartitions*eventSample.size();
   UInt_t end   = (partition+1.0)/nPartitions*eventSample.size();
   // ...
   TrainNodeInfo nodeInfof(cNvars, nBins);
   // ...
   for(UInt_t iev=start; iev<end; iev++) {
      // ...
      Double_t eventWeight = eventSample[iev]->GetWeight();
      // ...
      nodeInfof.nTotS+=eventWeight;
      nodeInfof.nTotS_unWeighted++; }
      // ...
      nodeInfof.nTotB+=eventWeight;
      nodeInfof.nTotB_unWeighted++;
      // ...
      for (UInt_t ivar=0; ivar < cNvars; ivar++) {
         // ...
         if ( useVariable[ivar] ) {
            // ...
            if (ivar < fNvars) eventData = eventSample[iev]->GetValueFast(ivar);
            // ...
            eventData = fisherCoeff[fNvars];
            // ...
            eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
            // ...
            nodeInfof.nSelS[ivar][iBin]+=eventWeight;
            nodeInfof.nSelS_unWeighted[ivar][iBin]++;
            // ...
            nodeInfof.nSelB[ivar][iBin]+=eventWeight;
            nodeInfof.nSelB_unWeighted[ivar][iBin]++;
            // ...
            nodeInfof.target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
            nodeInfof.target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
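// --- Editor's illustrative sketch (not part of DecisionTree.cxx) ---
// Each event value is routed to a histogram bin before any cut is evaluated.
// The usual index computation, clamped to the valid range (hypothetical
// binIndex name), looks like:
#include <algorithm>

inline int binIndex(double val, double xmin, double invBinWidth, int nBins)
{
   // bin 0 holds values at/near xmin; the last bin absorbs values up to xmax
   return std::min(nBins - 1, std::max(0, int(invBinWidth * (val - xmin))));
}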
TrainNodeInfo nodeInfoInit(cNvars, nBins);
// ...
auto redfunc = [nodeInfoInit](std::vector<TrainNodeInfo> v) -> TrainNodeInfo {
   return std::accumulate(v.begin(), v.end(), nodeInfoInit); };
auto fvarFillNodeInfo = [this, &nodeInfo, &eventSample, &fisherCoeff, &useVariable, &invBinWidth, &nBins, &xmin](UInt_t ivar = 0){
   for(UInt_t iev=0; iev<eventSample.size(); iev++) {
      // ...
      Double_t eventWeight = eventSample[iev]->GetWeight();
      // ...
      nodeInfo.nTotS+=eventWeight;
      nodeInfo.nTotS_unWeighted++; }
      // ...
      nodeInfo.nTotB+=eventWeight;
      nodeInfo.nTotB_unWeighted++;
      // ...
      if ( useVariable[ivar] ) {
         // ...
         if (ivar < fNvars) eventData = eventSample[iev]->GetValueFast(ivar);
         // ...
         eventData = fisherCoeff[fNvars];
         // ...
         eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
         // ...
         nodeInfo.nSelS[ivar][iBin]+=eventWeight;
         nodeInfo.nSelS_unWeighted[ivar][iBin]++;
         // ...
         nodeInfo.nSelB[ivar][iBin]+=eventWeight;
         nodeInfo.nSelB_unWeighted[ivar][iBin]++;
         // ...
         nodeInfo.target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
         nodeInfo.target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
auto fvarCumulative = [&nodeInfo, &useVariable, &nBins, this, &eventSample](UInt_t ivar = 0){
   if (useVariable[ivar]) {
      for (UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
         nodeInfo.nSelS[ivar][ibin]+=nodeInfo.nSelS[ivar][ibin-1];
         nodeInfo.nSelS_unWeighted[ivar][ibin]+=nodeInfo.nSelS_unWeighted[ivar][ibin-1];
         nodeInfo.nSelB[ivar][ibin]+=nodeInfo.nSelB[ivar][ibin-1];
         nodeInfo.nSelB_unWeighted[ivar][ibin]+=nodeInfo.nSelB_unWeighted[ivar][ibin-1];
         // ...
         nodeInfo.target[ivar][ibin] +=nodeInfo.target[ivar][ibin-1] ;
         nodeInfo.target2[ivar][ibin]+=nodeInfo.target2[ivar][ibin-1];
      }
      if (nodeInfo.nSelS_unWeighted[ivar][nBins[ivar]-1] +nodeInfo.nSelB_unWeighted[ivar][nBins[ivar]-1] != eventSample.size()) {
         Log() << kFATAL << "Helge, you have a bug ....nodeInfo.nSelS_unw..+nodeInfo.nSelB_unw..= "
               << nodeInfo.nSelS_unWeighted[ivar][nBins[ivar]-1] +nodeInfo.nSelB_unWeighted[ivar][nBins[ivar]-1]
               << " while eventsample size = " << eventSample.size()
               << Endl;
      }
      double lastBins=nodeInfo.nSelS[ivar][nBins[ivar]-1] +nodeInfo.nSelB[ivar][nBins[ivar]-1];
      double totalSum=nodeInfo.nTotS+nodeInfo.nTotB;
      if (TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
         Log() << kFATAL << "Helge, you have another bug ....nodeInfo.nSelS+nodeInfo.nSelB= "
               << lastBins
               << " while total number of events = " << totalSum
               << Endl;
      }
   }
};
auto fvarMaxSep = [&nodeInfo, &useVariable, this, &separationGain, &cutIndex, &nBins] (UInt_t ivar = 0){
   if (useVariable[ivar]) {
      // ...
      for (UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {
         // ...
         Double_t sl = nodeInfo.nSelS_unWeighted[ivar][iBin];
         Double_t bl = nodeInfo.nSelB_unWeighted[ivar][iBin];
         // ...
         Double_t slW = nodeInfo.nSelS[ivar][iBin];
         Double_t blW = nodeInfo.nSelB[ivar][iBin];
         // ...
         sepTmp = fRegType->GetSeparationGain(nodeInfo.nSelS[ivar][iBin]+nodeInfo.nSelB[ivar][iBin],
                                              nodeInfo.target[ivar][iBin], nodeInfo.target2[ivar][iBin],
                                              nodeInfo.nTotS+nodeInfo.nTotB,
                                              nodeInfo.target[ivar][nBins[ivar]-1], nodeInfo.target2[ivar][nBins[ivar]-1]);
         // ...
         sepTmp = fSepType->GetSeparationGain(nodeInfo.nSelS[ivar][iBin], nodeInfo.nSelB[ivar][iBin], nodeInfo.nTotS, nodeInfo.nTotB);
         // ...
         if (separationGain[ivar] < sepTmp) {
            separationGain[ivar] = sepTmp;
            cutIndex[ivar] = iBin;
         }
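// --- Editor's illustrative sketch (not part of DecisionTree.cxx) ---
// For classification, GetSeparationGain compares the parent impurity with the
// size-weighted impurities of the two daughters. With the Gini index as the
// criterion, a minimal version (TMVA's SeparationBase adds normalisation
// details; giniGain is a hypothetical name) is:
double gini(double s, double b)
{
   const double n = s + b;
   return (n > 0) ? 2.0 * (s / n) * (b / n) : 0.0;   // ~ p*(1-p), up to a factor
}

// gain = index(parent) - fracLeft*index(left) - fracRight*index(right)
double giniGain(double sLeft, double bLeft, double sTot, double bTot)
{
   const double nTot = sTot + bTot, nLeft = sLeft + bLeft;
   if (nTot <= 0) return 0.0;
   return gini(sTot, bTot)
        - (nLeft / nTot) * gini(sLeft, bLeft)
        - ((nTot - nLeft) / nTot) * gini(sTot - sLeft, bTot - bLeft);
}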
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   if (useVariable[ivar] ) {
      if (separationGainTotal < separationGain[ivar]) {
         separationGainTotal = separationGain[ivar];
node->SetResponse(nodeInfo.target[0][nBins[mxVar]-1]/(nodeInfo.nTotS+nodeInfo.nTotB));
if ( almost_equal_double(nodeInfo.target2[0][nBins[mxVar]-1]/(nodeInfo.nTotS+nodeInfo.nTotB),
                         nodeInfo.target[0][nBins[mxVar]-1]/(nodeInfo.nTotS+nodeInfo.nTotB)*nodeInfo.target[0][nBins[mxVar]-1]/(nodeInfo.nTotS+nodeInfo.nTotB)) ) {
   node->SetRMS(0);
}
else {
   node->SetRMS(TMath::Sqrt(nodeInfo.target2[0][nBins[mxVar]-1]/(nodeInfo.nTotS+nodeInfo.nTotB)
                - nodeInfo.target[0][nBins[mxVar]-1]/(nodeInfo.nTotS+nodeInfo.nTotB)*nodeInfo.target[0][nBins[mxVar]-1]/(nodeInfo.nTotS+nodeInfo.nTotB)));
}
// ...
if (nodeInfo.nSelS[mxVar][cutIndex[mxVar]]/nodeInfo.nTotS > nodeInfo.nSelB[mxVar][cutIndex[mxVar]]/nodeInfo.nTotB) cutType=kTRUE;
// ...
node->SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);
// ...
fVariableImportance[mxVar] += separationGainTotal*separationGainTotal * (nodeInfo.nTotS+nodeInfo.nTotB) * (nodeInfo.nTotS+nodeInfo.nTotB);
// ...
fVariableImportance[ivar] += fisherCoeff[ivar]*fisherCoeff[ivar]*separationGainTotal*separationGainTotal * (nodeInfo.nTotS+nodeInfo.nTotB) * (nodeInfo.nTotS+nodeInfo.nTotB);
// ...
separationGainTotal = 0;
delete [] cutValues;
// ...
delete [] useVariable;
delete [] mapVariable;
// ...
delete [] separationGain;
// ...
delete [] invBinWidth;
// ...
return separationGainTotal;
Double_t separationGainTotal = -1, sepTmp;
// ...
separationGain[ivar]=-1;
// ...
Int_t nTotS_unWeighted, nTotB_unWeighted;
UInt_t nevents = eventSample.size();
// ...
std::vector<Double_t> fisherCoeff;
// ...
useVariable[ivar] = kTRUE;
mapVariable[ivar] = ivar;
// ...
useVarInFisher[ivar] = kFALSE;
mapVarInFisher[ivar] = ivar;
std::vector<TMatrixDSym*>* covMatrices;
// ...
Log() << kWARNING << " in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" << Endl;
// ...
useVarInFisher[ivar] = kTRUE;
useVarInFisher[jvar] = kTRUE;
// ...
if (useVarInFisher[ivar] && useVariable[ivar]) {
   mapVarInFisher[nFisherVars++]=ivar;
}
// ...
delete [] useVarInFisher;
delete [] mapVarInFisher;
for (UInt_t ivar=0; ivar<cNvars; ivar++) {
   // ...
   nSelS[ivar]            = new Double_t [nBins[ivar]];
   nSelB[ivar]            = new Double_t [nBins[ivar]];
   nSelS_unWeighted[ivar] = new Double_t [nBins[ivar]];
   nSelB_unWeighted[ivar] = new Double_t [nBins[ivar]];
   target[ivar]           = new Double_t [nBins[ivar]];
   target2[ivar]          = new Double_t [nBins[ivar]];
   cutValues[ivar]        = new Double_t [nBins[ivar]];
}
// ...
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   // ...
   useVariable[ivar]=kFALSE;
   // ...
   for (UInt_t iev=0; iev<nevents; iev++) {
      // ...
      result += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
      if (result > xmax[ivar]) xmax[ivar]=result;
      if (result < xmin[ivar]) xmin[ivar]=result;
   }
   // ...
   for (UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
      nSelS[ivar][ibin]=0;
      nSelB[ivar][ibin]=0;
      nSelS_unWeighted[ivar][ibin]=0;
      nSelB_unWeighted[ivar][ibin]=0;
      target[ivar][ibin]=0;
      target2[ivar][ibin]=0;
      cutValues[ivar][ibin]=0;
   }
}
// ...
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   // ...
   if ( useVariable[ivar] ) {
      // ...
      binWidth[ivar] = ( xmax[ivar] - xmin[ivar] ) / Double_t(nBins[ivar]);
      invBinWidth[ivar] = 1./binWidth[ivar];
      // ...
      for (UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
         cutValues[ivar][icut]=xmin[ivar]+(Double_t(icut+1))*binWidth[ivar];
      }
   }
}
nTotS_unWeighted=0; nTotB_unWeighted=0;
for (UInt_t iev=0; iev<nevents; iev++) {
   // ...
   Double_t eventWeight = eventSample[iev]->GetWeight();
   // ...
   nTotS_unWeighted++; }
   // ...
   for (UInt_t ivar=0; ivar < cNvars; ivar++) {
      // ...
      if ( useVariable[ivar] ) {
         // ...
         if (ivar < fNvars) eventData = eventSample[iev]->GetValueFast(ivar);
         // ...
         eventData = fisherCoeff[fNvars];
         // ...
         eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
         // ...
         nSelS[ivar][iBin]+=eventWeight;
         nSelS_unWeighted[ivar][iBin]++;
         // ...
         nSelB[ivar][iBin]+=eventWeight;
         nSelB_unWeighted[ivar][iBin]++;
         // ...
         target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
         target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   if (useVariable[ivar]) {
      for (UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
         nSelS[ivar][ibin]+=nSelS[ivar][ibin-1];
         nSelS_unWeighted[ivar][ibin]+=nSelS_unWeighted[ivar][ibin-1];
         nSelB[ivar][ibin]+=nSelB[ivar][ibin-1];
         nSelB_unWeighted[ivar][ibin]+=nSelB_unWeighted[ivar][ibin-1];
         // ...
         target[ivar][ibin] +=target[ivar][ibin-1] ;
         target2[ivar][ibin]+=target2[ivar][ibin-1];
      }
      if (nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1] != eventSample.size()) {
         Log() << kFATAL << "Helge, you have a bug ....nSelS_unw..+nSelB_unw..= "
               << nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1]
               << " while eventsample size = " << eventSample.size()
               << Endl;
      }
      double lastBins=nSelS[ivar][nBins[ivar]-1] +nSelB[ivar][nBins[ivar]-1];
      double totalSum=nTotS+nTotB;
      if (TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
         Log() << kFATAL << "Helge, you have another bug ....nSelS+nSelB= "
               << lastBins << " while total number of events = " << totalSum
               << Endl;
      }
   }
}
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   if (useVariable[ivar]) {
      for (UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {
         // ...
         Double_t sl = nSelS_unWeighted[ivar][iBin];
         Double_t bl = nSelB_unWeighted[ivar][iBin];
         // ...
         sepTmp = fRegType->GetSeparationGain(nSelS[ivar][iBin]+nSelB[ivar][iBin],
                                              target[ivar][iBin], target2[ivar][iBin],
                                              nTotS+nTotB,
                                              target[ivar][nBins[ivar]-1], target2[ivar][nBins[ivar]-1]);
         // ...
         if (separationGain[ivar] < sepTmp) {
            separationGain[ivar] = sepTmp;
            cutIndex[ivar] = iBin;
         }
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   if (useVariable[ivar] ) {
      if (separationGainTotal < separationGain[ivar]) {
         separationGainTotal = separationGain[ivar];
node->SetResponse(target[0][nBins[mxVar]-1]/(nTotS+nTotB));
if ( almost_equal_double(target2[0][nBins[mxVar]-1]/(nTotS+nTotB),
                         target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB)) ) {
   node->SetRMS(0);
}
else {
   node->SetRMS(TMath::Sqrt(target2[0][nBins[mxVar]-1]/(nTotS+nTotB)
                - target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB)));
}
// ...
if (nSelS[mxVar][cutIndex[mxVar]]/nTotS > nSelB[mxVar][cutIndex[mxVar]]/nTotB) cutType=kTRUE;
// ...
node->SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);
// ...
fVariableImportance[mxVar] += separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB);
// ...
fVariableImportance[ivar] += fisherCoeff[ivar]*fisherCoeff[ivar]*separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB);
// ...
separationGainTotal = 0;
for (UInt_t i=0; i<cNvars; i++) {
   // ...
   delete [] nSelS_unWeighted[i];
   delete [] nSelB_unWeighted[i];
   delete [] target[i];
   delete [] target2[i];
   delete [] cutValues[i];
}
// ...
delete [] nSelS_unWeighted;
delete [] nSelB_unWeighted;
// ...
delete [] cutValues;
// ...
delete [] useVariable;
delete [] mapVariable;
// ...
delete [] separationGain;
// ...
delete [] invBinWidth;
// ...
return separationGainTotal;
std::vector<Double_t> fisherCoeff(fNvars+1);
// ...
for (UInt_t ivar=0; ivar<nFisherVars; ivar++) { sumS[ivar] = sumB[ivar] = 0; }
// ...
UInt_t nevents = eventSample.size();
// ...
for (UInt_t ievt=0; ievt<nevents; ievt++) {
   // ...
   const Event * ev = eventSample[ievt];
   // ...
   else sumOfWeightsB += weight;
   // ...
   for (UInt_t ivar=0; ivar<nFisherVars; ivar++) {
      sum[ivar] += ev->GetValueFast( mapVarInFisher[ivar] )*weight;
   }
}
for (UInt_t ivar=0; ivar<nFisherVars; ivar++) {
   (*meanMatx)( ivar, 2 ) = sumS[ivar];
   (*meanMatx)( ivar, 0 ) = sumS[ivar]/sumOfWeightsS;
   // ...
   (*meanMatx)( ivar, 2 ) += sumB[ivar];
   (*meanMatx)( ivar, 1 ) = sumB[ivar]/sumOfWeightsB;
   // ...
   (*meanMatx)( ivar, 2 ) /= (sumOfWeightsS + sumOfWeightsB);
}
assert( sumOfWeightsS > 0 && sumOfWeightsB > 0 );
// ...
const Int_t nFisherVars2 = nFisherVars*nFisherVars;
// ...
memset(sum2Sig, 0, nFisherVars2*sizeof(Double_t));
memset(sum2Bgd, 0, nFisherVars2*sizeof(Double_t));
// ...
for (UInt_t ievt=0; ievt<nevents; ievt++) {
   // ...
   const Event* ev = eventSample.at(ievt);
   // ...
   if ( ev->GetClass() == fSigClass ) sum2Sig[k] += ( (xval[x] - (*meanMatx)(x, 0))*(xval[y] - (*meanMatx)(y, 0)) )*weight;
   else                               sum2Bgd[k] += ( (xval[x] - (*meanMatx)(x, 1))*(xval[y] - (*meanMatx)(y, 1)) )*weight;
}
// ...
(*with)(x, y) = sum2Sig[k]/sumOfWeightsS + sum2Bgd[k]/sumOfWeightsB;
prodSig = ( ((*meanMatx)(x, 0) - (*meanMatx)(x, 2))*
            ((*meanMatx)(y, 0) - (*meanMatx)(y, 2)) );
prodBgd = ( ((*meanMatx)(x, 1) - (*meanMatx)(x, 2))*
            ((*meanMatx)(y, 1) - (*meanMatx)(y, 2)) );
// ...
(*betw)(x, y) = (sumOfWeightsS*prodSig + sumOfWeightsB*prodBgd) / (sumOfWeightsS + sumOfWeightsB);
// ...
(*cov)(x, y) = (*with)(x, y) + (*betw)(x, y);
Log() << kWARNING << "FisherCoeff matrix is almost singular with determinant="
      << TMath::Abs(cov->Determinant())
      << " did you use the variables that are linear combinations or highly correlated?"
      << Endl;
// ...
Log() << kFATAL << "FisherCoeff matrix is singular with determinant="
      << TMath::Abs(cov->Determinant())
      << " did you use the variables that are linear combinations?"
      << Endl;
// ...
Double_t xfact = TMath::Sqrt( sumOfWeightsS*sumOfWeightsB ) / (sumOfWeightsS + sumOfWeightsB);
// ...
std::vector<Double_t> diffMeans( nFisherVars );
// ...
for (UInt_t ivar=0; ivar<=fNvars; ivar++) fisherCoeff[ivar] = 0;
for (UInt_t ivar=0; ivar<nFisherVars; ivar++) {
   for (UInt_t jvar=0; jvar<nFisherVars; jvar++) {
      Double_t d = (*meanMatx)(jvar, 0) - (*meanMatx)(jvar, 1);
      fisherCoeff[mapVarInFisher[ivar]] += invCov(ivar, jvar)*d;
   }
   // ...
   fisherCoeff[mapVarInFisher[ivar]] *= xfact;
}
// ...
for (UInt_t ivar=0; ivar<nFisherVars; ivar++){
   f0 += fisherCoeff[mapVarInFisher[ivar]]*((*meanMatx)(ivar, 0) + (*meanMatx)(ivar, 1));
}
// ...
fisherCoeff[fNvars] = f0;
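// --- Editor's illustrative sketch (not part of DecisionTree.cxx) ---
// The nested loop above is a matrix-vector product: the Fisher coefficients are
// the inverted covariance matrix applied to the difference of the class means,
// w = C^{-1} (mu_S - mu_B), times an overall normalisation. In miniature, for a
// fixed 2-variable problem (hypothetical fisherCoeffs name):
#include <array>

std::array<double, 2> fisherCoeffs(const std::array<std::array<double, 2>, 2>& invCov,
                                   const std::array<double, 2>& meanS,
                                   const std::array<double, 2>& meanB)
{
   std::array<double, 2> w{};
   for (int i = 0; i < 2; ++i)
      for (int j = 0; j < 2; ++j)
         w[i] += invCov[i][j] * (meanS[j] - meanB[j]);   // w = C^{-1} (mu_S - mu_B)
   return w;
}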
Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;
// ...
std::vector<TMVA::BDTEventWrapper> bdtEventSample;
// ...
std::vector<Double_t> lCutValue( fNvars, 0.0 );
std::vector<Double_t> lSepGain( fNvars, -1.0e6 );
std::vector<Char_t> lCutType( fNvars );
// ...
for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {
   // ...
   nTotS += (*it)->GetWeight();
   // ...
   nTotB += (*it)->GetWeight();
   // ...
}
// ...
std::vector<Char_t> useVariable(fNvars);
// ...
Int_t nSelectedVars = 0;
// ...
if(useVariable[ivar] == Char_t(kTRUE)) nSelectedVars++;
// ...
if(!useVariable[ivar]) continue;
// ...
std::sort( bdtEventSample.begin(),bdtEventSample.end() );
// ...
Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;
// ...
std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
for( ; it != it_end; ++it ) {
   // ...
   sigWeightCtr += (**it)->GetWeight();
   // ...
   bkgWeightCtr += (**it)->GetWeight();
   // ...
   it->SetCumulativeWeight(false,bkgWeightCtr);
   it->SetCumulativeWeight(true,sigWeightCtr);
}
Double_t separationGain = -1.0, sepTmp = 0.0, cutValue = 0.0, dVal = 0.0, norm = 0.0;
// ...
for( it = bdtEventSample.begin(); it != it_end; ++it ) {
   if( index == 0 ) { ++index; continue; }
   if( *(*it) == NULL ) {
      Log() << kFATAL << "In TrainNodeFull(): have a null event! Where index="
            << index << ", and parent node=" << node->GetParent() << Endl;
   }
   dVal = bdtEventSample[index].GetVal() - bdtEventSample[index-1].GetVal();
   norm = TMath::Abs(bdtEventSample[index].GetVal() + bdtEventSample[index-1].GetVal());
   // ...
   if( index >= fMinSize && (nTotS_unWeighted + nTotB_unWeighted) - index >= fMinSize
       && TMath::Abs(dVal/(0.5*norm + 1)) > fPMin ) {
      sepTmp = fSepType->GetSeparationGain( it->GetCumulativeWeight(true),
                                            it->GetCumulativeWeight(false),
                                            sigWeightCtr, bkgWeightCtr );
      if( sepTmp > separationGain ) {
         separationGain = sepTmp;
         cutValue = it->GetVal() - 0.5*dVal;
         Double_t nSelS = it->GetCumulativeWeight(true);
         Double_t nSelB = it->GetCumulativeWeight(false);
         // ...
         if( nSelS/sigWeightCtr > nSelB/bkgWeightCtr ) cutType = kTRUE;
      }
   }
   // ...
}
lCutType[ivar] = Char_t(cutType);
lCutValue[ivar] = cutValue;
lSepGain[ivar] = separationGain;
Int_t iVarIndex = -1;
// ...
if( lSepGain[ivar] > separationGain ) {
   // ...
   separationGain = lSepGain[ivar];
// ...
if(iVarIndex >= 0) {
   // ...
   fVariableImportance[iVarIndex] += separationGain*separationGain * (nTotS+nTotB) * (nTotS+nTotB);
// ...
separationGain = 0.0;
// ...
return separationGain;
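// --- Editor's illustrative sketch (not part of DecisionTree.cxx) ---
// Unlike the binned TrainNodeFast, TrainNodeFull sorts the events and considers
// a cut between every adjacent pair of values, scoring it from cumulative
// signal/background weights. The scan in miniature (simplified event record;
// giniGain is the hypothetical helper sketched earlier):
#include <algorithm>
#include <cstddef>
#include <vector>

struct Ev { double val, weight; bool isSignal; };

double bestCut(std::vector<Ev> evs, double sTot, double bTot)
{
   std::sort(evs.begin(), evs.end(),
             [](const Ev& a, const Ev& b) { return a.val < b.val; });
   double s = 0, b = 0, bestGain = -1, cut = 0;
   for (std::size_t i = 0; i + 1 < evs.size(); ++i) {
      (evs[i].isSignal ? s : b) += evs[i].weight;           // cumulative weights
      const double gain = giniGain(s, b, sTot, bTot);       // score this boundary
      if (gain > bestGain) {
         bestGain = gain;
         cut = 0.5 * (evs[i].val + evs[i + 1].val);         // midpoint cut value
      }
   }
   return cut;
}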
Log() << kFATAL << "CheckEvent: started with undefined ROOT node" << Endl;
// ...
Log() << kFATAL << "DT::CheckEvent: inconsistent tree structure" << Endl;
Double_t sumsig=0, sumbkg=0, sumtot=0;
for (UInt_t ievt=0; ievt<eventSample.size(); ievt++) {
   if (eventSample[ievt]->GetClass() != fSigClass) sumbkg+=eventSample[ievt]->GetWeight();
   else sumsig+=eventSample[ievt]->GetWeight();
   sumtot+=eventSample[ievt]->GetWeight();
}
// ...
if (sumtot!= (sumsig+sumbkg)){
   Log() << kFATAL << "<SamplePurity> sumtot != sumsig+sumbkg"
         << sumtot << " " << sumsig << " " << sumbkg << Endl;
}
if (sumtot>0) return sumsig/(sumsig + sumbkg);
std::vector<Double_t> relativeImportance(fNvars);
// ...
relativeImportance[i] /= sum;
// ...
relativeImportance[i] = 0;
// ...
return relativeImportance;
// ...
if (ivar < fNvars) return relativeImportance[ivar];
// ...
Log() << kFATAL << "<GetVariableImportance>" << Endl
      << "--- ivar = " << ivar << " is out of range " << Endl;