return std::abs(x - y) < std::numeric_limits<float>::epsilon() * std::abs(x + y) * ulp
   || std::abs(x - y) < std::numeric_limits<float>::min();

return std::abs(x - y) < std::numeric_limits<double>::epsilon() * std::abs(x + y) * ulp
   || std::abs(x - y) < std::numeric_limits<double>::min();
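// A minimal standalone usage sketch (not part of the original listing; it
// restates the double version of the helper so the snippet compiles on its
// own): values are treated as equal when they differ by less than `ulp`
// units in the last place, with an absolute fallback near zero.
#include <cmath>
#include <cstdio>
#include <limits>

bool almost_equal_double(double x, double y, int ulp = 4)
{
   return std::abs(x - y) < std::numeric_limits<double>::epsilon() * std::abs(x + y) * ulp
      || std::abs(x - y) < std::numeric_limits<double>::min();
}

int main()
{
   double a = 0.1 + 0.2;                              // 0.30000000000000004
   std::printf("%d\n", a == 0.3);                     // 0: exact comparison fails
   std::printf("%d\n", almost_equal_double(a, 0.3));  // 1: equal within 4 ulp
}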
fMinLinCorrForFisher (1),
fUseExclusiveVars (kTRUE),

fPruneMethod (kNoPruning),
fNNodesBeforePruning(0),
fNodePurityLimit(0.5),

fAnalysisType (Types::kClassification),
fMinLinCorrForFisher (1),
fUseExclusiveVars (kTRUE),

fMinNodeSize (minSize),

fPruneMethod (kNoPruning),
fNNodesBeforePruning(0),
fNodePurityLimit(purityLimit),
fRandomisedTree (randomisedTree),
fUseNvars (useNvars),
fUsePoissonNvars(usePoissonNvars),

fMaxDepth (nMaxDepth),

fAnalysisType (Types::kClassification),
fDataSetInfo (dataInfo)

if (sepType == NULL) {

Log() << kWARNING << " You had chosen the training mode using optimal cuts, not\n"
      << " based on a grid of " << fNCuts << " by setting the option NCuts < 0\n"
      << " as this doesn't exist yet, I set it to " << fNCuts << " and use the grid"
fUseFisherCuts (d.fUseFisherCuts),
fMinLinCorrForFisher (d.fMinLinCorrForFisher),
fUseExclusiveVars (d.fUseExclusiveVars),
fSepType (d.fSepType),
fRegType (d.fRegType),
fMinSize (d.fMinSize),
fMinNodeSize(d.fMinNodeSize),
fMinSepGain (d.fMinSepGain),
fUseSearchTree (d.fUseSearchTree),
fPruneStrength (d.fPruneStrength),
fPruneMethod (d.fPruneMethod),
fNodePurityLimit(d.fNodePurityLimit),
fRandomisedTree (d.fRandomisedTree),
fUseNvars (d.fUseNvars),
fUsePoissonNvars(d.fUsePoissonNvars),
fMyTrandom (new TRandom3(fgRandomSeed)),
fMaxDepth (d.fMaxDepth),
fSigClass (d.fSigClass),
fAnalysisType(d.fAnalysisType),
fDataSetInfo (d.fDataSetInfo)

if (fMyTrandom) delete fMyTrandom;
if (fRegType) delete fRegType;
Log() << kFATAL << "SetParentTreeInNodes: started with undefined ROOT node" << Endl;

if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) != NULL) ) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
}
else if ((this->GetLeftDaughter(n) != NULL) && (this->GetRightDaughter(n) == NULL) ) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;
}

if (this->GetLeftDaughter(n) != NULL) {
   this->SetParentTreeInNodes( this->GetLeftDaughter(n) );
}
if (this->GetRightDaughter(n) != NULL) {
   this->SetParentTreeInNodes( this->GetRightDaughter(n) );
}

n->SetParentTree(this);
if (n->GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(n->GetDepth());
std::string type("");

dt->ReadXML( node, tmva_Version_Code );
xmin = std::vector<Float_t>(nvars);
xmax = std::vector<Float_t>(nvars);

for (Int_t ivar=0; ivar<fNvars; ivar++) {

xmin = std::vector<Float_t>(nvars);
xmax = std::vector<Float_t>(nvars);

for (Int_t ivar=0; ivar<fNvars; ivar++) {
   xmin[ivar]=inxmin[ivar];
   xmax[ivar]=inxmax[ivar];

if(nvars != other.nvars)
   std::cout << "!!! ERROR BuildNodeInfo1+BuildNodeInfo2 failure. Nvars1 != Nvars2." << std::endl;

ret.suw = suw + other.suw;
ret.sub = sub + other.sub;

ret.buw = buw + other.buw;
ret.bub = bub + other.bub;

for(Int_t i=0; i<nvars; i++)
this->GetRoot()->SetPos('s');
this->GetRoot()->SetDepth(0);
this->GetRoot()->SetParentTree(this);
fMinSize = fMinNodeSize/100. * eventSample.size();

Log() << kDEBUG << "\tThe minimal node size MinNodeSize=" << fMinNodeSize
      << "% is translated to an actual number of events = " << fMinSize
      << " for the training sample size of " << eventSample.size() << Endl;
Log() << kDEBUG << "\tNote: This number will be taken as absolute minimum in the node, " << Endl;
Log() << kDEBUG << " \tin terms of 'weighted events' and unweighted ones !! " << Endl;

UInt_t nevents = eventSample.size();

if (nevents > 0) {
   if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
   fVariableImportance.resize(fNvars);
}
else Log() << kFATAL << ":<BuildTree> eventsample Size == 0 " << Endl;
auto f = [this, &eventSample, &nPartitions](UInt_t partition = 0){

   Int_t start = 1.0*partition/nPartitions*eventSample.size();
   Int_t end   = (partition+1.0)/nPartitions*eventSample.size();

   for(Int_t iev=start; iev<end; iev++){

      nodeInfof.s   += weight;
      nodeInfof.sub += orgWeight;

      nodeInfof.b   += weight;
      nodeInfof.bub += orgWeight;

      if ( DoRegression() ) {
         nodeInfof.target +=weight*tgt;
         nodeInfof.target2+=weight*tgt*tgt;

      for (UInt_t ivar=0; ivar<fNvars; ivar++) {

         nodeInfof.xmin[ivar]=val;
         nodeInfof.xmax[ivar]=val;

         if (val < nodeInfof.xmin[ivar]) nodeInfof.xmin[ivar]=val;
         if (val > nodeInfof.xmax[ivar]) nodeInfof.xmax[ivar]=val;

auto redfunc = [nodeInfoInit](std::vector<BuildNodeInfo> v) -> BuildNodeInfo { return std::accumulate(v.begin(), v.end(), nodeInfoInit); };
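// A self-contained sketch of the reduction pattern above (the Stats struct is
// a simplified stand-in for BuildNodeInfo, not the TMVA class): each worker
// fills its own statistics object, and an operator+ lets std::accumulate
// merge the per-partition results exactly as the redfunc lambda does.
#include <numeric>
#include <vector>

struct Stats {
   double s = 0, b = 0;                       // weighted signal / background sums
   Stats operator+(const Stats& other) const  // merge rule used by the reduction
   {
      Stats ret;
      ret.s = s + other.s;
      ret.b = b + other.b;
      return ret;
   }
};

int main()
{
   std::vector<Stats> perPartition = {{1.5, 0.5}, {2.0, 1.0}, {0.5, 0.25}};
   Stats total = std::accumulate(perPartition.begin(), perPartition.end(), Stats{});
   // total.s == 4.0, total.b == 1.75
}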
if (nodeInfo.s+nodeInfo.b < 0) {
   Log() << kWARNING << " One of the Decision Tree nodes has negative total number of signal or background events. "
         << "(Nsig=" << nodeInfo.s << " Nbkg=" << nodeInfo.b
         << " Probably you use a Monte Carlo with negative weights. That should in principle "
         << "be fine as long as on average you end up with something positive. For this you have to make sure that the "
         << "minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize=" << fMinNodeSize
         << "% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
         << "to allow for reasonable averaging!!!" << Endl
         << " If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
         << "with negative weight in the training." << Endl;
for (UInt_t i=0; i<eventSample.size(); i++) {
   if (eventSample[i]->GetClass() != fSigClass) {
      nBkg += eventSample[i]->GetWeight();
      Log() << kDEBUG << "Event " << i << " has (original) weight: "
            << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
            << " boostWeight: " << eventSample[i]->GetBoostWeight() << Endl;

Log() << kDEBUG << " that gives in total: " << nBkg << Endl;
if (node == this->GetRoot()) {

for (UInt_t ivar=0; ivar<fNvars; ivar++) {

if ((eventSample.size() >= 2*fMinSize && nodeInfo.s+nodeInfo.b >= 2*fMinSize) && node->GetDepth() < fMaxDepth
    && ( ( nodeInfo.s!=0 && nodeInfo.b !=0 && !DoRegression()) || ( (nodeInfo.s+nodeInfo.b)!=0 && DoRegression()) ) ) {

   separationGain = this->TrainNodeFast(eventSample, node);

   separationGain = this->TrainNodeFull(eventSample, node);

if (separationGain < std::numeric_limits<double>::epsilon()) {

if (DoRegression()) {

if (node->GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(node->GetDepth());
std::vector<const TMVA::Event*> leftSample;  leftSample.reserve(nevents);
std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);

Double_t nRightUnBoosted=0, nLeftUnBoosted=0;

for (UInt_t ie=0; ie< nevents ; ie++) {

   rightSample.push_back(eventSample[ie]);
   nRight += eventSample[ie]->GetWeight();
   nRightUnBoosted += eventSample[ie]->GetOriginalWeight();

   leftSample.push_back(eventSample[ie]);
   nLeft += eventSample[ie]->GetWeight();
   nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();

if (leftSample.empty() || rightSample.empty()) {

   Log() << kERROR << "<TrainNode> all events went to the same branch" << Endl
         << "--- Hence new node == old node ... check" << Endl
         << "--- left:" << leftSample.size()
         << " right:" << rightSample.size() << Endl
         << " while the separation is thought to be " << separationGain
         << "\n when cutting on variable " << node->GetSelector()
         << kFATAL << "--- this should never happen, please write a bug report to Helge.Voss@cern.ch" << Endl;

this->BuildTree(rightSample, rightNode);
this->BuildTree(leftSample, leftNode );

if (DoRegression()) {

if (node->GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(node->GetDepth());
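// A stripped-down sketch of the recursive partitioning above (ToyEvent,
// ToyNode and the fixed daughter cuts are made-up illustrations, not the TMVA
// classes): events are routed to the left or right of the node's cut, and the
// builder recurses on each daughter sample until a stopping condition fires.
#include <vector>

struct ToyEvent { double x; };
struct ToyNode  {
   double cut = 0;
   ToyNode *left = nullptr, *right = nullptr;
};

void BuildToyTree(const std::vector<ToyEvent>& sample, ToyNode* node, int depth)
{
   if (depth == 0 || sample.size() < 2) return;        // stopping conditions
   std::vector<ToyEvent> leftSample, rightSample;
   for (const ToyEvent& ev : sample) {
      if (ev.x > node->cut) rightSample.push_back(ev); // the "GoesRight" test
      else                  leftSample.push_back(ev);
   }
   if (leftSample.empty() || rightSample.empty()) return; // all events on one side
   node->left  = new ToyNode{node->cut - 1};            // placeholder daughter cuts;
   node->right = new ToyNode{node->cut + 1};            // TMVA trains these per node
   BuildToyTree(leftSample,  node->left,  depth - 1);
   BuildToyTree(rightSample, node->right, depth - 1);
}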
this->GetRoot()->SetPos('s');
this->GetRoot()->SetDepth(0);
this->GetRoot()->SetParentTree(this);
fMinSize = fMinNodeSize/100. * eventSample.size();

Log() << kDEBUG << "\tThe minimal node size MinNodeSize=" << fMinNodeSize
      << "% is translated to an actual number of events = " << fMinSize
      << " for the training sample size of " << eventSample.size() << Endl;
Log() << kDEBUG << "\tNote: This number will be taken as absolute minimum in the node, " << Endl;
Log() << kDEBUG << " \tin terms of 'weighted events' and unweighted ones !! " << Endl;

UInt_t nevents = eventSample.size();

if (nevents > 0) {
   if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
   fVariableImportance.resize(fNvars);
}
else Log() << kFATAL << ":<BuildTree> eventsample Size == 0 " << Endl;
for (UInt_t ivar=0; ivar<fNvars; ivar++) {

for (UInt_t iev=0; iev<eventSample.size(); iev++) {

if ( DoRegression() ) {

   target2+=weight*tgt*tgt;

for (UInt_t ivar=0; ivar<fNvars; ivar++) {
   if (iev==0) xmin[ivar]=xmax[ivar]=val;
   if (val < xmin[ivar]) xmin[ivar]=val;
   if (val > xmax[ivar]) xmax[ivar]=val;
Log() << kWARNING << " One of the Decision Tree nodes has negative total number of signal or background events. "
      << "(Nsig=" << s << " Nbkg=" << b
      << " Probably you use a Monte Carlo with negative weights. That should in principle "
      << "be fine as long as on average you end up with something positive. For this you have to make sure that the "
      << "minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize=" << fMinNodeSize
      << "% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
      << "to allow for reasonable averaging!!!" << Endl
      << " If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
      << "with negative weight in the training." << Endl;

for (UInt_t i=0; i<eventSample.size(); i++) {
   if (eventSample[i]->GetClass() != fSigClass) {
      nBkg += eventSample[i]->GetWeight();
      Log() << kDEBUG << "Event " << i << " has (original) weight: "
            << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
            << " boostWeight: " << eventSample[i]->GetBoostWeight() << Endl;
if (node == this->GetRoot()) {

for (UInt_t ivar=0; ivar<fNvars; ivar++) {

if ((eventSample.size() >= 2*fMinSize && s+b >= 2*fMinSize) && node->GetDepth() < fMaxDepth
    && ( ( s!=0 && b !=0 && !DoRegression()) || ( (s+b)!=0 && DoRegression()) ) ) {

   separationGain = this->TrainNodeFast(eventSample, node);

   separationGain = this->TrainNodeFull(eventSample, node);

if (separationGain < std::numeric_limits<double>::epsilon()) {

if (DoRegression()) {

if (node->GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(node->GetDepth());
std::vector<const TMVA::Event*> leftSample;  leftSample.reserve(nevents);
std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);

Double_t nRightUnBoosted=0, nLeftUnBoosted=0;

for (UInt_t ie=0; ie< nevents ; ie++) {

   rightSample.push_back(eventSample[ie]);
   nRight += eventSample[ie]->GetWeight();
   nRightUnBoosted += eventSample[ie]->GetOriginalWeight();

   leftSample.push_back(eventSample[ie]);
   nLeft += eventSample[ie]->GetWeight();
   nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();

if (leftSample.empty() || rightSample.empty()) {

   Log() << kERROR << "<TrainNode> all events went to the same branch" << Endl
         << "--- Hence new node == old node ... check" << Endl
         << "--- left:" << leftSample.size()
         << " right:" << rightSample.size() << Endl
         << " while the separation is thought to be " << separationGain
         << "\n when cutting on variable " << node->GetSelector()
         << kFATAL << "--- this should never happen, please write a bug report to Helge.Voss@cern.ch" << Endl;

this->BuildTree(rightSample, rightNode);
this->BuildTree(leftSample, leftNode );

if (DoRegression()) {

if (node->GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(node->GetDepth());
for (UInt_t i=0; i<eventSample.size(); i++) {
   this->FillEvent(*(eventSample[i]),NULL);

node = this->GetRoot();

if (event.GetClass() == fSigClass) {
if (this->GetRoot()!=NULL) this->GetRoot()->ClearNodeAndAllDaughters();

node = this->GetRoot();

if (l->GetNodeType() * r->GetNodeType() > 0) {
   this->PruneNode(node);

return this->CountNodes();
if( fPruneMethod == kNoPruning ) return 0.0;

if      (fPruneMethod == kExpectedErrorPruning)
else if (fPruneMethod == kCostComplexityPruning)

Log() << kFATAL << "Selected pruning method not yet implemented "

if(!tool) return 0.0;

if(validationSample == NULL){
   Log() << kFATAL << "Cannot automate the pruning algorithm without an "
         << "independent validation sample!" << Endl;
}
else if(validationSample->size() == 0) {
   Log() << kFATAL << "Cannot automate the pruning algorithm with "
         << "independent validation sample of ZERO events!" << Endl;

Log() << kFATAL << "Error pruning tree! Check prune.log for more information."

return pruneStrength;
GetRoot()->ResetValidationData();
for (UInt_t ievt=0; ievt < validationSample->size(); ievt++) {
   CheckEventWithPrunedTree((*validationSample)[ievt]);
n = this->GetRoot();

Log() << kFATAL << "TestPrunedTreeQuality: started with undefined ROOT node" << Endl;

if( n->GetLeft() != NULL && n->GetRight() != NULL && !n->IsTerminal() ) {
   return (TestPrunedTreeQuality( n->GetLeft(),  mode ) +
           TestPrunedTreeQuality( n->GetRight(), mode ));

if (DoRegression()) {
   Double_t sumw = n->GetNSValidation() + n->GetNBValidation();
   return n->GetSumTarget2() - 2*n->GetSumTarget()*n->GetResponse() + sumw*n->GetResponse()*n->GetResponse();

if (n->GetPurity() > this->GetNodePurityLimit())
   return n->GetNBValidation();
else
   return n->GetNSValidation();

else if ( mode == 1 ) {
   return (n->GetPurity() * n->GetNBValidation() + (1.0 - n->GetPurity()) * n->GetNSValidation());

throw std::string("Unknown ValidationQualityMode");
if (current == NULL) {
   Log() << kFATAL << "CheckEventWithPrunedTree: started with undefined ROOT node" << Endl;

while(current != NULL) {
   if(e->GetClass() == fSigClass)

if (e->GetNTargets() > 0) {
for( EventConstList::const_iterator it = validationSample->begin();
     it != validationSample->end(); ++it ) {
   sumWeights += (*it)->GetWeight();
n = this->GetRoot();

Log() << kFATAL << "CountLeafNodes: started with undefined ROOT node" << Endl;

if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) == NULL) ) {

if (this->GetLeftDaughter(n) != NULL) {
   countLeafs += this->CountLeafNodes( this->GetLeftDaughter(n) );

if (this->GetRightDaughter(n) != NULL) {
   countLeafs += this->CountLeafNodes( this->GetRightDaughter(n) );
n = this->GetRoot();

Log() << kFATAL << "DescendTree: started with undefined ROOT node" << Endl;

if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) == NULL) ) {

else if ((this->GetLeftDaughter(n) == NULL) && (this->GetRightDaughter(n) != NULL) ) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;

else if ((this->GetLeftDaughter(n) != NULL) && (this->GetRightDaughter(n) == NULL) ) {
   Log() << kFATAL << " Node with only one daughter?? Something went wrong" << Endl;

if (this->GetLeftDaughter(n) != NULL) {
   this->DescendTree( this->GetLeftDaughter(n) );

if (this->GetRightDaughter(n) != NULL) {
   this->DescendTree( this->GetRightDaughter(n) );
this->DeleteNode(l);
this->DeleteNode(r);

if(node == NULL) return;

node->SetAlpha( std::numeric_limits<double>::infinity( ) );
Node* current = this->GetRoot();

for (UInt_t i =0; i < depth; i++) {
   ULong_t tmp = 1 << i;
   if ( tmp & sequence) current = this->GetRightDaughter(current);
   else                 current = this->GetLeftDaughter(current);
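// A standalone sketch of the bit-sequence walk in GetNode() (SimpleNode and
// the helper are illustrative assumptions, not the TMVA classes): bit i of
// `sequence` decides whether step i from the root goes right (1) or left (0).
#include <cstddef>

struct SimpleNode {
   SimpleNode* left  = nullptr;
   SimpleNode* right = nullptr;
};

SimpleNode* GetNodeBySequence(SimpleNode* root, unsigned long sequence, unsigned depth)
{
   SimpleNode* current = root;
   for (unsigned i = 0; i < depth && current != nullptr; i++) {
      unsigned long tmp = 1UL << i;            // test bit i of the sequence
      current = (tmp & sequence) ? current->right : current->left;
   }
   return current;
}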
for (UInt_t ivar=0; ivar<fNvars; ivar++) useVariable[ivar]=kFALSE;

else useNvars = fUseNvars;

UInt_t nSelectedVars = 0;
while (nSelectedVars < useNvars) {
   Double_t bla = fMyTrandom->Rndm()*fNvars;

for (UInt_t ivar=0; ivar < fNvars; ivar++) {
   if (useVariable[ivar] == kTRUE) {
      mapVariable[nSelectedVars] = ivar;

if (nSelectedVars != useNvars) { std::cout << "Bug in TrainNode - GetRandomisedVariables()... sorry" << std::endl; std::exit(1);}
nSelS = std::vector< std::vector<Double_t> >(cNvars);
nSelB = std::vector< std::vector<Double_t> >(cNvars);
nSelS_unWeighted = std::vector< std::vector<Double_t> >(cNvars);
nSelB_unWeighted = std::vector< std::vector<Double_t> >(cNvars);
target  = std::vector< std::vector<Double_t> >(cNvars);
target2 = std::vector< std::vector<Double_t> >(cNvars);

for(Int_t ivar=0; ivar<cNvars; ivar++){
   nSelS[ivar] = std::vector<Double_t>(nBins[ivar], 0);
   nSelB[ivar] = std::vector<Double_t>(nBins[ivar], 0);
   nSelS_unWeighted[ivar] = std::vector<Double_t>(nBins[ivar], 0);
   nSelB_unWeighted[ivar] = std::vector<Double_t>(nBins[ivar], 0);
   target[ivar]  = std::vector<Double_t>(nBins[ivar], 0);
   target2[ivar] = std::vector<Double_t>(nBins[ivar], 0);

std::vector< std::vector<Double_t> > nSelS;
std::vector< std::vector<Double_t> > nSelB;
if(cNvars != other.cNvars)
   std::cout << "!!! ERROR TrainNodeInfo1+TrainNodeInfo2 failure. cNvars1 != cNvars2." << std::endl;

for (Int_t ivar=0; ivar<cNvars; ivar++) {
   for (UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
      ret.nSelS[ivar][ibin] = nSelS[ivar][ibin] + other.nSelS[ivar][ibin];
      ret.nSelB[ivar][ibin] = nSelB[ivar][ibin] + other.nSelB[ivar][ibin];

      ret.target2[ivar][ibin] = target2[ivar][ibin] + other.target2[ivar][ibin];
for (UInt_t ivar=0; ivar <= fNvars; ivar++) {
   separationGain[ivar]=-1;

UInt_t nevents = eventSample.size();

std::vector<Double_t> fisherCoeff;

if (fRandomisedTree) {
   GetRandomisedVariables(useVariable,mapVariable,tmp);

   for (UInt_t ivar=0; ivar < fNvars; ivar++) {
      useVariable[ivar] = kTRUE;
      mapVariable[ivar] = ivar;

useVariable[fNvars] = kFALSE;

if (fUseFisherCuts) {
   useVariable[fNvars] = kTRUE;

   for (UInt_t ivar=0; ivar < fNvars; ivar++) {
      useVarInFisher[ivar] = kFALSE;
      mapVarInFisher[ivar] = ivar;

   std::vector<TMatrixDSym*>* covMatrices;

   Log() << kWARNING << " in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" << Endl;

   for (UInt_t ivar=0; ivar < fNvars; ivar++) {
      for (UInt_t jvar=ivar+1; jvar < fNvars; jvar++) {
         if ( (TMath::Abs( (*s)(ivar, jvar)) > fMinLinCorrForFisher) ||
              (TMath::Abs( (*b)(ivar, jvar)) > fMinLinCorrForFisher) ){
            useVarInFisher[ivar] = kTRUE;
            useVarInFisher[jvar] = kTRUE;

   for (UInt_t ivar=0; ivar < fNvars; ivar++) {
      if (useVarInFisher[ivar] && useVariable[ivar]) {
         mapVarInFisher[nFisherVars++]=ivar;
         if (fUseExclusiveVars) useVariable[ivar] = kFALSE;

   fisherCoeff = this->GetFisherCoefficients(eventSample, nFisherVars, mapVarInFisher);

   delete [] useVarInFisher;
   delete [] mapVarInFisher;

if (fUseFisherCuts && fisherOK) cNvars++;
1475 if (fUseFisherCuts && fisherOK) cNvars++;
1490 for (
UInt_t ivar=0; ivar<cNvars; ivar++) {
1492 nBins[ivar] = fNCuts+1;
1493 if (ivar < fNvars) {
1494 if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') {
1499 cutValues[ivar] =
new Double_t [nBins[ivar]];
1503 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1510 useVariable[ivar]=
kFALSE;
1518 for (
UInt_t iev=0; iev<nevents; iev++) {
1521 for (
UInt_t jvar=0; jvar<fNvars; jvar++)
1522 result += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
1528 for (
UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
1529 cutValues[ivar][ibin]=0;
1543 auto fvarInitCuts = [
this, &useVariable, &cutValues, &invBinWidth, &binWidth, &nBins, &
xmin, &
xmax](
UInt_t ivar = 0){
1545 if ( useVariable[ivar] ) {
1559 invBinWidth[ivar] = 1./binWidth[ivar];
1560 if (ivar < fNvars) {
1561 if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') { invBinWidth[ivar] = 1; binWidth[ivar] = 1; }
1569 for (
UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
1570 cutValues[ivar][icut]=
xmin[ivar]+(
Double_t(icut+1))*binWidth[ivar];
if(eventSample.size() >= cNvars*fNCuts*nPartitions*2)

auto f = [this, &eventSample, &fisherCoeff, &useVariable, &invBinWidth,
          &nBins, &xmin, &cNvars, &nPartitions](UInt_t partition = 0){

   UInt_t start = 1.0*partition/nPartitions*eventSample.size();
   UInt_t end   = (partition+1.0)/nPartitions*eventSample.size();

   for(UInt_t iev=start; iev<end; iev++) {

      Double_t eventWeight = eventSample[iev]->GetWeight();
      if (eventSample[iev]->GetClass() == fSigClass) {
         nodeInfof.nTotS+=eventWeight;

         nodeInfof.nTotB+=eventWeight;

      for (UInt_t ivar=0; ivar < cNvars; ivar++) {

         if ( useVariable[ivar] ) {

            if (ivar < fNvars) eventData = eventSample[iev]->GetValueFast(ivar);

               eventData = fisherCoeff[fNvars];
               for (UInt_t jvar=0; jvar<fNvars; jvar++)
                  eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);

            if (eventSample[iev]->GetClass() == fSigClass) {
               nodeInfof.nSelS[ivar][iBin]+=eventWeight;

               nodeInfof.nSelB[ivar][iBin]+=eventWeight;

            if (DoRegression()) {
               nodeInfof.target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
               nodeInfof.target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);

auto redfunc = [nodeInfoInit](std::vector<TrainNodeInfo> v) -> TrainNodeInfo { return std::accumulate(v.begin(), v.end(), nodeInfoInit); };
auto fvarFillNodeInfo = [this, &nodeInfo, &eventSample, &fisherCoeff, &useVariable, &invBinWidth, &nBins, &xmin](UInt_t ivar = 0){

   for(UInt_t iev=0; iev<eventSample.size(); iev++) {

      Double_t eventWeight = eventSample[iev]->GetWeight();

      if (eventSample[iev]->GetClass() == fSigClass) {
         nodeInfo.nTotS+=eventWeight;

         nodeInfo.nTotB+=eventWeight;

      if ( useVariable[ivar] ) {

         if (ivar < fNvars) eventData = eventSample[iev]->GetValueFast(ivar);

            eventData = fisherCoeff[fNvars];
            for (UInt_t jvar=0; jvar<fNvars; jvar++)
               eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);

         if (eventSample[iev]->GetClass() == fSigClass) {
            nodeInfo.nSelS[ivar][iBin]+=eventWeight;

            nodeInfo.nSelB[ivar][iBin]+=eventWeight;

         if (DoRegression()) {
            nodeInfo.target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
            nodeInfo.target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
auto fvarCumulative = [&nodeInfo, &useVariable, &nBins, this, &eventSample](UInt_t ivar = 0){
   if (useVariable[ivar]) {
      for (UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
         nodeInfo.nSelS[ivar][ibin]+=nodeInfo.nSelS[ivar][ibin-1];
         nodeInfo.nSelB[ivar][ibin]+=nodeInfo.nSelB[ivar][ibin-1];

         if (DoRegression()) {
            nodeInfo.target[ivar][ibin] +=nodeInfo.target[ivar][ibin-1] ;

      Log() << kFATAL << "Helge, you have a bug ....nodeInfo.nSelS_unw..+nodeInfo.nSelB_unw..= "
            << " while eventsample size = " << eventSample.size()

      double lastBins=nodeInfo.nSelS[ivar][nBins[ivar]-1] +nodeInfo.nSelB[ivar][nBins[ivar]-1];
      double totalSum=nodeInfo.nTotS+nodeInfo.nTotB;
      if (TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
         Log() << kFATAL << "Helge, you have another bug ....nodeInfo.nSelS+nodeInfo.nSelB= "
               << " while total number of events = " << totalSum
auto fvarMaxSep = [&nodeInfo, &useVariable, this, &separationGain, &cutIndex, &nBins] (UInt_t ivar = 0){
   if (useVariable[ivar]) {
      for (UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {

         if ( ((sl+bl)>=fMinSize && (sr+br)>=fMinSize)
              && ((slW+blW)>=fMinSize && (srW+brW)>=fMinSize)

            if (DoRegression()) {
               sepTmp = fRegType->GetSeparationGain(nodeInfo.nSelS[ivar][iBin]+nodeInfo.nSelB[ivar][iBin],
                                                    nodeInfo.target[ivar][nBins[ivar]-1],nodeInfo.target2[ivar][nBins[ivar]-1]);

               sepTmp = fSepType->GetSeparationGain(nodeInfo.nSelS[ivar][iBin], nodeInfo.nSelB[ivar][iBin], nodeInfo.nTotS, nodeInfo.nTotB);

            if (separationGain[ivar] < sepTmp) {
               separationGain[ivar] = sepTmp;
               cutIndex[ivar] = iBin;

for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   if (useVariable[ivar] ) {
      if (separationGainTotal < separationGain[ivar]) {
         separationGainTotal = separationGain[ivar];

if (DoRegression()) {
if (nodeInfo.nSelS[mxVar][cutIndex[mxVar]]/nodeInfo.nTotS > nodeInfo.nSelB[mxVar][cutIndex[mxVar]]/nodeInfo.nTotB) cutType=kTRUE;

node->SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);

if (mxVar < (Int_t) fNvars){

   fVariableImportance[mxVar] += separationGainTotal*separationGainTotal * (nodeInfo.nTotS+nodeInfo.nTotB) * (nodeInfo.nTotS+nodeInfo.nTotB) ;

   for (UInt_t ivar=0; ivar<=fNvars; ivar++) {
      fVariableImportance[ivar] += fisherCoeff[ivar]*fisherCoeff[ivar]*separationGainTotal*separationGainTotal * (nodeInfo.nTotS+nodeInfo.nTotB) * (nodeInfo.nTotS+nodeInfo.nTotB) ;

separationGainTotal = 0;

for (UInt_t i=0; i<cNvars; i++) {
   delete [] cutValues[i];

delete [] cutValues;

delete [] useVariable;
delete [] mapVariable;

delete [] separationGain;

delete [] invBinWidth;

return separationGainTotal;
Double_t separationGainTotal = -1, sepTmp;

for (UInt_t ivar=0; ivar <= fNvars; ivar++) {
   separationGain[ivar]=-1;

Int_t nTotS_unWeighted, nTotB_unWeighted;
UInt_t nevents = eventSample.size();

std::vector<Double_t> fisherCoeff;

if (fRandomisedTree) {
   GetRandomisedVariables(useVariable,mapVariable,tmp);

   for (UInt_t ivar=0; ivar < fNvars; ivar++) {
      useVariable[ivar] = kTRUE;
      mapVariable[ivar] = ivar;

useVariable[fNvars] = kFALSE;

if (fUseFisherCuts) {
   useVariable[fNvars] = kTRUE;

   for (UInt_t ivar=0; ivar < fNvars; ivar++) {
      useVarInFisher[ivar] = kFALSE;
      mapVarInFisher[ivar] = ivar;

   std::vector<TMatrixDSym*>* covMatrices;

   Log() << kWARNING << " in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" << Endl;

   for (UInt_t ivar=0; ivar < fNvars; ivar++) {
      for (UInt_t jvar=ivar+1; jvar < fNvars; jvar++) {
         if ( (TMath::Abs( (*s)(ivar, jvar)) > fMinLinCorrForFisher) ||
              (TMath::Abs( (*b)(ivar, jvar)) > fMinLinCorrForFisher) ){
            useVarInFisher[ivar] = kTRUE;
            useVarInFisher[jvar] = kTRUE;

   for (UInt_t ivar=0; ivar < fNvars; ivar++) {
      if (useVarInFisher[ivar] && useVariable[ivar]) {
         mapVarInFisher[nFisherVars++]=ivar;
         if (fUseExclusiveVars) useVariable[ivar] = kFALSE;

   fisherCoeff = this->GetFisherCoefficients(eventSample, nFisherVars, mapVarInFisher);

   delete [] useVarInFisher;
   delete [] mapVarInFisher;

if (fUseFisherCuts && fisherOK) cNvars++;
for (UInt_t ivar=0; ivar<cNvars; ivar++) {
   nBins[ivar] = fNCuts+1;
   if (ivar < fNvars) {
      if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() == 'I') {

   nSelS[ivar] = new Double_t [nBins[ivar]];
   nSelB[ivar] = new Double_t [nBins[ivar]];
   nSelS_unWeighted[ivar] = new Double_t [nBins[ivar]];
   nSelB_unWeighted[ivar] = new Double_t [nBins[ivar]];

   target2[ivar]   = new Double_t [nBins[ivar]];
   cutValues[ivar] = new Double_t [nBins[ivar]];

for (UInt_t ivar=0; ivar < cNvars; ivar++) {

   useVariable[ivar]=kFALSE;

   for (UInt_t iev=0; iev<nevents; iev++) {

      for (UInt_t jvar=0; jvar<fNvars; jvar++)
         result += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);

   for (UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
      nSelS[ivar][ibin]=0;
      nSelB[ivar][ibin]=0;
      nSelS_unWeighted[ivar][ibin]=0;
      nSelB_unWeighted[ivar][ibin]=0;

      target2[ivar][ibin]=0;
      cutValues[ivar][ibin]=0;

for (UInt_t ivar=0; ivar < cNvars; ivar++) {

   if ( useVariable[ivar] ) {

      invBinWidth[ivar] = 1./binWidth[ivar];
      if (ivar < fNvars) {
         if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() == 'I') { invBinWidth[ivar] = 1; binWidth[ivar] = 1; }

      for (UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
         cutValues[ivar][icut]=xmin[ivar]+(Double_t(icut+1))*binWidth[ivar];
nTotS_unWeighted=0; nTotB_unWeighted=0;
for (UInt_t iev=0; iev<nevents; iev++) {

   Double_t eventWeight = eventSample[iev]->GetWeight();
   if (eventSample[iev]->GetClass() == fSigClass) {
      nTotS_unWeighted++; }

   for (UInt_t ivar=0; ivar < cNvars; ivar++) {

      if ( useVariable[ivar] ) {

         if (ivar < fNvars) eventData = eventSample[iev]->GetValueFast(ivar);

            eventData = fisherCoeff[fNvars];
            for (UInt_t jvar=0; jvar<fNvars; jvar++)
               eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);

         if (eventSample[iev]->GetClass() == fSigClass) {
            nSelS[ivar][iBin]+=eventWeight;
            nSelS_unWeighted[ivar][iBin]++;

            nSelB[ivar][iBin]+=eventWeight;
            nSelB_unWeighted[ivar][iBin]++;

         if (DoRegression()) {
            target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
            target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   if (useVariable[ivar]) {
      for (UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
         nSelS[ivar][ibin]+=nSelS[ivar][ibin-1];
         nSelS_unWeighted[ivar][ibin]+=nSelS_unWeighted[ivar][ibin-1];
         nSelB[ivar][ibin]+=nSelB[ivar][ibin-1];
         nSelB_unWeighted[ivar][ibin]+=nSelB_unWeighted[ivar][ibin-1];
         if (DoRegression()) {
            target2[ivar][ibin]+=target2[ivar][ibin-1];

      if (nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1] != eventSample.size()) {
         Log() << kFATAL << "Helge, you have a bug ....nSelS_unw..+nSelB_unw..= "
               << nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1]
               << " while eventsample size = " << eventSample.size()

      double lastBins=nSelS[ivar][nBins[ivar]-1] +nSelB[ivar][nBins[ivar]-1];
      double totalSum=nTotS+nTotB;
      if (TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
         Log() << kFATAL << "Helge, you have another bug ....nSelS+nSelB= "
               << " while total number of events = " << totalSum
for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   if (useVariable[ivar]) {
      for (UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {

         Double_t sl = nSelS_unWeighted[ivar][iBin];
         Double_t bl = nSelB_unWeighted[ivar][iBin];

         if ( ((sl+bl)>=fMinSize && (sr+br)>=fMinSize)
              && ((slW+blW)>=fMinSize && (srW+brW)>=fMinSize)

            if (DoRegression()) {
               sepTmp = fRegType->GetSeparationGain(nSelS[ivar][iBin]+nSelB[ivar][iBin],
                                                    target[ivar][iBin],target2[ivar][iBin],
                                                    target[ivar][nBins[ivar]-1],target2[ivar][nBins[ivar]-1]);

               sepTmp = fSepType->GetSeparationGain(nSelS[ivar][iBin], nSelB[ivar][iBin], nTotS, nTotB);

            if (separationGain[ivar] < sepTmp) {
               separationGain[ivar] = sepTmp;
               cutIndex[ivar] = iBin;

for (UInt_t ivar=0; ivar < cNvars; ivar++) {
   if (useVariable[ivar] ) {
      if (separationGainTotal < separationGain[ivar]) {
         separationGainTotal = separationGain[ivar];
if (DoRegression()) {
   node->SetSeparationIndex(fRegType->GetSeparationIndex(nTotS+nTotB,
                            target[0][nBins[mxVar]-1],target2[0][nBins[mxVar]-1]));

   if (almost_equal_double(target2[0][nBins[mxVar]-1]/(nTotS+nTotB),
                           target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB))) {

   node->SetRMS(TMath::Sqrt(target2[0][nBins[mxVar]-1]/(nTotS+nTotB)
                            - target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB)));

if (nSelS[mxVar][cutIndex[mxVar]]/nTotS > nSelB[mxVar][cutIndex[mxVar]]/nTotB) cutType=kTRUE;

node->SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);

if (mxVar < (Int_t) fNvars){

   fVariableImportance[mxVar] += separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;

   for (UInt_t ivar=0; ivar<=fNvars; ivar++) {
      fVariableImportance[ivar] += fisherCoeff[ivar]*fisherCoeff[ivar]*separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;

separationGainTotal = 0;
for (UInt_t i=0; i<cNvars; i++) {
   delete [] nSelS_unWeighted[i];
   delete [] nSelB_unWeighted[i];

   delete [] target2[i];
   delete [] cutValues[i];

delete [] nSelS_unWeighted;
delete [] nSelB_unWeighted;

delete [] cutValues;

delete [] useVariable;
delete [] mapVariable;

delete [] separationGain;

delete [] invBinWidth;

return separationGainTotal;
std::vector<Double_t> fisherCoeff(fNvars+1);

for (UInt_t ivar=0; ivar<nFisherVars; ivar++) { sumS[ivar] = sumB[ivar] = 0; }

UInt_t nevents = eventSample.size();

for (UInt_t ievt=0; ievt<nevents; ievt++) {

   const Event * ev = eventSample[ievt];

   if (ev->GetClass() == fSigClass) sumOfWeightsS += weight;
   else                             sumOfWeightsB += weight;

   for (UInt_t ivar=0; ivar<nFisherVars; ivar++) {

for (UInt_t ivar=0; ivar<nFisherVars; ivar++) {
   (*meanMatx)( ivar, 2 ) = sumS[ivar];
   (*meanMatx)( ivar, 0 ) = sumS[ivar]/sumOfWeightsS;

   (*meanMatx)( ivar, 2 ) += sumB[ivar];
   (*meanMatx)( ivar, 1 ) = sumB[ivar]/sumOfWeightsB;

   (*meanMatx)( ivar, 2 ) /= (sumOfWeightsS + sumOfWeightsB);

assert( sumOfWeightsS > 0 && sumOfWeightsB > 0 );

const Int_t nFisherVars2 = nFisherVars*nFisherVars;

memset(sum2Sig,0,nFisherVars2*sizeof(Double_t));
memset(sum2Bgd,0,nFisherVars2*sizeof(Double_t));

for (UInt_t ievt=0; ievt<nevents; ievt++) {

   const Event* ev = eventSample.at(ievt);

   if ( ev->GetClass() == fSigClass ) sum2Sig[k] += ( (xval[x] - (*meanMatx)(x, 0))*(xval[y] - (*meanMatx)(y, 0)) )*weight;
   else                               sum2Bgd[k] += ( (xval[x] - (*meanMatx)(x, 1))*(xval[y] - (*meanMatx)(y, 1)) )*weight;

(*with)(x, y) = sum2Sig[k]/sumOfWeightsS + sum2Bgd[k]/sumOfWeightsB;

prodSig = ( ((*meanMatx)(x, 0) - (*meanMatx)(x, 2))*
            ((*meanMatx)(y, 0) - (*meanMatx)(y, 2)) );
prodBgd = ( ((*meanMatx)(x, 1) - (*meanMatx)(x, 2))*
            ((*meanMatx)(y, 1) - (*meanMatx)(y, 2)) );

(*betw)(x, y) = (sumOfWeightsS*prodSig + sumOfWeightsB*prodBgd) / (sumOfWeightsS + sumOfWeightsB);

(*cov)(x, y) = (*with)(x, y) + (*betw)(x, y);

Log() << kWARNING << "FisherCoeff matrix is almost singular with determinant="
      << " did you use the variables that are linear combinations or highly correlated?"

Log() << kFATAL << "FisherCoeff matrix is singular with determinant="
      << " did you use the variables that are linear combinations?"

Double_t xfact = TMath::Sqrt( sumOfWeightsS*sumOfWeightsB ) / (sumOfWeightsS + sumOfWeightsB);

std::vector<Double_t> diffMeans( nFisherVars );

for (UInt_t ivar=0; ivar<=fNvars; ivar++) fisherCoeff[ivar] = 0;
for (UInt_t ivar=0; ivar<nFisherVars; ivar++) {
   for (UInt_t jvar=0; jvar<nFisherVars; jvar++) {
      Double_t d = (*meanMatx)(jvar, 0) - (*meanMatx)(jvar, 1);
      fisherCoeff[mapVarInFisher[ivar]] += invCov(ivar, jvar)*d;

   fisherCoeff[mapVarInFisher[ivar]] *= xfact;

for (UInt_t ivar=0; ivar<nFisherVars; ivar++){
   f0 += fisherCoeff[mapVarInFisher[ivar]]*((*meanMatx)(ivar, 0) + (*meanMatx)(ivar, 1));

fisherCoeff[fNvars] = f0;
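// A compact numeric sketch of the Fisher computation above for two variables
// (made-up numbers, hand-rolled 2x2 inverse instead of TMatrixD::Invert):
// the coefficients are w = S_W^{-1} (mu_S - mu_B), the linear combination
// that best separates the two classes.
#include <cstdio>

int main()
{
   // within-class scatter matrix S_W and the two class means (toy values)
   double SW[2][2] = {{2.0, 0.5}, {0.5, 1.0}};
   double muS[2] = {1.0, 2.0}, muB[2] = {-1.0, 0.0};

   // invert the 2x2 matrix analytically
   double det = SW[0][0]*SW[1][1] - SW[0][1]*SW[1][0];
   double inv[2][2] = {{ SW[1][1]/det, -SW[0][1]/det},
                       {-SW[1][0]/det,  SW[0][0]/det}};

   // Fisher coefficients: w_i = sum_j inv(i,j) * (muS_j - muB_j)
   double w[2];
   for (int i = 0; i < 2; i++) {
      w[i] = 0;
      for (int j = 0; j < 2; j++) w[i] += inv[i][j]*(muS[j] - muB[j]);
   }
   std::printf("w = (%g, %g)\n", w[0], w[1]);   // (0.571429, 1.71429)
}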
Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;

std::vector<TMVA::BDTEventWrapper> bdtEventSample;

std::vector<Double_t> lCutValue( fNvars, 0.0 );
std::vector<Double_t> lSepGain( fNvars, -1.0e6 );
std::vector<Char_t> lCutType( fNvars );

for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {
   if((*it)->GetClass() == fSigClass) {
      nTotS += (*it)->GetWeight();

      nTotB += (*it)->GetWeight();

std::vector<Char_t> useVariable(fNvars);

if (fRandomisedTree) {
   if (fUseNvars ==0 ) {

   Int_t nSelectedVars = 0;
   while (nSelectedVars < fUseNvars) {
      Double_t bla = fMyTrandom->Rndm()*fNvars;

      for (UInt_t ivar=0; ivar < fNvars; ivar++) {
         if(useVariable[ivar] == Char_t(kTRUE)) nSelectedVars++;

for( UInt_t ivar = 0; ivar < fNvars; ivar++ ) {
   if(!useVariable[ivar]) continue;

   std::sort( bdtEventSample.begin(),bdtEventSample.end() );

   Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;

   std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
   for( ; it != it_end; ++it ) {
      if((**it)->GetClass() == fSigClass )
         sigWeightCtr += (**it)->GetWeight();

         bkgWeightCtr += (**it)->GetWeight();

      it->SetCumulativeWeight(false,bkgWeightCtr);
      it->SetCumulativeWeight(true,sigWeightCtr);

   Double_t separationGain = -1.0, sepTmp = 0.0, cutValue = 0.0, dVal = 0.0, norm = 0.0;

   for( it = bdtEventSample.begin(); it != it_end; ++it ) {
      if( *(*it) == NULL ) {
         Log() << kFATAL << "In TrainNodeFull(): have a null event! Where index="

      dVal = bdtEventSample[index].GetVal() - bdtEventSample[index-1].GetVal();

      if( index >= fMinSize && (nTotS_unWeighted + nTotB_unWeighted) - index >= fMinSize && TMath::Abs(dVal/(0.5*norm + 1)) > fPMin ) {

         sepTmp = fSepType->GetSeparationGain( it->GetCumulativeWeight(true), it->GetCumulativeWeight(false), sigWeightCtr, bkgWeightCtr );
         if( sepTmp > separationGain ) {
            separationGain = sepTmp;
            cutValue = it->GetVal() - 0.5*dVal;
            Double_t nSelS = it->GetCumulativeWeight(true);
            Double_t nSelB = it->GetCumulativeWeight(false);

            if( nSelS/sigWeightCtr > nSelB/bkgWeightCtr ) cutType = kTRUE;

   lCutType[ivar]  = Char_t(cutType);
   lCutValue[ivar] = cutValue;
   lSepGain[ivar]  = separationGain;

Int_t iVarIndex = -1;
for( UInt_t ivar = 0; ivar < fNvars; ivar++ ) {
   if( lSepGain[ivar] > separationGain ) {
      separationGain = lSepGain[ivar];

if(iVarIndex >= 0) {
   fVariableImportance[iVarIndex] += separationGain*separationGain * (nTotS+nTotB) * (nTotS+nTotB);

   separationGain = 0.0;

return separationGain;
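// A self-contained sketch of TrainNodeFull's strategy (toy data, a plain Gini
// index of the form s*b/(s+b) instead of TMVA's SeparationBase classes): sort
// the events in one variable, accumulate signal/background weights from the
// left, and scan every boundary between adjacent events for the cut with the
// largest separation gain.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Ev { double val, weight; bool isSignal; };

static double gini(double s, double b) { return (s + b > 0) ? s*b/(s + b) : 0; }

int main()
{
   std::vector<Ev> evs = {{0.1, 1, false}, {0.4, 1, true}, {0.5, 1, false},
                          {0.9, 1, true},  {1.2, 1, true}};
   std::sort(evs.begin(), evs.end(), [](const Ev& a, const Ev& b){ return a.val < b.val; });

   double sTot = 0, bTot = 0;
   for (const Ev& e : evs) (e.isSignal ? sTot : bTot) += e.weight;

   double sLeft = 0, bLeft = 0, bestGain = -1, bestCut = 0;
   for (size_t i = 0; i + 1 < evs.size(); i++) {
      (evs[i].isSignal ? sLeft : bLeft) += evs[i].weight;   // cumulative weights
      double gain = gini(sTot, bTot) - gini(sLeft, bLeft)
                    - gini(sTot - sLeft, bTot - bLeft);     // parent - daughters
      if (gain > bestGain) { bestGain = gain; bestCut = 0.5*(evs[i].val + evs[i+1].val); }
   }
   std::printf("best cut at %g (gain %g)\n", bestCut, bestGain);  // 0.7 (gain 0.533333)
}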
Log() << kFATAL << "CheckEvent: started with undefined ROOT node" << Endl;

Log() << kFATAL << "DT::CheckEvent: inconsistent tree structure" << Endl;
if (DoRegression()) {

Double_t sumsig=0, sumbkg=0, sumtot=0;
for (UInt_t ievt=0; ievt<eventSample.size(); ievt++) {
   if (eventSample[ievt]->GetClass() != fSigClass) sumbkg+=eventSample[ievt]->GetWeight();
   else                                            sumsig+=eventSample[ievt]->GetWeight();
   sumtot+=eventSample[ievt]->GetWeight();

if (sumtot!= (sumsig+sumbkg)){
   Log() << kFATAL << "<SamplePurity> sumtot != sumsig+sumbkg"
         << sumtot << " " << sumsig << " " << sumbkg << Endl;

if (sumtot>0) return sumsig/(sumsig + sumbkg);
std::vector<Double_t> relativeImportance(fNvars);

for (UInt_t i=0; i< fNvars; i++) {
   sum += fVariableImportance[i];
   relativeImportance[i] = fVariableImportance[i];

for (UInt_t i=0; i< fNvars; i++) {
   if (sum > std::numeric_limits<double>::epsilon())
      relativeImportance[i] /= sum;
   else
      relativeImportance[i] = 0;

return relativeImportance;
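// A tiny sketch of the same normalisation with toy numbers: importances are
// divided by their sum so they add up to 1, with a guard against a zero sum.
#include <limits>
#include <vector>

std::vector<double> Normalise(std::vector<double> v)
{
   double sum = 0;
   for (double x : v) sum += x;
   for (double& x : v)
      x = (sum > std::numeric_limits<double>::epsilon()) ? x / sum : 0.0;
   return v;                                   // e.g. {2,1,1} -> {0.5,0.25,0.25}
}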
std::vector<Double_t> relativeImportance = this->GetVariableImportance();
if (ivar < fNvars) return relativeImportance[ivar];
else {
   Log() << kFATAL << "<GetVariableImportance>" << Endl
         << "--- ivar = " << ivar << " is out of range " << Endl;
bool almost_equal_double(double x, double y, int ulp=4)
bool almost_equal_float(float x, float y, int ulp=4)
TMatrixT< Double_t > TMatrixD
static void SetVarIndex(Int_t iVar)
Base class for BinarySearch and Decision Trees.
UInt_t fNNodes
total number of nodes in the tree (counted)
virtual void ReadXML(void *node, UInt_t tmva_Version_Code=262657)
read attributes from XML
Executor & GetThreadExecutor()
Get executor class for multi-thread usage. In case MT is not enabled, will return a serial executo...
static Config & Instance()
static function: returns TMVA instance
Class that contains all the data information.
void SetNEvents_unweighted(Float_t nev)
set the number of unweighted events that entered the node (during training)
void SetNodeType(Int_t t)
set node type: 1 signal node, -1 bkg leave, 0 intermediate Node
void SetSeparationGain(Float_t sep)
set the separation, or information gained BY this nodes selection
void SetNBkgEvents(Float_t b)
set the sum of the backgr weights in the node
void SetCutType(Bool_t t)
set true: if event variable > cutValue ==> signal , false otherwise
Double_t GetNSValidation() const
void IncrementNEvents_unweighted()
increment the number of events that entered the node (during training)
void SetFisherCoeff(Int_t ivar, Double_t coeff)
set fisher coefficients
void SetNSigEvents_unboosted(Float_t s)
set the sum of the unboosted signal events in the node
void SetAlphaMinSubtree(Double_t g)
void IncrementNBkgEvents(Float_t b)
increment the sum of the backgr weights in the node
void SetNEvents_unboosted(Float_t nev)
set the number of unboosted events that entered the node (during training)
Float_t GetNSigEvents(void) const
return the sum of the signal weights in the node
virtual void SetLeft(Node *l)
void SetTerminal(Bool_t s=kTRUE)
void SetResponse(Float_t r)
set the response of the node (for regression)
void SetSampleMax(UInt_t ivar, Float_t xmax)
set the maximum of variable ivar from the training sample that pass/end up in this node
void SetNBValidation(Double_t b)
void IncrementNEvents(Float_t nev)
void SetPurity(void)
return the S/(S+B) (purity) for the node. REM: even if nodes with purity 0.01 are very PURE background...
void SetSubTreeR(Double_t r)
void AddToSumTarget2(Float_t t2)
virtual DecisionTreeNode * GetLeft() const
Double_t GetNodeR() const
virtual Bool_t GoesRight(const Event &) const
test event if it descends the tree at this node to the right
void SetNFisherCoeff(Int_t nvars)
Short_t GetSelector() const
return index of variable used for discrimination at this node
void SetNSigEvents(Float_t s)
set the sum of the signal weights in the node
Float_t GetResponse(void) const
return the response of the node (for regression)
Float_t GetCutValue(void) const
return the cut value applied at this node
Int_t GetNodeType(void) const
return node type: 1 signal node, -1 bkg leave, 0 intermediate Node
void IncrementNBkgEvents_unweighted()
increment the sum of the backgr weights in the node
void SetNSigEvents_unweighted(Float_t s)
set the sum of the unweighted signal events in the node
Double_t GetNBValidation() const
void SetAlpha(Double_t alpha)
void SetSeparationIndex(Float_t sep)
set the chosen index, measure of "purity" (separation between S and B) AT this node
virtual void SetRight(Node *r)
void SetRMS(Float_t r)
set the RMS of the response of the node (for regression)
void IncrementNSigEvents_unweighted()
increment the sum of the signal weights in the node
void SetNBkgEvents_unboosted(Float_t b)
set the sum of the unboosted backgr events in the node
Float_t GetPurity(void) const
return S/(S+B) (purity) at this node (from training)
void IncrementNSigEvents(Float_t s)
increment the sum of the signal weights in the node
Float_t GetSampleMax(UInt_t ivar) const
return the maximum of variable ivar from the training sample that pass/end up in this node
void SetCutValue(Float_t c)
set the cut value applied at this node
Float_t GetNBkgEvents(void) const
return the sum of the backgr weights in the node
Float_t GetSampleMin(UInt_t ivar) const
return the minimum of variable ivar from the training sample that pass/end up in this node
void SetSampleMin(UInt_t ivar, Float_t xmin)
set the minimum of variable ivar from the training sample that pass/end up in this node
void SetSelector(Short_t i)
set index of variable used for discrimination at this node
virtual DecisionTreeNode * GetParent() const
void SetNBkgEvents_unweighted(Float_t b)
set the sum of the unweighted backgr events in the node
void SetNSValidation(Double_t s)
void AddToSumTarget(Float_t t)
void SetNTerminal(Int_t n)
void SetNEvents(Float_t nev)
set the number of events that entered the node (during training)
virtual DecisionTreeNode * GetRight() const
Implementation of a Decision Tree.
UInt_t BuildTree(const EventConstList &eventSample, DecisionTreeNode *node=nullptr)
building the decision tree by recursively calling the splitting of one (root-) node into two daughter...
void FillTree(const EventList &eventSample)
fill the existing decision tree structure by filling events in from the top node and see where the...
void PruneNode(TMVA::DecisionTreeNode *node)
prune away the subtree below the node
void ApplyValidationSample(const EventConstList *validationSample) const
run the validation sample through the (pruned) tree and fill in the nodes the variables NSValidation ...
Double_t TrainNodeFull(const EventConstList &eventSample, DecisionTreeNode *node)
train a node by finding the single optimal cut for a single variable that best separates signal and b...
TMVA::DecisionTreeNode * GetEventNode(const TMVA::Event &e) const
get the pointer to the leaf node where a particular event ends up in... (used in gradient boosting)
void GetRandomisedVariables(Bool_t *useVariable, UInt_t *variableMap, UInt_t &nVars)
void SetParentTreeInNodes(Node *n=nullptr)
descend a tree to find all its leaf nodes, fill max depth reached in the tree at the same time.
void DescendTree(Node *n=nullptr)
descend a tree to find all its leaf nodes
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=262657)
re-create a new tree (decision tree or search tree) from XML
std::vector< const TMVA::Event * > EventConstList
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
static const Int_t fgRandomSeed
Int_t fNCuts
number of grid points in variable cut scans
UInt_t CleanTree(DecisionTreeNode *node=nullptr)
remove those last splits that result in two leaf nodes that are both of the type (i....
virtual ~DecisionTree(void)
destructor
Types::EAnalysisType fAnalysisType
kClassification(=0=false) or kRegression(=1=true)
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
void CheckEventWithPrunedTree(const TMVA::Event *) const
pass a single validation event through a pruned decision tree on the way down the tree,...
void PruneNodeInPlace(TMVA::DecisionTreeNode *node)
prune a node temporarily (without actually deleting its descendants which allows testing the pruned t...
Double_t TestPrunedTreeQuality(const DecisionTreeNode *dt=nullptr, Int_t mode=0) const
return the misclassification rate of a pruned tree a "pruned tree" may have set the variable "IsTermi...
Double_t PruneTree(const EventConstList *validationSample=nullptr)
prune (get rid of internal nodes) the Decision tree to avoid overtraining several different pruning m...
void FillEvent(const TMVA::Event &event, TMVA::DecisionTreeNode *node)
fill the existing decision tree structure by filling events in from the top node and see where the...
void ClearTree()
clear the tree nodes (their S/N, Nevents etc), just keep the structure of the tree
Double_t SamplePurity(EventList eventSample)
calculates the purity S/(S+B) of a given event sample
Node * GetNode(ULong_t sequence, UInt_t depth)
retrieve node from the tree.
std::vector< Double_t > GetFisherCoefficients(const EventConstList &eventSample, UInt_t nFisherVars, UInt_t *mapVarInFisher)
calculate the fisher coefficients for the event sample and the variables used
UInt_t CountLeafNodes(TMVA::Node *n=nullptr)
return the number of terminal nodes in the sub-tree below Node n
Double_t TrainNodeFast(const EventConstList &eventSample, DecisionTreeNode *node)
Decide how to split a node using one of the variables that gives the best separation of signal/backgr...
RegressionVariance * fRegType
the separation criteria used in Regression
DecisionTree(void)
default constructor using the GiniIndex as separation criterion, no restrictions on minimum number of ...
Double_t GetSumWeights(const EventConstList *validationSample) const
calculate the normalization factor for a pruning validation sample
Double_t GetOriginalWeight() const
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Float_t GetValueFast(UInt_t ivar) const
Float_t GetTarget(UInt_t itgt) const
auto Map(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Wrap TExecutor::Map functions.
auto MapReduce(F func, ROOT::TSeq< INTEGER > args, R redfunc) -> InvokeResult_t< F, INTEGER >
Wrap TExecutor::MapReduce functions.
unsigned int GetPoolSize() const
Node for the BinarySearch or Decision Trees.
std::vector< DecisionTreeNode * > PruneSequence
Double_t PruneStrength
the regularization parameter for pruning
quality measure for a pruned subtree T of T_max
Calculate the "SeparationGain" for Regression analysis separation criteria used in various training a...
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
Singleton class for Global types used by TMVA.
Double_t Determinant() const override
Return the matrix determinant.
TMatrixT< Element > & Invert(Double_t *det=nullptr)
Invert the matrix and calculate its determinant.
Random number generator class based on M.
TSeq< unsigned int > TSeqU
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Double_t Sqrt(Double_t x)
Returns the square root of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
BuildNodeInfo(Int_t fNvars, std::vector< Float_t > &inxmin, std::vector< Float_t > &inxmax)
std::vector< Float_t > xmin
BuildNodeInfo operator+(const BuildNodeInfo &other)
std::vector< Float_t > xmax
BuildNodeInfo(Int_t fNvars, const TMVA::Event *evt)
Double_t nTotB_unWeighted
std::vector< std::vector< Double_t > > target2
std::vector< std::vector< Double_t > > nSelB_unWeighted
std::vector< std::vector< Double_t > > nSelB
Double_t nTotS_unWeighted
std::vector< std::vector< Double_t > > target
std::vector< std::vector< Double_t > > nSelS_unWeighted
TrainNodeInfo operator+(const TrainNodeInfo &other)
std::vector< std::vector< Double_t > > nSelS
TrainNodeInfo(Int_t cNvars_, UInt_t *nBins_)