103 || std::abs(x-y) < std::numeric_limits<float>::min();
111 || std::abs(x-y) < std::numeric_limits<double>::min();
119 fMinLinCorrForFisher (1),
120 fUseExclusiveVars (
kTRUE),
128 fPruneMethod (kNoPruning),
129 fNNodesBeforePruning(0),
130 fNodePurityLimit(0.5),
138 fAnalysisType (
Types::kClassification),
178 if (sepType ==
NULL) {
185 Log() <<
kWARNING <<
" You had choosen the training mode using optimal cuts, not\n" 186 <<
" based on a grid of " <<
fNCuts <<
" by setting the option NCuts < 0\n" 187 <<
" as this doesn't exist yet, I set it to " <<
fNCuts <<
" and use the grid" 251 Log() <<
kFATAL <<
"SetParentTreeNodes: started with undefined ROOT node" <<
Endl;
257 Log() <<
kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
260 Log() <<
kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
280 std::string
type(
"");
284 dt->
ReadXML( node, tmva_Version_Code );
307 Log() <<
kDEBUG <<
"\tThe minimal node size MinNodeSize=" <<
fMinNodeSize <<
" fMinNodeSize="<<
fMinNodeSize<<
"% is translated to an actual number of events = "<<
fMinSize<<
" for the training sample size of " << eventSample.size() <<
Endl;
308 Log() <<
kDEBUG <<
"\tNote: This number will be taken as absolute minimum in the node, " <<
Endl;
309 Log() <<
kDEBUG <<
" \tin terms of 'weighted events' and unweighted ones !! " <<
Endl;
313 UInt_t nevents = eventSample.size();
316 if (
fNvars==0)
fNvars = eventSample[0]->GetNVariables();
319 else Log() <<
kFATAL <<
":<BuildTree> eventsample Size == 0 " <<
Endl;
328 xmin[ivar]=xmax[ivar]=0;
330 for (
UInt_t iev=0; iev<eventSample.size(); iev++) {
347 target2+=weight*tgt*tgt;
352 if (iev==0) xmin[ivar]=xmax[ivar]=val;
353 if (val < xmin[ivar]) xmin[ivar]=val;
354 if (val > xmax[ivar]) xmax[ivar]=val;
360 Log() <<
kWARNING <<
" One of the Decision Tree nodes has negative total number of signal or background events. " 361 <<
"(Nsig="<<s<<
" Nbkg="<<
b<<
" Probaby you use a Monte Carlo with negative weights. That should in principle " 362 <<
"be fine as long as on average you end up with something positive. For this you have to make sure that the " 363 <<
"minimul number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize="<<
fMinNodeSize 364 <<
"% of training events, you can set this via the BDT option string when booking the classifier) is large enough " 365 <<
"to allow for reasonable averaging!!!" << Endl
366 <<
" If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events " 367 <<
"with negative weight in the training." <<
Endl;
369 for (
UInt_t i=0; i<eventSample.size(); i++) {
371 nBkg += eventSample[i]->GetWeight();
372 Log() <<
kDEBUG <<
"Event "<< i<<
" has (original) weight: " << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
373 <<
" boostWeight: " << eventSample[i]->GetBoostWeight() <<
Endl;
440 std::vector<const TMVA::Event*> leftSample; leftSample.reserve(nevents);
441 std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);
444 Double_t nRightUnBoosted=0, nLeftUnBoosted=0;
446 for (
UInt_t ie=0; ie< nevents ; ie++) {
448 rightSample.push_back(eventSample[ie]);
449 nRight += eventSample[ie]->GetWeight();
450 nRightUnBoosted += eventSample[ie]->GetOriginalWeight();
453 leftSample.push_back(eventSample[ie]);
454 nLeft += eventSample[ie]->GetWeight();
455 nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();
460 if (leftSample.empty() || rightSample.empty()) {
462 Log() <<
kERROR <<
"<TrainNode> all events went to the same branch" << Endl
463 <<
"--- Hence new node == old node ... check" << Endl
464 <<
"--- left:" << leftSample.size()
465 <<
" right:" << rightSample.size() << Endl
466 <<
" while the separation is thought to be " << separationGain
467 <<
"\n when cutting on variable " << node->
GetSelector()
469 <<
kFATAL <<
"--- this should never happen, please write a bug report to Helge.Voss@cern.ch" <<
Endl;
533 for (
UInt_t i=0; i<eventSample.size(); i++) {
567 this->
FillEvent(event,dynamic_cast<TMVA::DecisionTreeNode*>(node->
GetLeft())) ;
636 Log() <<
kFATAL <<
"Selected pruning method not yet implemented " 640 if(!tool)
return 0.0;
644 if(validationSample ==
NULL){
645 Log() <<
kFATAL <<
"Cannot automate the pruning algorithm without an " 646 <<
"independent validation sample!" <<
Endl;
647 }
else if(validationSample->size() == 0) {
648 Log() <<
kFATAL <<
"Cannot automate the pruning algorithm with " 649 <<
"independent validation sample of ZERO events!" <<
Endl;
656 Log() <<
kFATAL <<
"Error pruning tree! Check prune.log for more information." 676 return pruneStrength;
689 for (
UInt_t ievt=0; ievt < validationSample->size(); ievt++) {
705 Log() <<
kFATAL <<
"TestPrunedTreeQuality: started with undefined ROOT node" <<
Endl;
726 else if ( mode == 1 ) {
731 throw std::string(
"Unknown ValidationQualityMode");
745 if (current ==
NULL) {
746 Log() <<
kFATAL <<
"CheckEventWithPrunedTree: started with undefined ROOT node" <<
Endl;
749 while(current !=
NULL) {
778 for( EventConstList::const_iterator it = validationSample->begin();
779 it != validationSample->end(); ++it ) {
780 sumWeights += (*it)->GetWeight();
795 Log() <<
kFATAL <<
"CountLeafNodes: started with undefined ROOT node" <<
Endl;
824 Log() <<
kFATAL <<
"DescendTree: started with undefined ROOT node" <<
Endl;
833 Log() <<
kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
837 Log() <<
kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
877 if(node ==
NULL)
return;
880 node->
SetAlpha( std::numeric_limits<double>::infinity( ) );
894 for (
UInt_t i =0; i < depth; i++) {
917 while (nSelectedVars < useNvars) {
922 if (useVariable[ivar] ==
kTRUE) {
923 mapVariable[nSelectedVars] = ivar;
928 if (nSelectedVars != useNvars) { std::cout <<
"Bug in TrainNode - GetRandisedVariables()... sorry" << std::endl; std::exit(1);}
943 Double_t separationGainTotal = -1, sepTmp;
948 separationGain[ivar]=-1;
954 Int_t nTotS_unWeighted, nTotB_unWeighted;
955 UInt_t nevents = eventSample.size();
963 std::vector<Double_t> fisherCoeff;
971 useVariable[ivar] =
kTRUE;
972 mapVariable[ivar] = ivar;
986 useVarInFisher[ivar] =
kFALSE;
987 mapVarInFisher[ivar] = ivar;
990 std::vector<TMatrixDSym*>* covMatrices;
993 Log() <<
kWARNING <<
" in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" <<
Endl;
1005 useVarInFisher[ivar] =
kTRUE;
1006 useVarInFisher[jvar] =
kTRUE;
1017 if (useVarInFisher[ivar] && useVariable[ivar]) {
1018 mapVarInFisher[nFisherVars++]=ivar;
1029 delete [] useVarInFisher;
1030 delete [] mapVarInFisher;
1050 for (
UInt_t ivar=0; ivar<cNvars; ivar++) {
1058 nSelS[ivar] =
new Double_t [nBins[ivar]];
1059 nSelB[ivar] =
new Double_t [nBins[ivar]];
1060 nSelS_unWeighted[ivar] =
new Double_t [nBins[ivar]];
1061 nSelB_unWeighted[ivar] =
new Double_t [nBins[ivar]];
1062 target[ivar] =
new Double_t [nBins[ivar]];
1063 target2[ivar] =
new Double_t [nBins[ivar]];
1064 cutValues[ivar] =
new Double_t [nBins[ivar]];
1071 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1078 useVariable[ivar]=
kFALSE;
1086 for (
UInt_t iev=0; iev<nevents; iev++) {
1090 result += fisherCoeff[jvar]*(eventSample[iev])->GetValue(jvar);
1091 if (result > xmax[ivar]) xmax[ivar]=
result;
1092 if (result < xmin[ivar]) xmin[ivar]=
result;
1095 for (
UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
1096 nSelS[ivar][ibin]=0;
1097 nSelB[ivar][ibin]=0;
1098 nSelS_unWeighted[ivar][ibin]=0;
1099 nSelB_unWeighted[ivar][ibin]=0;
1100 target[ivar][ibin]=0;
1101 target2[ivar][ibin]=0;
1102 cutValues[ivar][ibin]=0;
1107 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1109 if ( useVariable[ivar] ) {
1122 binWidth[ivar] = ( xmax[ivar] - xmin[ivar] ) /
Double_t(nBins[ivar]);
1123 invBinWidth[ivar] = 1./binWidth[ivar];
1133 for (
UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
1134 cutValues[ivar][icut]=xmin[ivar]+(
Double_t(icut+1))*binWidth[ivar];
1141 nTotS_unWeighted=0; nTotB_unWeighted=0;
1142 for (
UInt_t iev=0; iev<nevents; iev++) {
1144 Double_t eventWeight = eventSample[iev]->GetWeight();
1147 nTotS_unWeighted++; }
1154 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1157 if ( useVariable[ivar] ) {
1159 if (ivar <
fNvars) eventData = eventSample[iev]->GetValue(ivar);
1161 eventData = fisherCoeff[
fNvars];
1163 eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValue(jvar);
1169 nSelS[ivar][iBin]+=eventWeight;
1170 nSelS_unWeighted[ivar][iBin]++;
1173 nSelB[ivar][iBin]+=eventWeight;
1174 nSelB_unWeighted[ivar][iBin]++;
1177 target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
1178 target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
1184 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1185 if (useVariable[ivar]) {
1186 for (
UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
1187 nSelS[ivar][ibin]+=nSelS[ivar][ibin-1];
1188 nSelS_unWeighted[ivar][ibin]+=nSelS_unWeighted[ivar][ibin-1];
1189 nSelB[ivar][ibin]+=nSelB[ivar][ibin-1];
1190 nSelB_unWeighted[ivar][ibin]+=nSelB_unWeighted[ivar][ibin-1];
1192 target[ivar][ibin] +=target[ivar][ibin-1] ;
1193 target2[ivar][ibin]+=target2[ivar][ibin-1];
1196 if (nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1] != eventSample.size()) {
1197 Log() <<
kFATAL <<
"Helge, you have a bug ....nSelS_unw..+nSelB_unw..= " 1198 << nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1]
1199 <<
" while eventsample size = " << eventSample.size()
1202 double lastBins=nSelS[ivar][nBins[ivar]-1] +nSelB[ivar][nBins[ivar]-1];
1203 double totalSum=nTotS+nTotB;
1204 if (
TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
1205 Log() <<
kFATAL <<
"Helge, you have another bug ....nSelS+nSelB= " 1207 <<
" while total number of events = " << totalSum
1214 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1215 if (useVariable[ivar]) {
1216 for (
UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {
1228 Double_t sl = nSelS_unWeighted[ivar][iBin];
1229 Double_t bl = nSelB_unWeighted[ivar][iBin];
1247 target[ivar][iBin],target2[ivar][iBin],
1249 target[ivar][nBins[ivar]-1],target2[ivar][nBins[ivar]-1]);
1253 if (separationGain[ivar] < sepTmp) {
1254 separationGain[ivar] = sepTmp;
1255 cutIndex[ivar] = iBin;
1264 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1265 if (useVariable[ivar] ) {
1266 if (separationGainTotal < separationGain[ivar]) {
1267 separationGainTotal = separationGain[ivar];
1276 node->
SetResponse(target[0][nBins[mxVar]-1]/(nTotS+nTotB));
1277 if (
almost_equal_double(target2[0][nBins[mxVar]-1]/(nTotS+nTotB), target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB))) {
1280 node->
SetRMS(
TMath::Sqrt(target2[0][nBins[mxVar]-1]/(nTotS+nTotB) - target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB)));
1286 if (nSelS[mxVar][cutIndex[mxVar]]/nTotS > nSelB[mxVar][cutIndex[mxVar]]/nTotB) cutType=
kTRUE;
1291 node->
SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);
1296 fVariableImportance[mxVar] += separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;
1307 fVariableImportance[ivar] += fisherCoeff[ivar]*fisherCoeff[ivar]*separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;
1313 separationGainTotal = 0;
1330 for (
UInt_t i=0; i<cNvars; i++) {
1333 delete [] nSelS_unWeighted[i];
1334 delete [] nSelB_unWeighted[i];
1335 delete [] target[i];
1336 delete [] target2[i];
1337 delete [] cutValues[i];
1341 delete [] nSelS_unWeighted;
1342 delete [] nSelB_unWeighted;
1345 delete [] cutValues;
1350 delete [] useVariable;
1351 delete [] mapVariable;
1353 delete [] separationGain;
1358 delete [] invBinWidth;
1360 return separationGainTotal;
1370 std::vector<Double_t> fisherCoeff(
fNvars+1);
1393 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) { sumS[ivar] = sumB[ivar] = 0; }
1395 UInt_t nevents = eventSample.size();
1397 for (
UInt_t ievt=0; ievt<nevents; ievt++) {
1400 const Event * ev = eventSample[ievt];
1405 else sumOfWeightsB += weight;
1408 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
1409 sum[ivar] += ev->
GetValue( mapVarInFisher[ivar] )*weight;
1412 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
1413 (*meanMatx)( ivar, 2 ) = sumS[ivar];
1414 (*meanMatx)( ivar, 0 ) = sumS[ivar]/sumOfWeightsS;
1416 (*meanMatx)( ivar, 2 ) += sumB[ivar];
1417 (*meanMatx)( ivar, 1 ) = sumB[ivar]/sumOfWeightsB;
1420 (*meanMatx)( ivar, 2 ) /= (sumOfWeightsS + sumOfWeightsB);
1432 assert( sumOfWeightsS > 0 && sumOfWeightsB > 0 );
1436 const Int_t nFisherVars2 = nFisherVars*nFisherVars;
1440 memset(sum2Sig,0,nFisherVars2*
sizeof(
Double_t));
1441 memset(sum2Bgd,0,nFisherVars2*
sizeof(
Double_t));
1444 for (
UInt_t ievt=0; ievt<nevents; ievt++) {
1448 const Event* ev = eventSample.at(ievt);
1458 if ( ev->
GetClass() ==
fSigClass ) sum2Sig[k] += ( (xval[
x] - (*meanMatx)(
x, 0))*(xval[
y] - (*meanMatx)(
y, 0)) )*weight;
1459 else sum2Bgd[k] += ( (xval[
x] - (*meanMatx)(
x, 1))*(xval[
y] - (*meanMatx)(
y, 1)) )*weight;
1467 (*with)(
x,
y) = sum2Sig[k]/sumOfWeightsS + sum2Bgd[k]/sumOfWeightsB;
1487 prodSig = ( ((*meanMatx)(
x, 0) - (*meanMatx)(
x, 2))*
1488 ((*meanMatx)(
y, 0) - (*meanMatx)(
y, 2)) );
1489 prodBgd = ( ((*meanMatx)(
x, 1) - (*meanMatx)(
x, 2))*
1490 ((*meanMatx)(
y, 1) - (*meanMatx)(
y, 2)) );
1492 (*betw)(
x,
y) = (sumOfWeightsS*prodSig + sumOfWeightsB*prodBgd) / (sumOfWeightsS + sumOfWeightsB);
1501 (*cov)(
x,
y) = (*with)(
x,
y) + (*betw)(
x,
y);
1516 Log() <<
kWARNING <<
"FisherCoeff matrix is almost singular with deterninant=" 1518 <<
" did you use the variables that are linear combinations or highly correlated?" 1522 Log() <<
kFATAL <<
"FisherCoeff matrix is singular with determinant=" 1524 <<
" did you use the variables that are linear combinations?" 1531 Double_t xfact =
TMath::Sqrt( sumOfWeightsS*sumOfWeightsB ) / (sumOfWeightsS + sumOfWeightsB);
1534 std::vector<Double_t> diffMeans( nFisherVars );
1536 for (
UInt_t ivar=0; ivar<=
fNvars; ivar++) fisherCoeff[ivar] = 0;
1537 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
1538 for (
UInt_t jvar=0; jvar<nFisherVars; jvar++) {
1539 Double_t d = (*meanMatx)(jvar, 0) - (*meanMatx)(jvar, 1);
1540 fisherCoeff[mapVarInFisher[ivar]] += invCov(ivar, jvar)*d;
1544 fisherCoeff[mapVarInFisher[ivar]] *= xfact;
1549 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++){
1550 f0 += fisherCoeff[mapVarInFisher[ivar]]*((*meanMatx)(ivar, 0) + (*meanMatx)(ivar, 1));
1554 fisherCoeff[
fNvars] = f0;
1568 Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;
1570 std::vector<TMVA::BDTEventWrapper> bdtEventSample;
1573 std::vector<Double_t> lCutValue(
fNvars, 0.0 );
1574 std::vector<Double_t> lSepGain(
fNvars, -1.0e6 );
1575 std::vector<Char_t> lCutType(
fNvars );
1580 for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {
1582 nTotS += (*it)->GetWeight();
1586 nTotB += (*it)->GetWeight();
1592 std::vector<Char_t> useVariable(
fNvars);
1601 Int_t nSelectedVars = 0;
1607 if(useVariable[ivar] ==
Char_t(
kTRUE)) nSelectedVars++;
1616 if(!useVariable[ivar])
continue;
1618 std::sort( bdtEventSample.begin(),bdtEventSample.end() );
1620 Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;
1621 std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
1622 for( ; it != it_end; ++it ) {
1624 sigWeightCtr += (**it)->GetWeight();
1626 bkgWeightCtr += (**it)->GetWeight();
1628 it->SetCumulativeWeight(
false,bkgWeightCtr);
1629 it->SetCumulativeWeight(
true,sigWeightCtr);
1635 Double_t separationGain = -1.0, sepTmp = 0.0, cutValue = 0.0, dVal = 0.0,
norm = 0.0;
1637 for( it = bdtEventSample.begin(); it != it_end; ++it ) {
1638 if( index == 0 ) { ++index;
continue; }
1639 if( *(*it) ==
NULL ) {
1640 Log() <<
kFATAL <<
"In TrainNodeFull(): have a null event! Where index=" 1641 << index <<
", and parent node=" << node->
GetParent() <<
Endl;
1644 dVal = bdtEventSample[index].GetVal() - bdtEventSample[index-1].GetVal();
1645 norm =
TMath::Abs(bdtEventSample[index].GetVal() + bdtEventSample[index-1].GetVal());
1649 sepTmp =
fSepType->
GetSeparationGain( it->GetCumulativeWeight(
true), it->GetCumulativeWeight(
false), sigWeightCtr, bkgWeightCtr );
1650 if( sepTmp > separationGain ) {
1651 separationGain = sepTmp;
1652 cutValue = it->GetVal() - 0.5*dVal;
1653 Double_t nSelS = it->GetCumulativeWeight(
true);
1654 Double_t nSelB = it->GetCumulativeWeight(
false);
1657 if( nSelS/sigWeightCtr > nSelB/bkgWeightCtr ) cutType =
kTRUE;
1663 lCutType[ivar] =
Char_t(cutType);
1664 lCutValue[ivar] = cutValue;
1665 lSepGain[ivar] = separationGain;
1669 Int_t iVarIndex = -1;
1671 if( lSepGain[ivar] > separationGain ) {
1673 separationGain = lSepGain[ivar];
1677 if(iVarIndex >= 0) {
1682 fVariableImportance[iVarIndex] += separationGain*separationGain * (nTotS+nTotB) * (nTotS+nTotB);
1685 separationGain = 0.0;
1688 return separationGain;
1716 Log() <<
kFATAL <<
"CheckEvent: started with undefined ROOT node" <<
Endl;
1725 Log() <<
kFATAL <<
"DT::CheckEvent: inconsistent tree structure" <<
Endl;
1744 Double_t sumsig=0, sumbkg=0, sumtot=0;
1745 for (
UInt_t ievt=0; ievt<eventSample.size(); ievt++) {
1746 if (eventSample[ievt]->
GetClass() !=
fSigClass) sumbkg+=eventSample[ievt]->GetWeight();
1747 else sumsig+=eventSample[ievt]->GetWeight();
1748 sumtot+=eventSample[ievt]->GetWeight();
1751 if (sumtot!= (sumsig+sumbkg)){
1752 Log() <<
kFATAL <<
"<SamplePurity> sumtot != sumsig+sumbkg" 1753 << sumtot <<
" " << sumsig <<
" " << sumbkg <<
Endl;
1755 if (sumtot>0)
return sumsig/(sumsig + sumbkg);
1767 std::vector<Double_t> relativeImportance(
fNvars);
1776 relativeImportance[i] /=
sum;
1778 relativeImportance[i] = 0;
1780 return relativeImportance;
1789 if (ivar <
fNvars)
return relativeImportance[ivar];
1792 <<
"--- ivar = " << ivar <<
" is out of range " <<
Endl;
void SetNTerminal(Int_t n)
Double_t PruneStrength
quality measure for a pruned subtree T of T_max
DataSetInfo * fDataSetInfo
static long int sum(long int i)
Random number generator class based on M.
void SetSelector(Short_t i)
MsgLogger & Endl(MsgLogger &ml)
void SetFisherCoeff(Int_t ivar, Double_t coeff)
set fisher coefficients
virtual Double_t Rndm()
Machine independent random number generator.
Float_t GetSumTarget() const
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
Int_t fNNodesBeforePruning
Double_t GetNodePurityLimit() const
EPruneMethod fPruneMethod
virtual DecisionTreeNode * GetParent() const
void IncrementNEvents_unweighted()
Float_t GetSumTarget2() const
Double_t GetSeparationGain(const Double_t &nLeft, const Double_t &targetLeft, const Double_t &target2Left, const Double_t &nTot, const Double_t &targetTot, const Double_t &target2Tot)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
void IncrementNEvents(Float_t nev)
Float_t GetSampleMax(UInt_t ivar) const
return the maximum of variable ivar from the training sample that pass/end up in this node ...
Short_t Min(Short_t a, Short_t b)
std::vector< Double_t > GetFisherCoefficients(const EventConstList &eventSample, UInt_t nFisherVars, UInt_t *mapVarInFisher)
calculate the fisher coefficients for the event sample and the variables used
std::vector< DecisionTreeNode * > PruneSequence
the regularization parameter for pruning
Bool_t IsTerminal() const
virtual void SetParentTree(TMVA::BinaryTree *t)
Double_t fNodePurityLimit
virtual void SetRight(Node *r)
virtual ~DecisionTree(void)
destructor
Double_t TestPrunedTreeQuality(const DecisionTreeNode *dt=NULL, Int_t mode=0) const
return the misclassification rate of a pruned tree; a "pruned tree" may have set the variable "IsTermi...
Float_t GetNSigEvents(void) const
virtual Double_t Determinant() const
Return the matrix determinant.
virtual DecisionTreeNode * GetRoot() const
void CheckEventWithPrunedTree(const TMVA::Event *) const
pass a single validation event through a pruned decision tree on the way down the tree...
void DeleteNode(Node *)
protected, recursive, function used by the class destructor and when Pruning
void SetNSigEvents_unweighted(Float_t s)
void SetResponse(Float_t r)
void SetNBValidation(Double_t b)
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
virtual Double_t GetSeparationIndex(const Double_t &n, const Double_t &target, const Double_t &target2)
Separation Index: a simple Variance.
void SetNFisherCoeff(Int_t nvars)
std::vector< const TMVA::Event * > EventConstList
static const Int_t fgRandomSeed
Float_t GetNBkgEvents(void) const
void FillTree(const EventList &eventSample)
Float_t GetSampleMin(UInt_t ivar) const
return the minimum of variable ivar from the training sample that pass/end up in this node ...
Float_t GetCutValue(void) const
void IncrementNBkgEvents(Float_t b)
Double_t SamplePurity(EventList eventSample)
calculates the purity S/(S+B) of a given event sample
Double_t GetNodeR() const
std::vector< Double_t > fVariableImportance
void SetSeparationGain(Float_t sep)
Double_t GetSumWeights(const EventConstList *validationSample) const
calculate the normalization factor for a pruning validation sample
void ResetValidationData()
temporary stored node values (number of events, etc.) that originate not from the training but from t...
void SetNBkgEvents(Float_t b)
void SetNSValidation(Double_t s)
UInt_t CountLeafNodes(TMVA::Node *n=NULL)
return the number of terminal nodes in the sub-tree below Node n
void AddToSumTarget(Float_t t)
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Double_t TrainNodeFast(const EventConstList &eventSample, DecisionTreeNode *node)
Decide how to split a node using one of the variables that gives the best separation of signal/backgr...
void DescendTree(Node *n=NULL)
descend a tree to find all its leaf nodes
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
void FillEvent(const TMVA::Event &event, TMVA::DecisionTreeNode *node)
fill the existing decision tree structure by filling the event in from the top node and see where the...
UInt_t GetNTargets() const
accessor to the number of targets
void SetNEvents(Float_t nev)
TMatrixT< Double_t > TMatrixD
Bool_t DoRegression() const
Double_t fMinLinCorrForFisher
void SetTotalTreeDepth(Int_t depth)
Float_t GetTarget(UInt_t itgt) const
Int_t GetNodeType(void) const
void SetSubTreeR(Double_t r)
virtual void SetLeft(Node *l)
void SetAlpha(Double_t alpha)
UInt_t CleanTree(DecisionTreeNode *node=NULL)
remove those last splits that result in two leaf nodes that are both of the type (i.e.
void SetSampleMin(UInt_t ivar, Float_t xmin)
set the minimum of variable ivar from the training sample that pass/end up in this node ...
void SetCutValue(Float_t c)
void GetRandomisedVariables(Bool_t *useVariable, UInt_t *variableMap, UInt_t &nVars)
Double_t TrainNodeFull(const EventConstList &eventSample, DecisionTreeNode *node)
void SetParentTreeInNodes(Node *n=NULL)
descend a tree to find all its leaf nodes, fill max depth reached in the tree at the same time...
void SetPurity(void)
return the S/(S+B) (purity) for the node REM: even if nodes with purity 0.01 are very PURE background...
bool almost_equal_float(float x, float y, int ulp=4)
default constructor using the GiniIndex as separation criterion, no restrictions on minimum number of ...
void SetCutType(Bool_t t)
void IncrementNSigEvents_unweighted()
virtual void ReadXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
read attributes from XML
void PruneNodeInPlace(TMVA::DecisionTreeNode *node)
prune a node temporarily (without actually deleting its descendants, which allows testing the pruned tre...
TMVA::DecisionTreeNode * GetEventNode(const TMVA::Event &e) const
get the pointer to the leaf node where a particular event ends up in...
void ApplyValidationSample(const EventConstList *validationSample) const
run the validation sample through the (pruned) tree and fill in the nodes the variables NSValidation ...
bool almost_equal_double(double x, double y, int ulp=4)
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
static void SetVarIndex(Int_t iVar)
void AddToSumTarget2(Float_t t2)
virtual Double_t GetSeparationGain(const Double_t &nSelS, const Double_t &nSelB, const Double_t &nTotS, const Double_t &nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
Float_t GetPurity(void) const
void SetSampleMax(UInt_t ivar, Float_t xmax)
set the maximum of variable ivar from the training sample that pass/end up in this node ...
virtual Bool_t GoesRight(const Event &) const
test whether the event descends the tree at this node to the right
Double_t GetNBValidation() const
Node * GetNode(ULong_t sequence, UInt_t depth)
retrieve node from the tree.
void IncrementNSigEvents(Float_t s)
void SetNodeType(Int_t t)
void ClearTree()
clear the tree nodes (their S/N, Nevents etc), just keep the structure of the tree ...
void SetAlphaMinSubtree(Double_t g)
Types::EAnalysisType fAnalysisType
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML
void SetNEvents_unboosted(Float_t nev)
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
UInt_t GetTotalTreeDepth() const
VariableInfo & GetVariableInfo(Int_t i)
void SetNSigEvents_unboosted(Float_t s)
void SetTerminal(Bool_t s=kTRUE)
RegressionVariance * fRegType
void SetNSigEvents(Float_t s)
UInt_t CountNodes(Node *n=NULL)
return the number of nodes in the tree. (make a new count --> takes time)
void SetNBkgEvents_unboosted(Float_t b)
SeparationBase * fSepType
void SetNBkgEvents_unweighted(Float_t b)
void IncrementNBkgEvents_unweighted()
Double_t PruneTree(const EventConstList *validationSample=NULL)
prune (get rid of internal nodes) the Decision tree to avoid overtraining; several different pruning ...
Node * GetRightDaughter(Node *n)
get the right daughter node of the current node "n"
Float_t GetResponse(void) const
Double_t GetNSValidation() const
virtual Double_t GetSeparationIndex(const Double_t &s, const Double_t &b)=0
Short_t Max(Short_t a, Short_t b)
Double_t GetOriginalWeight() const
UInt_t BuildTree(const EventConstList &eventSample, DecisionTreeNode *node=NULL)
building the decision tree by recursively calling the splitting of one (root-) node into two daughter...
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
virtual DecisionTreeNode * GetLeft() const
virtual DecisionTreeNode * GetRight() const
Node * GetLeftDaughter(Node *n)
get the left daughter node of the current node "n"
void ClearNodeAndAllDaughters()
clear the nodes (their S/N, Nevents etc), just keep the structure of the tree
void SetSeparationIndex(Float_t sep)
Short_t GetSelector() const
virtual Int_t Poisson(Double_t mean)
Generates a random integer N according to a Poisson law.
Double_t Sqrt(Double_t x)
Double_t GetPruneStrength() const
double norm(double *x, double *p)
void SetNEvents_unweighted(Float_t nev)
void PruneNode(TMVA::DecisionTreeNode *node)
prune away the subtree below the node