57 : fLearningModel ( kFull )
58 , fImportanceCut ( 0 )
59 , fLinQuantile ( 0.025 )
61 , fAverageSupport ( 0.8 )
62 , fAverageRuleSigma( 0.4 )
66 , fRuleMinDist ( 1
e-3 )
67 , fNRulesGenerated ( 0 )
69 , fEventCacheOK ( true )
73 , fRuleMapEvents ( 0 )
83 : fAverageSupport ( 1 )
96 : fLearningModel ( kFull )
97 , fImportanceCut ( 0 )
98 , fLinQuantile ( 0.025 )
100 , fImportanceRef ( 1.0 )
101 , fAverageSupport ( 0.8 )
102 , fAverageRuleSigma( 0.4 )
106 , fRuleMinDist ( 1
e-3 )
107 , fNRulesGenerated ( 0 )
109 , fEventCacheOK ( true )
110 , fRuleMapOK ( true )
113 , fRuleMapEvents ( 0 )
124 for ( std::vector<Rule *>::iterator itrRule = fRules.begin(); itrRule != fRules.end(); ++itrRule ) {
136 SetAverageRuleSigma(0.4);
138 UInt_t nvars = GetMethodBase()->GetNvar();
139 fVarImportance.clear();
143 fVarImportance.resize( nvars,0.0 );
144 fLinPDFB.resize( nvars,0 );
145 fLinPDFS.resize( nvars,0 );
146 fImportanceRef = 1.0;
147 for (
UInt_t i=0; i<nvars; i++) {
148 fLinTermOK.push_back(
kTRUE);
155 fLogger->SetMinType(t);
164 return ( fRuleFit==0 ? 0:fRuleFit->GetMethodRuleFit());
173 return ( fRuleFit==0 ? 0:fRuleFit->GetMethodBase());
181 MakeRules( fRuleFit->GetForest() );
200 Int_t ncoeffs = fRules.size();
201 if (ncoeffs<1)
return 0;
205 for (
Int_t i=0; i<ncoeffs; i++) {
206 val = fRules[i]->GetCoefficient();
218 UInt_t nrules = fRules.size();
219 for (
UInt_t i=0; i<nrules; i++) {
220 fRules[i]->SetCoefficient(0.0);
229 UInt_t nrules = fRules.size();
230 if (
v.size()!=nrules) {
231 Log() << kFATAL <<
"<SetCoefficients> - BUG TRAP - input vector wrong size! It is = " <<
v.size()
232 <<
" when it should be = " << nrules <<
Endl;
234 for (
UInt_t i=0; i<nrules; i++) {
235 fRules[i]->SetCoefficient(
v[i]);
244 UInt_t nrules = fRules.size();
246 if (nrules==0)
return;
248 for (
UInt_t i=0; i<nrules; i++) {
249 v[i] = (fRules[i]->GetCoefficient());
258 return &(fRuleFit->GetTrainingEvents());
266 return fRuleFit->GetTrainingEvent(i);
274 Log() << kVERBOSE <<
"Removing similar rules; distance = " << fRuleMinDist <<
Endl;
276 UInt_t nrulesIn = fRules.size();
278 std::vector< Char_t > removeMe( nrulesIn,
false );
284 for (
UInt_t i=0; i<nrulesIn; i++) {
287 for (
UInt_t k=i+1; k<nrulesIn; k++) {
293 remind = (
r>0.5 ? k:i);
300 if (!removeMe[remind]) {
301 removeMe[remind] =
true;
311 for (
UInt_t i=0; i<nrulesIn; i++) {
313 theRule = fRules[ind];
314 fRules.erase( fRules.begin() + ind );
320 UInt_t nrulesOut = fRules.size();
321 Log() << kVERBOSE <<
"Removed " << nrulesIn - nrulesOut <<
" out of " << nrulesIn <<
" rules" <<
Endl;
329 UInt_t nrules = fRules.size();
330 if (nrules==0)
return;
331 Log() << kVERBOSE <<
"Removing rules with relative importance < " << fImportanceCut <<
Endl;
332 if (fImportanceCut<=0)
return;
338 for (
UInt_t i=0; i<nrules; i++) {
339 if (fRules[ind]->GetRelImportance()<fImportanceCut) {
340 therule = fRules[ind];
341 fRules.erase( fRules.begin() + ind );
347 Log() << kINFO <<
"Removed " << nrules-ind <<
" out of a total of " << nrules
348 <<
" rules with importance < " << fImportanceCut <<
Endl;
356 UInt_t nlin = fLinNorm.size();
358 Log() << kVERBOSE <<
"Removing linear terms with relative importance < " << fImportanceCut <<
Endl;
361 for (
UInt_t i=0; i<nlin; i++) {
362 fLinTermOK.push_back( (fLinImportance[i]/fImportanceRef > fImportanceCut) );
371 Log() << kVERBOSE <<
"Evaluating Rule support" <<
Endl;
378 SetAverageRuleSigma(0.4);
379 const std::vector<const Event *> *events = GetTrainingEvents();
383 if ((nrules>0) && (events->size()>0)) {
384 for ( std::vector< Rule * >::iterator itrRule=fRules.begin(); itrRule!=fRules.end(); ++itrRule ) {
388 for ( std::vector<const Event * >::const_iterator itrEvent=events->begin(); itrEvent!=events->end(); ++itrEvent ) {
389 if ((*itrRule)->EvalEvent( *(*itrEvent) )) {
390 ew = (*itrEvent)->GetWeight();
392 if (GetMethodRuleFit()->DataInfo().IsSignal(*itrEvent)) ssig += ew;
397 s = s/fRuleFit->GetNEveEff();
399 t = (t<0 ? 0:
sqrt(t));
404 (*itrRule)->SetSupport(s);
405 (*itrRule)->SetNorm(t);
406 (*itrRule)->SetSSB( ssb );
407 (*itrRule)->SetSSBNeve(
Double_t(ssig+sbkg));
410 fAverageSupport = stot/nrules;
411 fAverageRuleSigma =
TMath::Sqrt(fAverageSupport*(1.0-fAverageSupport));
412 Log() << kVERBOSE <<
"Standard deviation of support = " << fAverageRuleSigma <<
Endl;
413 Log() << kVERBOSE <<
"Average rule support = " << fAverageSupport <<
Endl;
422 Double_t maxRuleImp = CalcRuleImportance();
423 Double_t maxLinImp = CalcLinImportance();
424 Double_t maxImp = (maxRuleImp>maxLinImp ? maxRuleImp : maxLinImp);
425 SetImportanceRef( maxImp );
433 for (
UInt_t i=0; i<fRules.size(); i++ ) {
434 fRules[i]->SetImportanceRef(impref);
436 fImportanceRef = impref;
445 Int_t nrules = fRules.size();
446 for (
int i=0; i<nrules; i++ ) {
447 fRules[i]->CalcImportance();
448 imp = fRules[i]->GetImportance();
449 if (imp>maxImp) maxImp = imp;
451 for (
Int_t i=0; i<nrules; i++ ) {
452 fRules[i]->SetImportanceRef(maxImp);
464 UInt_t nvars = fLinCoefficients.size();
465 fLinImportance.resize(nvars,0.0);
466 if (!DoLinear())
return maxImp;
476 for (
UInt_t i=0; i<nvars; i++ ) {
477 imp = fAverageRuleSigma*
TMath::Abs(fLinCoefficients[i]);
478 fLinImportance[i] = imp;
479 if (imp>maxImp) maxImp = imp;
489 Log() << kVERBOSE <<
"Compute variable importance" <<
Endl;
491 UInt_t nrules = fRules.size();
492 if (GetMethodBase()==0) Log() << kFATAL <<
"RuleEnsemble::CalcVarImportance() - should not be here!" <<
Endl;
493 UInt_t nvars = GetMethodBase()->GetNvar();
496 fVarImportance.resize(nvars,0);
499 for (
UInt_t ind=0; ind<nrules; ind++ ) {
500 rimp = fRules[ind]->GetImportance();
501 nvarsUsed = fRules[ind]->GetNumVarsUsed();
503 Log() << kFATAL <<
"<CalcVarImportance> Variables for importance calc!!!??? A BUG!" <<
Endl;
504 rimpN = (nvarsUsed > 0 ? rimp/nvarsUsed:0.0);
505 for (
UInt_t iv=0; iv<nvars; iv++ ) {
506 if (fRules[ind]->ContainsVariable(iv)) {
507 fVarImportance[iv] += rimpN;
514 for (
UInt_t iv=0; iv<fLinTermOK.size(); iv++ ) {
515 if (fLinTermOK[iv]) fVarImportance[iv] += fLinImportance[iv];
522 for (
UInt_t iv=0; iv<nvars; iv++ ) {
523 if ( fVarImportance[iv] > maximp ) maximp = fVarImportance[iv];
526 for (
UInt_t iv=0; iv<nvars; iv++ ) {
527 fVarImportance[iv] *= 1.0/maximp;
541 fRules.resize(rules.size());
542 for (
UInt_t i=0; i<fRules.size(); i++) {
543 fRules[i] = rules[i];
555 if (!DoRules())
return;
564 UInt_t ntrees = forest.size();
565 for (
UInt_t ind=0; ind<ntrees; ind++ ) {
567 MakeRulesFromTree( forest[ind] );
568 nrules = CalcNRules( forest[ind] );
569 nendn = (nrules/2) + 1;
571 sumn2 += nendn*nendn;
572 nrulesCheck += nrules;
574 Double_t nmean = (ntrees>0) ? sumnendn/ntrees : 0;
576 Double_t ndev = 2.0*(nmean-2.0-nsigm)/(nmean-2.0+nsigm);
578 Log() << kVERBOSE <<
"Average number of end nodes per tree = " << nmean <<
Endl;
579 if (ntrees>1) Log() << kVERBOSE <<
"sigma of ditto ( ~= mean-2 ?) = "
582 Log() << kVERBOSE <<
"Deviation from exponential model = " << ndev <<
Endl;
583 Log() << kVERBOSE <<
"Corresponds to L (eq. 13, RuleFit ppr) = " << nmean <<
Endl;
585 if (nrulesCheck !=
static_cast<Int_t>(fRules.size())) {
587 <<
"BUG! number of generated and possible rules do not match! N(rules) = " << fRules.size()
588 <<
" != " << nrulesCheck <<
Endl;
590 Log() << kVERBOSE <<
"Number of generated rules: " << fRules.size() <<
Endl;
593 fNRulesGenerated = fRules.size();
595 RemoveSimilarRules();
607 if (!DoLinear())
return;
609 const std::vector<const Event *> *events = GetTrainingEvents();
610 UInt_t neve = events->size();
611 UInt_t nvars = ((*events)[0])->GetNVariables();
613 typedef std::pair< Double_t, Int_t> dataType;
614 typedef std::pair< Double_t, dataType > dataPoint;
616 std::vector< std::vector<dataPoint> > vardata(nvars);
617 std::vector< Double_t > varsum(nvars,0.0);
618 std::vector< Double_t > varsum2(nvars,0.0);
623 for (
UInt_t i=0; i<neve; i++) {
624 ew = ((*events)[i])->GetWeight();
626 val = ((*events)[i])->GetValue(
v);
627 vardata[
v].push_back( dataPoint( val, dataType(ew,((*events)[i])->GetClass()) ) );
633 fLinCoefficients.clear();
635 fLinDP.resize(nvars,0);
636 fLinDM.resize(nvars,0);
637 fLinCoefficients.resize(nvars,0);
638 fLinNorm.resize(nvars,0);
640 Double_t averageWeight = neve ? fRuleFit->GetNEveEff()/
static_cast<Double_t>(neve) : 0;
657 std::sort( vardata[
v].begin(),vardata[
v].end() );
658 nquant = fLinQuantile*fRuleFit->GetNEveEff();
662 while ( (ie<neve) && (neff<nquant) ) {
663 neff += vardata[
v][ie].second.first;
666 indquantM = (ie==0 ? 0:ie-1);
670 while ( (ie>0) && (neff<nquant) ) {
672 neff += vardata[
v][ie].second.first;
674 indquantP = (ie==neve ? ie=neve-1:ie);
676 fLinDM[
v] = vardata[
v][indquantM].first;
677 fLinDP[
v] = vardata[
v][indquantP].first;
681 if (fLinPDFB[
v])
delete fLinPDFB[
v];
682 if (fLinPDFS[
v])
delete fLinPDFS[
v];
683 fLinPDFB[
v] =
new TH1F(
Form(
"bkgvar%d",
v),
"bkg temphist",40,fLinDM[
v],fLinDP[
v]);
684 fLinPDFS[
v] =
new TH1F(
Form(
"sigvar%d",
v),
"sig temphist",40,fLinDM[
v],fLinDP[
v]);
685 fLinPDFB[
v]->Sumw2();
686 fLinPDFS[
v]->Sumw2();
690 const Double_t w = 1.0/fRuleFit->GetNEveEff();
691 for (ie=0; ie<neve; ie++) {
692 val = vardata[
v][ie].first;
693 ew = vardata[
v][ie].second.first;
694 type = vardata[
v][ie].second.second;
697 varsum2[
v] += ew*lx*lx;
700 if (
type==1) fLinPDFS[
v]->Fill(lx,w*ew);
701 else fLinPDFB[
v]->Fill(lx,w*ew);
707 stdl =
TMath::Sqrt( (varsum2[
v] - (varsum[
v]*varsum[
v]/fRuleFit->GetNEveEff()))/(fRuleFit->GetNEveEff()-averageWeight) );
708 fLinNorm[
v] = CalcLinNorm(stdl);
714 fLinPDFS[
v]->Write();
715 fLinPDFB[
v]->Write();
725 UInt_t nvars=fLinDP.size();
733 Int_t bin = fLinPDFS[
v]->FindBin(val);
734 fstot += fLinPDFS[
v]->GetBinContent(bin);
735 fbtot += fLinPDFB[
v]->GetBinContent(bin);
737 if (nvars<1)
return 0;
738 ntot = (fstot+fbtot)/
Double_t(nvars);
740 return fstot/(fstot+fbtot);
755 UInt_t nrules = fRules.size();
756 for (
UInt_t ir=0; ir<nrules; ir++) {
757 if (fEventRuleVal[ir]>0) {
758 ssb = fEventRuleVal[ir]*GetRulesConst(ir)->GetSSB();
759 neve = GetRulesConst(ir)->GetSSBNeve();
769 if (ntot>0)
return nsig/ntot;
798 if (DoLinear()) pl = PdfLinear(nls, nlt);
799 if (DoRules()) pr = PdfRule(nrs, nrt);
801 if ((nlt>0) && (nrt>0)) nt=2.0;
813 const std::vector<const Event *> *events = GetTrainingEvents();
814 const UInt_t neve = events->size();
815 const UInt_t nvars = GetMethodBase()->GetNvar();
816 const UInt_t nrules = fRules.size();
817 const Event *eveData;
833 std::vector<Int_t> varcnt;
841 varcnt.resize(nvars,0);
842 fRuleVarFrac.clear();
843 fRuleVarFrac.resize(nvars,0);
845 for (
UInt_t i=0; i<nrules; i++ ) {
847 if (fRules[i]->ContainsVariable(
v)) varcnt[
v]++;
849 sigRule = fRules[i]->IsSignalRule();
864 eveData = (*events)[
e];
865 tagged = fRules[i]->EvalEvent(*eveData);
866 sigTag = (tagged && sigRule);
867 bkgTag = (tagged && (!sigRule));
869 sigTrue = (eveData->
GetClass() == 0);
872 if (sigTag && sigTrue) nss++;
873 if (sigTag && !sigTrue) nsb++;
874 if (bkgTag && sigTrue) nbs++;
875 if (bkgTag && !sigTrue) nbb++;
879 if (ntag>0 && neve > 0) {
888 fRuleFSig = (nsig>0) ?
static_cast<Double_t>(nsig)/
static_cast<Double_t>(nsig+nbkg) : 0;
899 const UInt_t nrules = fRules.size();
903 for (
UInt_t i=0; i<nrules; i++ ) {
904 nc =
static_cast<Double_t>(fRules[i]->GetNcuts());
911 fRuleNCave = sumNc/nrules;
921 Log() << kHEADER <<
"-------------------RULE ENSEMBLE SUMMARY------------------------" <<
Endl;
923 if (mrf) Log() << kINFO <<
"Tree training method : " << (mrf->
UseBoost() ?
"AdaBoost":
"Random") <<
Endl;
924 Log() << kINFO <<
"Number of events per tree : " << fRuleFit->GetNTreeSample() <<
Endl;
925 Log() << kINFO <<
"Number of trees : " << fRuleFit->GetForest().size() <<
Endl;
926 Log() << kINFO <<
"Number of generated rules : " << fNRulesGenerated <<
Endl;
927 Log() << kINFO <<
"Idem, after cleanup : " << fRules.size() <<
Endl;
928 Log() << kINFO <<
"Average number of cuts per rule : " <<
Form(
"%8.2f",fRuleNCave) <<
Endl;
929 Log() << kINFO <<
"Spread in number of cuts per rules : " <<
Form(
"%8.2f",fRuleNCsig) <<
Endl;
930 Log() << kVERBOSE <<
"Complexity : " <<
Form(
"%8.2f",fRuleNCave*fRuleNCsig) <<
Endl;
931 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
932 Log() << kINFO <<
Endl;
940 const EMsgType kmtype=kINFO;
941 const Bool_t isDebug = (fLogger->GetMinType()<=kDEBUG);
943 Log() << kmtype <<
Endl;
944 Log() << kmtype <<
"================================================================" <<
Endl;
945 Log() << kmtype <<
" M o d e l " <<
Endl;
946 Log() << kmtype <<
"================================================================" <<
Endl;
949 const UInt_t nvars = GetMethodBase()->GetNvar();
950 const Int_t nrules = fRules.size();
953 for (
UInt_t iv = 0; iv<fVarImportance.size(); iv++) {
954 if (GetMethodBase()->GetInputLabel(iv).Length() > maxL) maxL = GetMethodBase()->GetInputLabel(iv).Length();
958 Log() << kDEBUG <<
"Variable importance:" <<
Endl;
959 for (
UInt_t iv = 0; iv<fVarImportance.size(); iv++) {
960 Log() << kDEBUG << std::setw(maxL) << GetMethodBase()->GetInputLabel(iv)
961 << std::resetiosflags(std::ios::right)
962 <<
" : " <<
Form(
" %3.3f",fVarImportance[iv]) <<
Endl;
966 Log() << kHEADER <<
"Offset (a0) = " << fOffset <<
Endl;
969 if (fLinNorm.size() > 0) {
970 Log() << kmtype <<
"------------------------------------" <<
Endl;
971 Log() << kmtype <<
"Linear model (weights unnormalised)" <<
Endl;
972 Log() << kmtype <<
"------------------------------------" <<
Endl;
973 Log() << kmtype << std::setw(maxL) <<
"Variable"
974 << std::resetiosflags(std::ios::right) <<
" : "
975 << std::setw(11) <<
" Weights"
976 << std::resetiosflags(std::ios::right) <<
" : "
978 << std::resetiosflags(std::ios::right)
980 Log() << kmtype <<
"------------------------------------" <<
Endl;
981 for (
UInt_t i=0; i<fLinNorm.size(); i++ ) {
982 Log() << kmtype << std::setw(std::max(maxL,8)) << GetMethodBase()->GetInputLabel(i);
985 << std::resetiosflags(std::ios::right)
986 <<
" : " <<
Form(
" %10.3e",fLinCoefficients[i]*fLinNorm[i])
987 <<
" : " <<
Form(
" %3.3f",fLinImportance[i]/fImportanceRef) <<
Endl;
990 Log() << kmtype <<
"-> importance below threshold = "
991 <<
Form(
" %3.3f",fLinImportance[i]/fImportanceRef) <<
Endl;
994 Log() << kmtype <<
"------------------------------------" <<
Endl;
997 else Log() << kmtype <<
"Linear terms were disabled" <<
Endl;
999 if ((!DoRules()) || (nrules==0)) {
1001 Log() << kmtype <<
"Rule terms were disabled" <<
Endl;
1004 Log() << kmtype <<
"Even though rules were included in the model, none passed! " << nrules <<
Endl;
1008 Log() << kmtype <<
"Number of rules = " << nrules <<
Endl;
1010 Log() << kmtype <<
"N(cuts) in rules, average = " << fRuleNCave <<
Endl;
1011 Log() << kmtype <<
" RMS = " << fRuleNCsig <<
Endl;
1012 Log() << kmtype <<
"Fraction of signal rules = " << fRuleFSig <<
Endl;
1013 Log() << kmtype <<
"Fraction of rules containing a variable (%):" <<
Endl;
1015 Log() << kmtype <<
" " << std::setw(maxL) << GetMethodBase()->GetInputLabel(
v);
1016 Log() << kmtype <<
Form(
" = %2.2f",fRuleVarFrac[
v]*100.0) <<
" %" <<
Endl;
1022 std::list< std::pair<double,int> > sortedImp;
1023 for (
Int_t i=0; i<nrules; i++) {
1024 sortedImp.push_back( std::pair<double,int>( fRules[i]->GetImportance(),i ) );
1028 Log() << kmtype <<
"Printing the first " << printN <<
" rules, ordered in importance." <<
Endl;
1030 for ( std::list< std::pair<double,int> >::reverse_iterator itpair = sortedImp.rbegin();
1031 itpair != sortedImp.rend(); ++itpair ) {
1032 ind = itpair->second;
1036 fRules[ind]->PrintLogger(
Form(
"Rule %4d : ",pind+1));
1039 if (nrules==printN) {
1040 Log() << kmtype <<
"All rules printed" <<
Endl;
1043 Log() << kmtype <<
"Skipping the next " << nrules-printN <<
" rules" <<
Endl;
1049 Log() << kmtype <<
"================================================================" <<
Endl;
1050 Log() << kmtype <<
Endl;
1058 Int_t dp = os.precision();
1059 UInt_t nrules = fRules.size();
1062 os <<
"ImportanceCut= " << fImportanceCut << std::endl;
1063 os <<
"LinQuantile= " << fLinQuantile << std::endl;
1064 os <<
"AverageSupport= " << fAverageSupport << std::endl;
1065 os <<
"AverageRuleSigma= " << fAverageRuleSigma << std::endl;
1066 os <<
"Offset= " << fOffset << std::endl;
1067 os <<
"NRules= " << nrules << std::endl;
1068 for (
UInt_t i=0; i<nrules; i++){
1069 os <<
"***Rule " << i << std::endl;
1070 (fRules[i])->PrintRaw(os);
1072 UInt_t nlinear = fLinNorm.size();
1074 os <<
"NLinear= " << fLinTermOK.size() << std::endl;
1075 for (
UInt_t i=0; i<nlinear; i++) {
1076 os <<
"***Linear " << i << std::endl;
1077 os << std::setprecision(10) << (fLinTermOK[i] ? 1:0) <<
" "
1078 << fLinCoefficients[i] <<
" "
1079 << fLinNorm[i] <<
" "
1082 << fLinImportance[i] <<
" " << std::endl;
1084 os << std::setprecision(dp);
1094 UInt_t nrules = fRules.size();
1095 UInt_t nlinear = fLinNorm.size();
1098 gTools().
AddAttr( re,
"LearningModel", (
int)fLearningModel );
1102 gTools().
AddAttr( re,
"AverageRuleSigma", fAverageRuleSigma );
1104 for (
UInt_t i=0; i<nrules; i++) fRules[i]->AddXMLTo(re);
1106 for (
UInt_t i=0; i<nlinear; i++) {
1126 Int_t iLearningModel;
1131 gTools().
ReadAttr( wghtnode,
"AverageSupport", fAverageSupport );
1132 gTools().
ReadAttr( wghtnode,
"AverageRuleSigma", fAverageRuleSigma );
1139 fRules.resize( nrules );
1141 for (i=0; i<nrules; i++) {
1142 fRules[i] =
new Rule();
1143 fRules[i]->SetRuleEnsemble(
this );
1144 fRules[i]->ReadFromXML( ch );
1150 fLinNorm .resize( nlinear );
1151 fLinTermOK .resize( nlinear );
1152 fLinCoefficients.resize( nlinear );
1153 fLinDP .resize( nlinear );
1154 fLinDM .resize( nlinear );
1155 fLinImportance .resize( nlinear );
1161 fLinTermOK[i] = (iok == 1);
1185 istr >> dummy >> fImportanceCut;
1186 istr >> dummy >> fLinQuantile;
1187 istr >> dummy >> fAverageSupport;
1188 istr >> dummy >> fAverageRuleSigma;
1189 istr >> dummy >> fOffset;
1190 istr >> dummy >> nrules;
1196 for (
UInt_t i=0; i<nrules; i++){
1197 istr >> dummy >> idum;
1198 fRules.push_back(
new Rule() );
1199 (fRules.back())->SetRuleEnsemble(
this );
1200 (fRules.back())->ReadRaw(istr);
1208 istr >> dummy >> nlinear;
1210 fLinNorm .resize( nlinear );
1211 fLinTermOK .resize( nlinear );
1212 fLinCoefficients.resize( nlinear );
1213 fLinDP .resize( nlinear );
1214 fLinDM .resize( nlinear );
1215 fLinImportance .resize( nlinear );
1219 for (
UInt_t i=0; i<nlinear; i++) {
1220 istr >> dummy >> idum;
1222 fLinTermOK[i] = (iok==1);
1223 istr >> fLinCoefficients[i];
1224 istr >> fLinNorm[i];
1227 istr >> fLinImportance[i];
1236 if(
this != &other) {
1263 if (dtree==0)
return 0;
1265 Int_t nendnodes = 0;
1266 FindNEndNodes( node, nendnodes );
1267 return 2*(nendnodes-1);
1275 if (node==0)
return;
1282 FindNEndNodes( nodeR, nendnodes );
1283 FindNEndNodes( nodeL, nendnodes );
1300 if (node==0)
return;
1306 Rule *rule = MakeTheRule(node);
1308 fRules.push_back( rule );
1313 Log() << kFATAL <<
"<AddRule> - ERROR failed in creating a rule! BUG!" <<
Endl;
1327 Log() << kFATAL <<
"<MakeTheRule> Input node is NULL. Should not happen. BUG!" <<
Endl;
1335 std::vector< const Node * > nodeVec;
1336 const Node *parent = node;
1341 nodeVec.push_back( node );
1344 if (!parent)
continue;
1347 nodeVec.insert( nodeVec.begin(), parent );
1350 if (nodeVec.size()<2) {
1351 Log() << kFATAL <<
"<MakeTheRule> BUG! Inconsistent Rule!" <<
Endl;
1354 Rule *rule =
new Rule(
this, nodeVec );
1364 Log() << kVERBOSE <<
"Making Rule map for all events" <<
Endl;
1366 if (events==0) events = GetTrainingEvents();
1367 if ((ifirst==0) || (ilast==0) || (ifirst>ilast)) {
1369 ilast = events->size()-1;
1372 if ((events!=fRuleMapEvents) ||
1373 (ifirst!=fRuleMapInd0) ||
1374 (ilast !=fRuleMapInd1)) {
1379 Log() << kVERBOSE <<
"<MakeRuleMap> Map is already valid" <<
Endl;
1382 fRuleMapEvents = events;
1383 fRuleMapInd0 = ifirst;
1384 fRuleMapInd1 = ilast;
1386 UInt_t nrules = GetNRules();
1388 Log() << kVERBOSE <<
"No rules found in MakeRuleMap()" <<
Endl;
1395 std::vector<UInt_t> ruleind;
1397 for (
UInt_t i=ifirst; i<=ilast; i++) {
1399 fRuleMap.push_back( ruleind );
1401 if (fRules[
r]->EvalEvent(*((*events)[i]))) {
1402 fRuleMap.back().push_back(
r);
1407 Log() << kVERBOSE <<
"Made rule map for event# " << ifirst <<
" : " << ilast <<
Endl;
1415 os <<
"DON'T USE THIS - TO BE REMOVED" << std::endl;
R__EXTERN TRandom * gRandom
char * Form(const char *fmt,...)
1-D histogram with a float per channel (see TH1 documentation)
Short_t GetSelector() const
Implementation of a Decision Tree.
virtual DecisionTreeNode * GetRoot() const
Virtual base class for all MVA methods.
Bool_t IsSilentFile() const
J Friedman's RuleFit method.
ostringstream derivative to redirect and format output
Node for the BinarySearch or Decision Trees.
virtual Node * GetLeft() const
virtual Node * GetParent() const
virtual Node * GetRight() const
virtual ~RuleEnsemble()
destructor
void CalcVarImportance()
Calculates variable importance using eq. (35) in the RuleFit paper by Friedman et al.
void SetImportanceRef(Double_t impref)
set reference importance
void CalcImportance()
calculate the importance of each rule
void PrintRuleGen() const
print rule generation info
void MakeRuleMap(const std::vector< const TMVA::Event * > *events=0, UInt_t ifirst=0, UInt_t ilast=0)
Makes rule map for all events.
Int_t CalcNRules(const TMVA::DecisionTree *dtree)
calculate the number of rules
void ResetCoefficients()
reset all rule coefficients
void SetMsgType(EMsgType t)
Double_t GetLinQuantile() const
void ReadRaw(std::istream &istr)
read rule ensemble from stream
void AddRule(const Node *node)
add a new rule, built from the given tree node, to the ensemble
void ReadFromXML(void *wghtnode)
read rules from XML
Double_t GetImportanceCut() const
const Event * GetTrainingEvent(UInt_t i) const
get the training event from the rule fitter
const std::vector< const TMVA::Event * > * GetTrainingEvents() const
get list of training events from the rule fitter
Double_t GetRuleMinDist() const
void SetRules(const std::vector< TMVA::Rule * > &rules)
set rules
void MakeRules(const std::vector< const TMVA::DecisionTree * > &forest)
Makes rules from the given decision tree.
void RemoveSimilarRules()
remove rules that behave similarly
void FindNEndNodes(const TMVA::Node *node, Int_t &nendnodes)
find the number of leaf nodes
RuleEnsemble()
constructor
const std::vector< Double_t > & GetVarImportance() const
void CleanupRules()
cleanup rules
void Initialize(const RuleFit *rf)
Initializes all member variables with default values.
void CleanupLinear()
cleanup linear model
void RuleResponseStats()
calculate various statistics for this rule
const RuleFit * GetRuleFit() const
void * AddXMLTo(void *parent) const
write rules to XML
const std::vector< TMVA::Rule * > & GetRulesConst() const
const MethodRuleFit * GetMethodRuleFit() const
Get a pointer to the original MethodRuleFit.
void MakeModel()
create model
void RuleStatistics()
calculate various statistics for this rule
void SetCoefficients(const std::vector< Double_t > &v)
set all rule coefficients
void Print() const
print function
Double_t PdfRule(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for rules.
const MethodBase * GetMethodBase() const
Get a pointer to the original MethodBase.
Double_t GetOffset() const
void Copy(RuleEnsemble const &other)
copy function
Double_t CalcLinImportance()
calculate the importance of each linear term
Double_t CalcRuleImportance()
calculate importance of each rule
void PrintRaw(std::ostream &os) const
write rules to stream
Double_t fAverageRuleSigma
void CalcRuleSupport()
calculate the support for all rules
ELearningModel GetLearningModel() const
Double_t PdfLinear(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for the linear terms.
Double_t CoefficientRadius()
Calculates sqrt(Sum(a_i^2)), i=1..N (NOTE do not include a0)
void MakeRulesFromTree(const DecisionTree *dtree)
create rules from the decision tree structure
void MakeLinearTerms()
Make the linear terms as in eq. 25, ref. 2. For this, the b and (1-b) quantiles are needed.
Rule * MakeTheRule(const Node *node)
Make a Rule from a given Node.
void GetCoefficients(std::vector< Double_t > &v)
Retrieve all rule coefficients.
Double_t FStar() const
We want to estimate F* = argmin Eyx( L(y,F(x) ), min wrt F(x) F(x) = FL(x) + FR(x) ,...
A class implementing various fits of rule ensembles.
Implementation of a rule.
void SetMsgType(EMsgType t)
virtual Double_t Rndm()
Machine independent random number generator.
std::ostream & operator<<(std::ostream &os, const BinaryTree &tree)
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Double_t Sqrt(Double_t x)
Short_t Min(Short_t a, Short_t b)