57   : fLearningModel   ( kFull )
 
   58   , fImportanceCut   ( 0 )
 
   59   , fLinQuantile     ( 0.025 ) 
 
   61   , fAverageSupport  ( 0.8 )
 
   62   , fAverageRuleSigma( 0.4 )  
 
   66   , fRuleMinDist     ( 1
e-3 ) 
 
   67   , fNRulesGenerated ( 0 )
 
   69   , fEventCacheOK    ( true )
 
   73   , fRuleMapEvents   ( 0 )
 
   83   : fAverageSupport   ( 1 )
 
   96   : fLearningModel     ( kFull )
 
   97   , fImportanceCut   ( 0 )
 
   98   , fLinQuantile     ( 0.025 ) 
 
  100   , fImportanceRef   ( 1.0 )
 
  101   , fAverageSupport  ( 0.8 )
 
  102   , fAverageRuleSigma( 0.4 )  
 
  106   , fRuleMinDist     ( 1
e-3 ) 
 
  107   , fNRulesGenerated ( 0 )
 
  109   , fEventCacheOK    ( true )
 
  110   , fRuleMapOK       ( true )
 
  113   , fRuleMapEvents   ( 0 )
 
  124   for ( std::vector<Rule *>::iterator itrRule = fRules.begin(); itrRule != fRules.end(); ++itrRule ) {
 
  136   SetAverageRuleSigma(0.4); 
 
  138   UInt_t nvars =  GetMethodBase()->GetNvar();
 
  139   fVarImportance.clear();
 
  143   fVarImportance.resize( nvars,0.0 );
 
  144   fLinPDFB.resize( nvars,0 );
 
  145   fLinPDFS.resize( nvars,0 );
 
  146   fImportanceRef = 1.0;
 
  147   for (
UInt_t i=0; i<nvars; i++) { 
 
  148      fLinTermOK.push_back(
kTRUE);
 
  155   fLogger->SetMinType(t);
 
  164   return ( fRuleFit==0 ? 0:fRuleFit->GetMethodRuleFit());
 
  173   return ( fRuleFit==0 ? 0:fRuleFit->GetMethodBase());
 
  181   MakeRules( fRuleFit->GetForest() );
 
  200   Int_t ncoeffs = fRules.size();
 
  201   if (ncoeffs<1) 
return 0;
 
  205   for (
Int_t i=0; i<ncoeffs; i++) {
 
  206      val = fRules[i]->GetCoefficient();
 
  218   UInt_t nrules = fRules.size();
 
  219   for (
UInt_t i=0; i<nrules; i++) {
 
  220      fRules[i]->SetCoefficient(0.0);
 
  229   UInt_t nrules = fRules.size();
 
  230   if (
v.size()!=nrules) {
 
  231      Log() << kFATAL << 
"<SetCoefficients> - BUG TRAP - input vector wrong size! It is = " << 
v.size()
 
  232            << 
" when it should be = " << nrules << 
Endl;
 
  234   for (
UInt_t i=0; i<nrules; i++) {
 
  235      fRules[i]->SetCoefficient(
v[i]);
 
  244   UInt_t nrules = fRules.size();
 
  246   if (nrules==0) 
return;
 
  248   for (
UInt_t i=0; i<nrules; i++) {
 
  249      v[i] = (fRules[i]->GetCoefficient());
 
  258   return &(fRuleFit->GetTrainingEvents());
 
  266   return fRuleFit->GetTrainingEvent(i);
 
  274   Log() << kVERBOSE << 
"Removing similar rules; distance = " << fRuleMinDist << 
Endl;
 
  276   UInt_t nrulesIn = fRules.size();
 
  278   std::vector< Char_t > removeMe( nrulesIn,
false );  
 
  283   for (
UInt_t i=0; i<nrulesIn; i++) {
 
  286         for (
UInt_t k=i+1; k<nrulesIn; k++) {
 
  292                  remind = (
r>0.5 ? k:i); 
 
  299                  if (!removeMe[remind]) {
 
  300                     removeMe[remind] = 
true;
 
  309   for (
UInt_t i=0; i<nrulesIn; i++) {
 
  311         theRule = fRules[ind];
 
  312         fRules.erase( fRules.begin() + ind );
 
  318   UInt_t nrulesOut = fRules.size();
 
  319   Log() << kVERBOSE << 
"Removed " << nrulesIn - nrulesOut << 
" out of " << nrulesIn << 
" rules" << 
Endl;
 
  327   UInt_t nrules   = fRules.size();
 
  328   if (nrules==0) 
return;
 
  329   Log() << kVERBOSE << 
"Removing rules with relative importance < " << fImportanceCut << 
Endl;
 
  330   if (fImportanceCut<=0) 
return;
 
  336   for (
UInt_t i=0; i<nrules; i++) {
 
  337      if (fRules[ind]->GetRelImportance()<fImportanceCut) {
 
  338         therule = fRules[ind];
 
  339         fRules.erase( fRules.begin() + ind );
 
  345   Log() << kINFO << 
"Removed " << nrules-ind << 
" out of a total of " << nrules
 
  346         << 
" rules with importance < " << fImportanceCut << 
Endl;
 
  354   UInt_t nlin = fLinNorm.size();
 
  356   Log() << kVERBOSE << 
"Removing linear terms with relative importance < " << fImportanceCut << 
Endl;
 
  359   for (
UInt_t i=0; i<nlin; i++) {
 
  360      fLinTermOK.push_back( (fLinImportance[i]/fImportanceRef > fImportanceCut) );
 
  369   Log() << kVERBOSE << 
"Evaluating Rule support" << 
Endl;
 
  374   SetAverageRuleSigma(0.4);
 
  375   const std::vector<const Event *> *events = GetTrainingEvents();
 
  379   if ((nrules>0) && (events->size()>0)) {
 
  380      for ( std::vector< Rule * >::iterator itrRule=fRules.begin(); itrRule!=fRules.end(); ++itrRule ) {
 
  384         for ( std::vector<const Event * >::const_iterator itrEvent=events->begin(); itrEvent!=events->end(); ++itrEvent ) {
 
  385            if ((*itrRule)->EvalEvent( *(*itrEvent) )) {
 
  386               ew = (*itrEvent)->GetWeight();
 
  388               if (GetMethodRuleFit()->DataInfo().IsSignal(*itrEvent)) ssig += ew;
 
  393         s = s/fRuleFit->GetNEveEff();
 
  395         t = (t<0 ? 0:sqrt(t));
 
  399         (*itrRule)->SetSupport(s);
 
  400         (*itrRule)->SetNorm(t);
 
  401         (*itrRule)->SetSSB( ssb );
 
  402         (*itrRule)->SetSSBNeve(
Double_t(ssig+sbkg));
 
  404      fAverageSupport   = stot/nrules;
 
  405      fAverageRuleSigma = 
TMath::Sqrt(fAverageSupport*(1.0-fAverageSupport));
 
  406      Log() << kVERBOSE << 
"Standard deviation of support = " << fAverageRuleSigma << 
Endl;
 
  407      Log() << kVERBOSE << 
"Average rule support          = " << fAverageSupport   << 
Endl;
 
  416   Double_t maxRuleImp = CalcRuleImportance();
 
  417   Double_t maxLinImp  = CalcLinImportance();
 
  418   Double_t maxImp = (maxRuleImp>maxLinImp ? maxRuleImp : maxLinImp);
 
  419   SetImportanceRef( maxImp );
 
  427   for ( 
UInt_t i=0; i<fRules.size(); i++ ) {
 
  428      fRules[i]->SetImportanceRef(impref);
 
  430   fImportanceRef = impref;
 
  439   Int_t nrules = fRules.size();
 
  440   for ( 
int i=0; i<nrules; i++ ) {
 
  441      fRules[i]->CalcImportance();
 
  442      imp = fRules[i]->GetImportance();
 
  443      if (imp>maxImp) maxImp = imp;
 
  445   for ( 
Int_t i=0; i<nrules; i++ ) {
 
  446      fRules[i]->SetImportanceRef(maxImp);
 
  458   UInt_t nvars = fLinCoefficients.size();
 
  459   fLinImportance.resize(nvars,0.0);
 
  460   if (!DoLinear()) 
return maxImp;
 
  470   for ( 
UInt_t i=0; i<nvars; i++ ) {
 
  471      imp = fAverageRuleSigma*
TMath::Abs(fLinCoefficients[i]);
 
  472      fLinImportance[i] = imp;
 
  473      if (imp>maxImp) maxImp = imp;
 
  483   Log() << kVERBOSE << 
"Compute variable importance" << 
Endl;
 
  485   UInt_t nrules = fRules.size();
 
  486   if (GetMethodBase()==0) Log() << kFATAL << 
"RuleEnsemble::CalcVarImportance() - should not be here!" << 
Endl;
 
  487   UInt_t nvars  = GetMethodBase()->GetNvar();
 
  490   fVarImportance.resize(nvars,0);
 
  493      for ( 
UInt_t ind=0; ind<nrules; ind++ ) {
 
  494         rimp = fRules[ind]->GetImportance();
 
  495         nvarsUsed = fRules[ind]->GetNumVarsUsed();
 
  497            Log() << kFATAL << 
"<CalcVarImportance> Variables for importance calc!!!??? A BUG!" << 
Endl;
 
  498         rimpN = (nvarsUsed > 0 ? rimp/nvarsUsed:0.0);
 
  499         for ( 
UInt_t iv=0; iv<nvars; iv++ ) {
 
  500            if (fRules[ind]->ContainsVariable(iv)) {
 
  501               fVarImportance[iv] += rimpN;
 
  508      for ( 
UInt_t iv=0; iv<fLinTermOK.size(); iv++ ) {
 
  509         if (fLinTermOK[iv]) fVarImportance[iv] += fLinImportance[iv];
 
  516   for ( 
UInt_t iv=0; iv<nvars; iv++ ) {
 
  517      if ( fVarImportance[iv] > maximp ) maximp = fVarImportance[iv];
 
  520      for ( 
UInt_t iv=0; iv<nvars; iv++ ) {
 
  521         fVarImportance[iv] *= 1.0/maximp;
 
  535   fRules.resize(rules.size());
 
  536   for (
UInt_t i=0; i<fRules.size(); i++) {
 
  537      fRules[i] = rules[i];
 
  549   if (!DoRules()) 
return;
 
  558   UInt_t ntrees = forest.size();
 
  559   for ( 
UInt_t ind=0; ind<ntrees; ind++ ) {
 
  561      MakeRulesFromTree( forest[ind] );
 
  562      nrules = CalcNRules( forest[ind] );
 
  563      nendn = (nrules/2) + 1;
 
  565      sumn2    += nendn*nendn;
 
  566      nrulesCheck += nrules;
 
  568   Double_t nmean = (ntrees>0) ? sumnendn/ntrees : 0;
 
  570   Double_t ndev = 2.0*(nmean-2.0-nsigm)/(nmean-2.0+nsigm);
 
  572   Log() << kVERBOSE << 
"Average number of end nodes per tree   = " << nmean << 
Endl;
 
  573   if (ntrees>1) Log() << kVERBOSE << 
"sigma of ditto ( ~= mean-2 ?)          = " 
  576   Log() << kVERBOSE << 
"Deviation from exponential model       = " << ndev      << 
Endl;
 
  577   Log() << kVERBOSE << 
"Corresponds to L (eq. 13, RuleFit ppr) = " << nmean << 
Endl;
 
  579   if (nrulesCheck != 
static_cast<Int_t>(fRules.size())) {
 
  581            << 
"BUG! number of generated and possible rules do not match! N(rules) =  " << fRules.size()
 
  582            << 
" != " << nrulesCheck << 
Endl;
 
  584   Log() << kVERBOSE << 
"Number of generated rules: " << fRules.size() << 
Endl;
 
  587   fNRulesGenerated = fRules.size();
 
  589   RemoveSimilarRules();
 
  601   if (!DoLinear()) 
return;
 
  603   const std::vector<const Event *> *events = GetTrainingEvents();
 
  604   UInt_t neve  = events->size();
 
  605   UInt_t nvars = ((*events)[0])->GetNVariables(); 
 
  607   typedef std::pair< Double_t, Int_t> dataType;
 
  608   typedef std::pair< Double_t, dataType > dataPoint;
 
  610   std::vector< std::vector<dataPoint> > vardata(nvars);
 
  611   std::vector< Double_t > varsum(nvars,0.0);
 
  612   std::vector< Double_t > varsum2(nvars,0.0);
 
  617   for (
UInt_t i=0; i<neve; i++) {
 
  618      ew   = ((*events)[i])->GetWeight();
 
  620         val = ((*events)[i])->GetValue(
v);
 
  621         vardata[
v].push_back( dataPoint( val, dataType(ew,((*events)[i])->GetClass()) ) );
 
  627   fLinCoefficients.clear();
 
  629   fLinDP.resize(nvars,0);
 
  630   fLinDM.resize(nvars,0);
 
  631   fLinCoefficients.resize(nvars,0);
 
  632   fLinNorm.resize(nvars,0);
 
  634   Double_t averageWeight = neve ? fRuleFit->GetNEveEff()/
static_cast<Double_t>(neve) : 0;
 
  651      std::sort( vardata[
v].begin(),vardata[
v].end() );
 
  652      nquant = fLinQuantile*fRuleFit->GetNEveEff(); 
 
  656      while ( (ie<neve) && (neff<nquant) ) {
 
  657         neff += vardata[
v][ie].second.first;
 
  660      indquantM = (ie==0 ? 0:ie-1);
 
  664      while ( (ie>0) && (neff<nquant) ) {
 
  666         neff += vardata[
v][ie].second.first;
 
  668      indquantP = (ie==neve ? ie=neve-1:ie);
 
  670      fLinDM[
v] = vardata[
v][indquantM].first; 
 
  671      fLinDP[
v] = vardata[
v][indquantP].first; 
 
  675            if (fLinPDFB[
v]) 
delete fLinPDFB[
v];
 
  676            if (fLinPDFS[
v]) 
delete fLinPDFS[
v];
 
  677            fLinPDFB[
v] = 
new TH1F(
Form(
"bkgvar%d",
v),
"bkg temphist",40,fLinDM[
v],fLinDP[
v]);
 
  678            fLinPDFS[
v] = 
new TH1F(
Form(
"sigvar%d",
v),
"sig temphist",40,fLinDM[
v],fLinDP[
v]);
 
  679            fLinPDFB[
v]->Sumw2();
 
  680            fLinPDFS[
v]->Sumw2();
 
  684      const Double_t w = 1.0/fRuleFit->GetNEveEff();
 
  685      for (ie=0; ie<neve; ie++) {
 
  686         val  = vardata[
v][ie].first;
 
  687         ew   = vardata[
v][ie].second.first;
 
  688         type = vardata[
v][ie].second.second;
 
  691         varsum2[
v] += ew*lx*lx;
 
  694             if (
type==1) fLinPDFS[
v]->Fill(lx,
w*ew);
 
  695             else         fLinPDFB[
v]->Fill(lx,
w*ew);
 
  701      stdl = 
TMath::Sqrt( (varsum2[
v] - (varsum[
v]*varsum[
v]/fRuleFit->GetNEveEff()))/(fRuleFit->GetNEveEff()-averageWeight) );
 
  702      fLinNorm[
v] = CalcLinNorm(stdl);
 
  708            fLinPDFS[
v]->Write();
 
  709            fLinPDFB[
v]->Write();
 
  719   UInt_t nvars=fLinDP.size();
 
  727      Int_t bin = fLinPDFS[
v]->FindBin(val);
 
  728      fstot += fLinPDFS[
v]->GetBinContent(bin);
 
  729      fbtot += fLinPDFB[
v]->GetBinContent(bin);
 
  731   if (nvars<1) 
return 0;
 
  732   ntot = (fstot+fbtot)/
Double_t(nvars);
 
  734   return fstot/(fstot+fbtot);
 
  748   UInt_t nrules = fRules.size();
 
  749   for (
UInt_t ir=0; ir<nrules; ir++) {
 
  750      if (fEventRuleVal[ir]>0) {
 
  751         ssb = fEventRuleVal[ir]*GetRulesConst(ir)->GetSSB(); 
 
  752         neve = GetRulesConst(ir)->GetSSBNeve(); 
 
  761   if (ntot>0) 
return nsig/ntot;
 
  790   if (DoLinear()) pl = PdfLinear(nls, nlt);
 
  791   if (DoRules())  pr = PdfRule(nrs, nrt);
 
  793   if ((nlt>0) && (nrt>0)) nt=2.0;
 
  805   const std::vector<const Event *> *events = GetTrainingEvents();
 
  806   const UInt_t neve   = events->size();
 
  807   const UInt_t nvars  = GetMethodBase()->GetNvar();
 
  808   const UInt_t nrules = fRules.size();
 
  809   const Event *eveData;
 
  825   std::vector<Int_t> varcnt;
 
  833   varcnt.resize(nvars,0);
 
  834   fRuleVarFrac.clear();
 
  835   fRuleVarFrac.resize(nvars,0);
 
  837   for ( 
UInt_t i=0; i<nrules; i++ ) {
 
  839         if (fRules[i]->ContainsVariable(
v)) varcnt[
v]++; 
 
  841      sigRule = fRules[i]->IsSignalRule();
 
  856         eveData = (*events)[
e];
 
  857         tagged  = fRules[i]->EvalEvent(*eveData);
 
  858         sigTag = (tagged && sigRule);        
 
  859         bkgTag = (tagged && (!sigRule));     
 
  861         sigTrue = (eveData->
GetClass() == 0);       
 
  864            if (sigTag && sigTrue)  nss++;
 
  865            if (sigTag && !sigTrue) nsb++;
 
  866            if (bkgTag && sigTrue)  nbs++;
 
  867            if (bkgTag && !sigTrue) nbb++;
 
  871      if (ntag>0 && neve > 0) { 
 
  880   fRuleFSig = (nsig>0) ? 
static_cast<Double_t>(nsig)/
static_cast<Double_t>(nsig+nbkg) : 0;
 
  891   const UInt_t nrules = fRules.size();
 
  895   for ( 
UInt_t i=0; i<nrules; i++ ) {
 
  896      nc = 
static_cast<Double_t>(fRules[i]->GetNcuts());
 
  903      fRuleNCave = sumNc/nrules;
 
  913   Log() << kHEADER << 
"-------------------RULE ENSEMBLE SUMMARY------------------------"  << 
Endl;
 
  915   if (mrf) Log() << kINFO << 
"Tree training method               : " << (mrf->
UseBoost() ? 
"AdaBoost":
"Random") << 
Endl;
 
  916   Log() << kINFO << 
"Number of events per tree          : " << fRuleFit->GetNTreeSample()    << 
Endl;
 
  917   Log() << kINFO << 
"Number of trees                    : " << fRuleFit->GetForest().size() << 
Endl;
 
  918   Log() << kINFO << 
"Number of generated rules          : " << fNRulesGenerated << 
Endl;
 
  919   Log() << kINFO << 
"Idem, after cleanup                : " << fRules.size() << 
Endl;
 
  920   Log() << kINFO << 
"Average number of cuts per rule    : " << 
Form(
"%8.2f",fRuleNCave) << 
Endl;
 
  921   Log() << kINFO << 
"Spread in number of cuts per rules : " << 
Form(
"%8.2f",fRuleNCsig) << 
Endl;
 
  922   Log() << kVERBOSE << 
"Complexity                         : " << 
Form(
"%8.2f",fRuleNCave*fRuleNCsig) << 
Endl;
 
  923   Log() << kINFO << 
"----------------------------------------------------------------"  << 
Endl;
 
  924   Log() << kINFO << 
Endl;
 
  932   const EMsgType kmtype=kINFO;
 
  933   const Bool_t   isDebug = (fLogger->GetMinType()<=kDEBUG);
 
  935   Log() << kmtype << 
Endl;
 
  936   Log() << kmtype << 
"================================================================" << 
Endl;
 
  937   Log() << kmtype << 
"                          M o d e l                             " << 
Endl;
 
  938   Log() << kmtype << 
"================================================================" << 
Endl;
 
  941   const UInt_t nvars =  GetMethodBase()->GetNvar();
 
  942   const Int_t nrules = fRules.size();
 
  945   for (
UInt_t iv = 0; iv<fVarImportance.size(); iv++) {
 
  946      if (GetMethodBase()->GetInputLabel(iv).Length() > maxL) maxL = GetMethodBase()->GetInputLabel(iv).Length();
 
  950      Log() << kDEBUG << 
"Variable importance:" << 
Endl;
 
  951      for (
UInt_t iv = 0; iv<fVarImportance.size(); iv++) {
 
  952         Log() << kDEBUG << std::setw(maxL) << GetMethodBase()->GetInputLabel(iv)
 
  953               << std::resetiosflags(std::ios::right)
 
  954               << 
" : " << 
Form(
" %3.3f",fVarImportance[iv]) << 
Endl;
 
  958   Log() << kHEADER << 
"Offset (a0) = " << fOffset << 
Endl;
 
  961      if (fLinNorm.size() > 0) {
 
  962         Log() << kmtype << 
"------------------------------------" << 
Endl;
 
  963         Log() << kmtype << 
"Linear model (weights unnormalised)" << 
Endl;
 
  964         Log() << kmtype << 
"------------------------------------" << 
Endl;
 
  965         Log() << kmtype << std::setw(maxL) << 
"Variable" 
  966               << std::resetiosflags(std::ios::right) << 
" : " 
  967               << std::setw(11) << 
" Weights" 
  968               << std::resetiosflags(std::ios::right) << 
" : " 
  970               << std::resetiosflags(std::ios::right)
 
  972         Log() << kmtype << 
"------------------------------------" << 
Endl;
 
  973         for ( 
UInt_t i=0; i<fLinNorm.size(); i++ ) {
 
  974            Log() << kmtype << std::setw(std::max(maxL,8)) << GetMethodBase()->GetInputLabel(i);
 
  977                     << std::resetiosflags(std::ios::right)
 
  978                     << 
" : " << 
Form(
" %10.3e",fLinCoefficients[i]*fLinNorm[i])
 
  979                     << 
" : " << 
Form(
" %3.3f",fLinImportance[i]/fImportanceRef) << 
Endl;
 
  982               Log() << kmtype << 
"-> importance below threshold = " 
  983                     << 
Form(
" %3.3f",fLinImportance[i]/fImportanceRef) << 
Endl;
 
  986         Log() << kmtype << 
"------------------------------------" << 
Endl;
 
  989   else Log() << kmtype << 
"Linear terms were disabled" << 
Endl;
 
  991   if ((!DoRules()) || (nrules==0)) {
 
  993         Log() << kmtype << 
"Rule terms were disabled" << 
Endl;
 
  996         Log() << kmtype << 
"Even though rules were included in the model, none passed! " << nrules << 
Endl;
 
 1000      Log() << kmtype << 
"Number of rules = " << nrules << 
Endl;
 
 1002         Log() << kmtype << 
"N(cuts) in rules, average = " << fRuleNCave << 
Endl;
 
 1003         Log() << kmtype << 
"                      RMS = " << fRuleNCsig << 
Endl;
 
 1004         Log() << kmtype << 
"Fraction of signal rules = " << fRuleFSig << 
Endl;
 
 1005         Log() << kmtype << 
"Fraction of rules containing a variable (%):" << 
Endl;
 
 1007            Log() << kmtype << 
"   " << std::setw(maxL) << GetMethodBase()->GetInputLabel(
v);
 
 1008            Log() << kmtype << 
Form(
" = %2.2f",fRuleVarFrac[
v]*100.0) << 
" %" << 
Endl;
 
 1014      std::list< std::pair<double,int> > sortedImp;
 
 1015      for (
Int_t i=0; i<nrules; i++) {
 
 1016         sortedImp.push_back( std::pair<double,int>( fRules[i]->GetImportance(),i ) );
 
 1020      Log() << kmtype << 
"Printing the first " << printN << 
" rules, ordered in importance." << 
Endl;
 
 1022      for ( std::list< std::pair<double,int> >::reverse_iterator itpair = sortedImp.rbegin();
 
 1023            itpair != sortedImp.rend(); ++itpair ) {
 
 1024         ind = itpair->second;
 
 1028         fRules[ind]->PrintLogger(
Form(
"Rule %4d : ",pind+1));
 
 1031            if (nrules==printN) {
 
 1032               Log() << kmtype << 
"All rules printed" << 
Endl;
 
 1035               Log() << kmtype << 
"Skipping the next " << nrules-printN << 
" rules" << 
Endl;
 
 1041   Log() << kmtype << 
"================================================================" << 
Endl;
 
 1042   Log() << kmtype << 
Endl;
 
 1050   Int_t dp = os.precision();
 
 1051   UInt_t nrules = fRules.size();
 
 1054   os << 
"ImportanceCut= "    << fImportanceCut << std::endl;
 
 1055   os << 
"LinQuantile= "      << fLinQuantile   << std::endl;
 
 1056   os << 
"AverageSupport= "   << fAverageSupport << std::endl;
 
 1057   os << 
"AverageRuleSigma= " << fAverageRuleSigma << std::endl;
 
 1058   os << 
"Offset= "           << fOffset << std::endl;
 
 1059   os << 
"NRules= "           << nrules << std::endl;
 
 1060   for (
UInt_t i=0; i<nrules; i++){
 
 1061      os << 
"***Rule " << i << std::endl;
 
 1062      (fRules[i])->PrintRaw(os);
 
 1064   UInt_t nlinear = fLinNorm.size();
 
 1066   os << 
"NLinear= " << fLinTermOK.size() << std::endl;
 
 1067   for (
UInt_t i=0; i<nlinear; i++) {
 
 1068      os << 
"***Linear " << i << std::endl;
 
 1069      os << std::setprecision(10) << (fLinTermOK[i] ? 1:0) << 
" " 
 1070         << fLinCoefficients[i] << 
" " 
 1071         << fLinNorm[i] << 
" " 
 1074         << fLinImportance[i] << 
" " << std::endl;
 
 1076   os << std::setprecision(dp);
 
 1086   UInt_t nrules  = fRules.size();
 
 1087   UInt_t nlinear = fLinNorm.size();
 
 1090   gTools().
AddAttr( re, 
"LearningModel",    (
int)fLearningModel );
 
 1094   gTools().
AddAttr( re, 
"AverageRuleSigma", fAverageRuleSigma );
 
 1096   for (
UInt_t i=0; i<nrules; i++) fRules[i]->AddXMLTo(re);
 
 1098   for (
UInt_t i=0; i<nlinear; i++) {
 
 1118   Int_t iLearningModel;
 
 1123   gTools().
ReadAttr( wghtnode, 
"AverageSupport",    fAverageSupport );
 
 1124   gTools().
ReadAttr( wghtnode, 
"AverageRuleSigma",  fAverageRuleSigma );
 
 1131   fRules.resize( nrules  );
 
 1133   for (i=0; i<nrules; i++) {
 
 1134      fRules[i] = 
new Rule();
 
 1135      fRules[i]->SetRuleEnsemble( 
this );
 
 1136      fRules[i]->ReadFromXML( ch );
 
 1142   fLinNorm        .resize( nlinear );
 
 1143   fLinTermOK      .resize( nlinear );
 
 1144   fLinCoefficients.resize( nlinear );
 
 1145   fLinDP          .resize( nlinear );
 
 1146   fLinDM          .resize( nlinear );
 
 1147   fLinImportance  .resize( nlinear );
 
 1153      fLinTermOK[i] = (iok == 1);
 
 1177   istr >> dummy >> fImportanceCut;
 
 1178   istr >> dummy >> fLinQuantile;
 
 1179   istr >> dummy >> fAverageSupport;
 
 1180   istr >> dummy >> fAverageRuleSigma;
 
 1181   istr >> dummy >> fOffset;
 
 1182   istr >> dummy >> nrules;
 
 1188   for (
UInt_t i=0; i<nrules; i++){
 
 1189      istr >> dummy >> idum; 
 
 1190      fRules.push_back( 
new Rule() );
 
 1191      (fRules.back())->SetRuleEnsemble( 
this );
 
 1192      (fRules.back())->ReadRaw(istr);
 
 1200   istr >> dummy >> nlinear;
 
 1202   fLinNorm        .resize( nlinear );
 
 1203   fLinTermOK      .resize( nlinear );
 
 1204   fLinCoefficients.resize( nlinear );
 
 1205   fLinDP          .resize( nlinear );
 
 1206   fLinDM          .resize( nlinear );
 
 1207   fLinImportance  .resize( nlinear );
 
 1211   for (
UInt_t i=0; i<nlinear; i++) {
 
 1212      istr >> dummy >> idum;
 
 1214      fLinTermOK[i] = (iok==1);
 
 1215      istr >> fLinCoefficients[i];
 
 1216      istr >> fLinNorm[i];
 
 1219      istr >> fLinImportance[i];
 
 1228   if(
this != &other) {
 
 1255   if (dtree==0) 
return 0;
 
 1257   Int_t nendnodes = 0;
 
 1258   FindNEndNodes( node, nendnodes );
 
 1259   return 2*(nendnodes-1);
 
 1267   if (node==0) 
return;
 
 1274   FindNEndNodes( nodeR, nendnodes );
 
 1275   FindNEndNodes( nodeL, nendnodes );
 
 1292   if (node==0) 
return;
 
 1298      Rule *rule = MakeTheRule(node);
 
 1300         fRules.push_back( rule );
 
 1305         Log() << kFATAL << 
"<AddRule> - ERROR failed in creating a rule! BUG!" << 
Endl;
 
 1319      Log() << kFATAL << 
"<MakeTheRule> Input node is NULL. Should not happen. BUG!" << 
Endl;
 
 1327   std::vector< const Node * > nodeVec;
 
 1328   const Node *parent = node;
 
 1333   nodeVec.push_back( node );
 
 1336      if (!parent) 
continue;
 
 1339         nodeVec.insert( nodeVec.begin(), parent );
 
 1342   if (nodeVec.size()<2) {
 
 1343      Log() << kFATAL << 
"<MakeTheRule> BUG! Inconsistent Rule!" << 
Endl;
 
 1346   Rule *rule = 
new Rule( 
this, nodeVec );
 
 1356   Log() << kVERBOSE << 
"Making Rule map for all events" << 
Endl;
 
 1358   if (events==0) events = GetTrainingEvents();
 
 1359   if ((ifirst==0) || (ilast==0) || (ifirst>ilast)) {
 
 1361      ilast  = events->size()-1;
 
 1364   if ((events!=fRuleMapEvents) ||
 
 1365       (ifirst!=fRuleMapInd0) ||
 
 1366       (ilast !=fRuleMapInd1)) {
 
 1371      Log() << kVERBOSE << 
"<MakeRuleMap> Map is already valid" << 
Endl;
 
 1374   fRuleMapEvents = events;
 
 1375   fRuleMapInd0   = ifirst;
 
 1376   fRuleMapInd1   = ilast;
 
 1378   UInt_t nrules = GetNRules();
 
 1380      Log() << kVERBOSE << 
"No rules found in MakeRuleMap()" << 
Endl;
 
 1387   std::vector<UInt_t> ruleind;
 
 1389   for (
UInt_t i=ifirst; i<=ilast; i++) {
 
 1391      fRuleMap.push_back( ruleind );
 
 1393         if (fRules[
r]->EvalEvent(*((*events)[i]))) {
 
 1394            fRuleMap.back().push_back(
r); 
 
 1399   Log() << kVERBOSE << 
"Made rule map for event# " << ifirst << 
" : " << ilast << 
Endl;
 
 1407   os << 
"DON'T USE THIS - TO BE REMOVED" << std::endl;
 
winID h TVirtualViewer3D TVirtualGLPainter p
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
 
R__EXTERN TRandom * gRandom
 
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
 
1-D histogram with a float per channel (see TH1 documentation).
 
Short_t GetSelector() const
return index of variable used for discrimination at this node
 
Implementation of a Decision Tree.
 
virtual DecisionTreeNode * GetRoot() const
 
Virtual base Class for all MVA method.
 
Bool_t IsSilentFile() const
 
J Friedman's RuleFit method.
 
ostringstream derivative to redirect and format output
 
Node for the BinarySearch or Decision Trees.
 
virtual Node * GetLeft() const
 
virtual Node * GetParent() const
 
virtual Node * GetRight() const
 
virtual ~RuleEnsemble()
destructor
 
void CalcVarImportance()
Calculates variable importance using eq. (35) in the RuleFit paper by Friedman et al.
 
void SetImportanceRef(Double_t impref)
set reference importance
 
void CalcImportance()
calculate the importance of each rule
 
void PrintRuleGen() const
print rule generation info
 
Int_t CalcNRules(const TMVA::DecisionTree *dtree)
calculate the number of rules
 
UInt_t fNRulesGenerated
number of rules generated, before cleanup
 
void ResetCoefficients()
reset all rule coefficients
 
void SetMsgType(EMsgType t)
 
Double_t GetLinQuantile() const
 
void ReadRaw(std::istream &istr)
read rule ensemble from stream
 
void AddRule(const Node *node)
add a new rule, built from the given node, to the ensemble
 
void ReadFromXML(void *wghtnode)
read rules from XML
 
Double_t GetImportanceCut() const
 
const Event * GetTrainingEvent(UInt_t i) const
get the training event from the rule fitter
 
const std::vector< const TMVA::Event * > * GetTrainingEvents() const
get list of training events from the rule fitter
 
Double_t GetRuleMinDist() const
 
void SetRules(const std::vector< TMVA::Rule * > &rules)
set rules
 
void MakeRules(const std::vector< const TMVA::DecisionTree * > &forest)
Makes rules from the given forest of decision trees.
 
void RemoveSimilarRules()
remove rules that behave similarly
 
Double_t fRuleFSig
N(sig)/(N(sig)+N(bkg))
 
void FindNEndNodes(const TMVA::Node *node, Int_t &nendnodes)
find the number of leaf nodes
 
Bool_t fRuleMapOK
true if MakeRuleMap() has been called
 
RuleEnsemble()
constructor
 
UInt_t fRuleMapInd1
last index
 
const std::vector< Double_t > & GetVarImportance() const
 
void CleanupRules()
cleanup rules
 
void Initialize(const RuleFit *rf)
Initializes all member variables with default values.
 
void CleanupLinear()
cleanup linear model
 
void RuleResponseStats()
calculate various statistics for this rule
 
const RuleFit * GetRuleFit() const
 
void * AddXMLTo(void *parent) const
write rules to XML
 
const std::vector< TMVA::Rule * > & GetRulesConst() const
 
const MethodRuleFit * GetMethodRuleFit() const
Get a pointer to the original MethodRuleFit.
 
void MakeModel()
create model
 
void RuleStatistics()
calculate various statistics for this rule
 
void SetCoefficients(const std::vector< Double_t > &v)
set all rule coefficients
 
void Print() const
print function
 
Double_t PdfRule(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for rules.
 
void MakeRuleMap(const std::vector< const TMVA::Event * > *events=nullptr, UInt_t ifirst=0, UInt_t ilast=0)
Makes rule map for all events.
 
const MethodBase * GetMethodBase() const
Get a pointer to the original MethodBase.
 
Double_t GetOffset() const
 
Double_t fRuleNCave
N(cuts) average.
 
void Copy(RuleEnsemble const &other)
copy function
 
Double_t CalcLinImportance()
calculate the importance of each linear term and return the largest one
 
Double_t CalcRuleImportance()
calculate importance of each rule
 
Double_t fImportanceRef
reference importance (max)
 
void PrintRaw(std::ostream &os) const
write rules to stream
 
Double_t fAverageRuleSigma
average rule sigma
 
Bool_t fEventCacheOK
true if rule/linear responses are updated
 
void CalcRuleSupport()
calculate the support for all rules
 
ELearningModel GetLearningModel() const
 
Double_t PdfLinear(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for the linear terms.
 
Double_t CoefficientRadius()
Calculates sqrt(Sum(a_i^2)), i=1..N (NOTE do not include a0)
 
UInt_t fRuleMapInd0
start index
 
void MakeRulesFromTree(const DecisionTree *dtree)
create rules from the decision tree structure
 
Double_t fRuleNCsig
idem sigma
 
void MakeLinearTerms()
Make the linear terms as in eq 25, ref 2. For this, the b and (1-b) quantiles are needed.
 
Rule * MakeTheRule(const Node *node)
Make a Rule from a given Node.
 
void GetCoefficients(std::vector< Double_t > &v)
Retrieve all rule coefficients.
 
Double_t FStar() const
We want to estimate F* = argmin Eyx( L(y,F(x)) ), min wrt F(x), where F(x) = FL(x) + FR(x), ...
 
A class implementing various fits of rule ensembles.
 
Implementation of a rule.
 
void SetMsgType(EMsgType t)
 
Double_t Rndm() override
Machine independent random number generator.
 
std::ostream & operator<<(std::ostream &os, const BinaryTree &tree)
 
MsgLogger & Endl(MsgLogger &ml)
 
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
 
Double_t Sqrt(Double_t x)
Returns the square root of x.
 
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
 
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.