Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
MethodRuleFit.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Fredrik Tegenfeldt
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodRuleFit *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header file for description) *
12 * *
13 * Authors (alphabetical): *
14 * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
15 * *
16 * Copyright (c) 2005: *
17 * CERN, Switzerland *
18 * Iowa State U. *
19 * MPI-K Heidelberg, Germany *
20 * *
21 * Redistribution and use in source and binary forms, with or without *
22 * modification, are permitted according to the terms listed in LICENSE *
23 * (see tmva/doc/LICENSE) *
24 **********************************************************************************/
25
26/*! \class TMVA::MethodRuleFit
27\ingroup TMVA
28J Friedman's RuleFit method
29*/
30
31#include "TMVA/MethodRuleFit.h"
32
34#include "TMVA/Config.h"
35#include "TMVA/Configurable.h"
36#include "TMVA/CrossEntropy.h"
37#include "TMVA/DataSet.h"
38#include "TMVA/DecisionTree.h"
39#include "TMVA/GiniIndex.h"
40#include "TMVA/IMethod.h"
41#include "TMVA/MethodBase.h"
43#include "TMVA/MsgLogger.h"
44#include "TMVA/Ranking.h"
45#include "TMVA/RuleFitAPI.h"
46#include "TMVA/SdivSqrtSplusB.h"
47#include "TMVA/SeparationBase.h"
48#include "TMVA/Timer.h"
49#include "TMVA/Tools.h"
50#include "TMVA/Types.h"
51
52#include "TRandom3.h"
53#include "TMatrix.h"
54
55#include <iostream>
56#include <iomanip>
57#include <algorithm>
58#include <list>
59#include <random>
60
61using std::min;
62
63REGISTER_METHOD(RuleFit)
64
65
66////////////////////////////////////////////////////////////////////////////////
67/// standard constructor
68
// Standard "training" constructor: forwards to MethodBase and zero-initialises
// all monitoring-ntuple buffers, forest-generation and gradient-directed-path
// (GD) fit parameters. Real defaults are assigned later in Init()/DeclareOptions().
// NOTE(review): this listing is a Doxygen extraction; the opening signature line
// (original line 69, "TMVA::MethodRuleFit::MethodRuleFit( const TString& jobName,")
// and a few initializer lines (74, 76, 87, 97-99) were dropped by the extractor --
// confirm against the repository source.
70 const TString& methodTitle,
71 DataSetInfo& theData,
72 const TString& theOption) :
// register this method with the framework as Types::kRuleFit
73 MethodBase( jobName, Types::kRuleFit, methodTitle, theData, theOption)
// --- monitoring-ntuple branch buffers (filled per rule in TrainTMVARuleFit) ---
75 , fNTImportance(0)
77 , fNTSupport(0)
78 , fNTNcuts(0)
79 , fNTNvars(0)
80 , fNTPtag(0)
81 , fNTPss(0)
82 , fNTPsb(0)
83 , fNTPbs(0)
84 , fNTPbb(0)
85 , fNTSSB(0)
86 , fNTType(0)
// --- Friedman-module (rf_go.exe) and forest-generation parameters ---
88 , fRFNrules(0)
89 , fRFNendnodes(0)
90 , fNTrees(0)
91 , fTreeEveFrac(0)
92 , fSepType(0)
93 , fMinFracNEve(0)
94 , fMaxFracNEve(0)
95 , fNCuts(0)
96 , fPruneMethod(TMVA::DecisionTree::kCostComplexityPruning)
// --- gradient-directed-path (coefficient fit) parameters ---
100 , fGDValidEveFrac(0)
101 , fGDTau(0)
102 , fGDTauPrec(0)
103 , fGDTauMin(0)
104 , fGDTauMax(0)
105 , fGDTauScan(0)
106 , fGDPathStep(0)
107 , fGDNPathSteps(0)
108 , fGDErrScale(0)
109 , fMinimp(0)
110 , fRuleMinDist(0)
111 , fLinQuantile(0)
112{
// the monitoring TTree is created lazily in InitMonitorNtuple()
113 fMonitorNtuple = NULL;
114}
115
116////////////////////////////////////////////////////////////////////////////////
117/// constructor from weight file
118
// Constructor used when instantiating the method from a weight file (application
// phase): same zero-initialisation of all fit/forest/ntuple members as the
// training constructor.
// NOTE(review): Doxygen extraction dropped the signature line (original line 119,
// "TMVA::MethodRuleFit::MethodRuleFit( DataSetInfo& theData,") and initializer
// lines 135/146 -- confirm against the repository source.
120 const TString& theWeightFile) :
121 MethodBase( Types::kRuleFit, theData, theWeightFile)
122 , fSignalFraction(0)
// monitoring-ntuple branch buffers
123 , fNTImportance(0)
124 , fNTCoefficient(0)
125 , fNTSupport(0)
126 , fNTNcuts(0)
127 , fNTNvars(0)
128 , fNTPtag(0)
129 , fNTPss(0)
130 , fNTPsb(0)
131 , fNTPbs(0)
132 , fNTPbb(0)
133 , fNTSSB(0)
134 , fNTType(0)
// Friedman-module and forest parameters
136 , fRFNrules(0)
137 , fRFNendnodes(0)
138 , fNTrees(0)
139 , fTreeEveFrac(0)
140 , fSepType(0)
141 , fMinFracNEve(0)
142 , fMaxFracNEve(0)
143 , fNCuts(0)
144 , fPruneMethod(TMVA::DecisionTree::kCostComplexityPruning)
145 , fPruneStrength(0)
// gradient-directed-path parameters
147 , fGDPathEveFrac(0)
148 , fGDValidEveFrac(0)
149 , fGDTau(0)
150 , fGDTauPrec(0)
151 , fGDTauMin(0)
152 , fGDTauMax(0)
153 , fGDTauScan(0)
154 , fGDPathStep(0)
155 , fGDNPathSteps(0)
156 , fGDErrScale(0)
157 , fMinimp(0)
158 , fRuleMinDist(0)
159 , fLinQuantile(0)
160{
// created lazily in InitMonitorNtuple()
161 fMonitorNtuple = NULL;
162}
163
164////////////////////////////////////////////////////////////////////////////////
165/// destructor
166
// Destructor: this method owns both the deep-copied training events
// (fEventSample, filled in InitEventSample) and the decision-tree forest
// (fForest), so both are deleted element by element here.
// NOTE(review): the signature line (original 167) was dropped by the extractor.
168{
169 for (UInt_t i=0; i<fEventSample.size(); i++) delete fEventSample[i];
170 for (UInt_t i=0; i<fForest.size(); i++) delete fForest[i];
171}
172
173////////////////////////////////////////////////////////////////////////////////
174/// RuleFit can handle classification with 2 classes
175
// Capability query: RuleFit supports only binary (2-class) classification;
// every other (type, numberClasses) combination is rejected.
// NOTE(review): the signature line (original 176) was dropped by the extractor.
177{
178 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
179 return kFALSE;
180}
181
182////////////////////////////////////////////////////////////////////////////////
183/// define the options (their key words) that can be set in the option string
184/// know options.
185///
186/// #### general
187///
188/// - RuleFitModule `<string>`
189/// available values are:
190/// - RFTMVA - use TMVA implementation
191/// - RFFriedman - use Friedmans original implementation
192///
193/// #### Path search (fitting)
194///
195/// - GDTau `<float>` gradient-directed path: fit threshold, default
196/// - GDTauPrec `<float>` gradient-directed path: precision of estimated tau
197/// - GDStep `<float>` gradient-directed path: step size
198/// - GDNSteps `<float>` gradient-directed path: number of steps
199/// - GDErrScale `<float>` stop scan when error>scale*errmin
200///
201/// #### Tree generation
202///
203/// - fEventsMin `<float>` minimum fraction of events in a splittable node
204/// - fEventsMax `<float>` maximum fraction of events in a splittable node
205/// - nTrees `<float>` number of trees in forest.
206/// - ForestType `<string>`
207/// available values are:
208/// - Random - create forest using random subsample and only random variables subset at each node
209/// - AdaBoost - create forest with boosted events
210///
211/// #### Model creation
212///
213/// - RuleMinDist `<float>` min distance allowed between rules
214/// - MinImp `<float>` minimum rule importance accepted
215/// - Model `<string>` model to be used
216/// available values are:
217/// - ModRuleLinear `<default>`
218/// - ModRule
219/// - ModLinear
220///
221/// #### Friedmans module
222///
223/// - RFWorkDir `<string>` directory where Friedmans module (rf_go.exe) is installed
224/// - RFNrules `<int>` maximum number of rules allowed
225/// - RFNendnodes `<int>` average number of end nodes in the forest of trees
226
// Register all user-configurable options (with defaults) for the booking string:
// GD-path fit parameters, tree-generation parameters, rule-cleanup thresholds,
// model choice, and the settings for Friedman's external rf_go.exe module.
// NOTE(review): the signature line (original 227) was dropped by the extractor.
228{
// --- gradient-directed-path (fitting) options; GDTau=-1 means "estimate automatically" ---
229 DeclareOptionRef(fGDTau=-1, "GDTau", "Gradient-directed (GD) path: default fit cut-off");
230 DeclareOptionRef(fGDTauPrec=0.01, "GDTauPrec", "GD path: precision of tau");
231 DeclareOptionRef(fGDPathStep=0.01, "GDStep", "GD path: step size");
232 DeclareOptionRef(fGDNPathSteps=10000, "GDNSteps", "GD path: number of steps");
233 DeclareOptionRef(fGDErrScale=1.1, "GDErrScale", "Stop scan when error > scale*errmin");
234 DeclareOptionRef(fLinQuantile, "LinQuantile", "Quantile of linear terms (removes outliers)");
235 DeclareOptionRef(fGDPathEveFrac=0.5, "GDPathEveFrac", "Fraction of events used for the path search");
236 DeclareOptionRef(fGDValidEveFrac=0.5, "GDValidEveFrac", "Fraction of events used for the validation");
237 // tree options
238 DeclareOptionRef(fMinFracNEve=0.1, "fEventsMin", "Minimum fraction of events in a splittable node");
239 DeclareOptionRef(fMaxFracNEve=0.9, "fEventsMax", "Maximum fraction of events in a splittable node");
240 DeclareOptionRef(fNTrees=20, "nTrees", "Number of trees in forest.");
241
242 DeclareOptionRef(fForestTypeS="AdaBoost", "ForestType", "Method to use for forest generation (AdaBoost or RandomForest)");
243 AddPreDefVal(TString("AdaBoost"));
244 AddPreDefVal(TString("Random"));
245 // rule cleanup options
246 DeclareOptionRef(fRuleMinDist=0.001, "RuleMinDist", "Minimum distance between rules");
247 DeclareOptionRef(fMinimp=0.01, "MinImp", "Minimum rule importance accepted");
248 // rule model option
249 DeclareOptionRef(fModelTypeS="ModRuleLinear", "Model", "Model to be used");
250 AddPreDefVal(TString("ModRule"));
251 AddPreDefVal(TString("ModRuleLinear"));
252 AddPreDefVal(TString("ModLinear"));
253 DeclareOptionRef(fRuleFitModuleS="RFTMVA", "RuleFitModule","Which RuleFit module to use");
254 AddPreDefVal(TString("RFTMVA"));
255 AddPreDefVal(TString("RFFriedman"));
256
// --- options consumed only by Friedman's external module (RFF) ---
// NOTE(review): "Mximum" below is a typo for "Maximum" in user-visible help
// text; left untouched here since this edit changes comments only.
257 DeclareOptionRef(fRFWorkDir="./rulefit", "RFWorkDir", "Friedman\'s RuleFit module (RFF): working dir");
258 DeclareOptionRef(fRFNrules=2000, "RFNrules", "RFF: Mximum number of rules");
259 DeclareOptionRef(fRFNendnodes=4, "RFNendnodes", "RFF: Average number of end nodes");
260}
261
262////////////////////////////////////////////////////////////////////////////////
263/// process the options specified by the user
264
// Translate the parsed option strings into concrete objects/flags, clamp all
// numeric options into their valid ranges, and push the resulting parameters
// into the fRuleFit worker object.
// NOTE(review): the extractor dropped the signature (original 265) and the
// opening of the negative-weight guard (original 267, presumably
// "if (IgnoreEventsWithNegWeightsInTraining()) {") plus lines 269/291/293/367 --
// confirm against the repository source.
266{
268 Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
270 << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
271 << Endl;
272 }
273
// select TMVA-internal vs Friedman implementation; unknown value falls back to Friedman
274 fRuleFitModuleS.ToLower();
275 if (fRuleFitModuleS == "rftmva") fUseRuleFitJF = kFALSE;
276 else if (fRuleFitModuleS == "rffriedman") fUseRuleFitJF = kTRUE;
277 else fUseRuleFitJF = kTRUE;
278
// node-splitting separation criterion; default (fallthrough) is S/sqrt(S+B)
279 fSepTypeS.ToLower();
280 if (fSepTypeS == "misclassificationerror") fSepType = new MisClassificationError();
281 else if (fSepTypeS == "giniindex") fSepType = new GiniIndex();
282 else if (fSepTypeS == "crossentropy") fSepType = new CrossEntropy();
283 else fSepType = new SdivSqrtSplusB();
284
// model composition: rules only, linear only, or both (default)
285 fModelTypeS.ToLower();
286 if (fModelTypeS == "modlinear" ) fRuleFit.SetModelLinear();
287 else if (fModelTypeS == "modrule" ) fRuleFit.SetModelRules();
288 else fRuleFit.SetModelFull();
289
290 fPruneMethodS.ToLower();
292 else if (fPruneMethodS == "costcomplexity" ) fPruneMethod = DecisionTree::kCostComplexityPruning;
294
// forest generation: random subsampling vs AdaBoost (default, also the fallback)
295 fForestTypeS.ToLower();
296 if (fForestTypeS == "random" ) fUseBoost = kFALSE;
297 else if (fForestTypeS == "adaboost" ) fUseBoost = kTRUE;
298 else fUseBoost = kTRUE;
299 //
300 // if creating the forest by boosting the events
301 // the full training sample is used per tree
302 // -> only true for the TMVA version of RuleFit.
303 if (fUseBoost && (!fUseRuleFitJF)) fTreeEveFrac = 1.0;
304
305 // check event fraction for tree generation
306 // if <0 set to automatic number
307 if (fTreeEveFrac<=0) {
308 Int_t nevents = Data()->GetNTrainingEvents();
309 Double_t n = static_cast<Double_t>(nevents);
// heuristic: fraction shrinks with sample size, capped at 50%
310 fTreeEveFrac = min( 0.5, (100.0 +6.0*sqrt(n))/n);
311 }
312 // verify ranges of options
313 VerifyRange(Log(), "nTrees", fNTrees,0,100000,20);
314 VerifyRange(Log(), "MinImp", fMinimp,0.0,1.0,0.0);
315 VerifyRange(Log(), "GDTauPrec", fGDTauPrec,1e-5,5e-1);
316 VerifyRange(Log(), "GDTauMin", fGDTauMin,0.0,1.0);
317 VerifyRange(Log(), "GDTauMax", fGDTauMax,fGDTauMin,1.0);
318 VerifyRange(Log(), "GDPathStep", fGDPathStep,0.0,100.0,0.01);
319 VerifyRange(Log(), "GDErrScale", fGDErrScale,1.0,100.0,1.1);
320 VerifyRange(Log(), "GDPathEveFrac", fGDPathEveFrac,0.01,0.9,0.5);
// path + validation fractions must not exceed 1.0 in total
321 VerifyRange(Log(), "GDValidEveFrac",fGDValidEveFrac,0.01,1.0-fGDPathEveFrac,1.0-fGDPathEveFrac);
322 VerifyRange(Log(), "fEventsMin", fMinFracNEve,0.0,1.0);
323 VerifyRange(Log(), "fEventsMax", fMaxFracNEve,fMinFracNEve,1.0);
324
// propagate validated options into the RuleFit worker and its fit parameters
325 fRuleFit.GetRuleEnsemblePtr()->SetLinQuantile(fLinQuantile);
326 fRuleFit.GetRuleFitParamsPtr()->SetGDTauRange(fGDTauMin,fGDTauMax);
327 fRuleFit.GetRuleFitParamsPtr()->SetGDTau(fGDTau);
328 fRuleFit.GetRuleFitParamsPtr()->SetGDTauPrec(fGDTauPrec);
329 fRuleFit.GetRuleFitParamsPtr()->SetGDTauScan(fGDTauScan);
330 fRuleFit.GetRuleFitParamsPtr()->SetGDPathStep(fGDPathStep);
331 fRuleFit.GetRuleFitParamsPtr()->SetGDNPathSteps(fGDNPathSteps);
332 fRuleFit.GetRuleFitParamsPtr()->SetGDErrScale(fGDErrScale);
333 fRuleFit.SetImportanceCut(fMinimp);
334 fRuleFit.SetRuleMinDist(fRuleMinDist);
335
336
337 // check if Friedmans module is used.
338 // print a message concerning the options.
339 if (fUseRuleFitJF) {
340 Log() << kINFO << "" << Endl;
341 Log() << kINFO << "--------------------------------------" <<Endl;
342 Log() << kINFO << "Friedmans RuleFit module is selected." << Endl;
343 Log() << kINFO << "Only the following options are used:" << Endl;
344 Log() << kINFO << Endl;
345 Log() << kINFO << gTools().Color("bold") << " Model" << gTools().Color("reset") << Endl;
346 Log() << kINFO << gTools().Color("bold") << " RFWorkDir" << gTools().Color("reset") << Endl;
347 Log() << kINFO << gTools().Color("bold") << " RFNrules" << gTools().Color("reset") << Endl;
348 Log() << kINFO << gTools().Color("bold") << " RFNendnodes" << gTools().Color("reset") << Endl;
349 Log() << kINFO << gTools().Color("bold") << " GDNPathSteps" << gTools().Color("reset") << Endl;
350 Log() << kINFO << gTools().Color("bold") << " GDPathStep" << gTools().Color("reset") << Endl;
351 Log() << kINFO << gTools().Color("bold") << " GDErrScale" << gTools().Color("reset") << Endl;
352 Log() << kINFO << "--------------------------------------" <<Endl;
353 Log() << kINFO << Endl;
354 }
355
356 // Select what weight to use in the 'importance' rule visualisation plots.
357 // Note that if UseCoefficientsVisHists() is selected, the following weight is used:
358 // w = rule coefficient * rule support
359 // The support is a positive number which is 0 if no events are accepted by the rule.
360 // Normally the importance gives more useful information.
361 //
362 //fRuleFit.UseCoefficientsVisHists();
363 fRuleFit.UseImportanceVisHists();
364
365 fRuleFit.SetMsgType( Log().GetMinType() );
366
368
369}
370
371////////////////////////////////////////////////////////////////////////////////
372/// initialize the monitoring ntuple
373
// Create the monitoring TTree in the method's ROOT base directory and wire one
// branch to each fNT* member; TrainTMVARuleFit() later fills one entry per rule.
// NOTE(review): the signature line (original 374) was dropped by the extractor.
375{
// cd into the method's directory so the TTree is owned by the output file
376 BaseDir()->cd();
377 fMonitorNtuple= new TTree("MonitorNtuple_RuleFit","RuleFit variables");
378 fMonitorNtuple->Branch("importance",&fNTImportance,"importance/D");
379 fMonitorNtuple->Branch("support",&fNTSupport,"support/D");
380 fMonitorNtuple->Branch("coefficient",&fNTCoefficient,"coefficient/D");
381 fMonitorNtuple->Branch("ncuts",&fNTNcuts,"ncuts/I");
382 fMonitorNtuple->Branch("nvars",&fNTNvars,"nvars/I");
383 fMonitorNtuple->Branch("type",&fNTType,"type/I");
// rule-selection probabilities: P(tag) and the signal/background transition matrix
384 fMonitorNtuple->Branch("ptag",&fNTPtag,"ptag/D");
385 fMonitorNtuple->Branch("pss",&fNTPss,"pss/D");
386 fMonitorNtuple->Branch("psb",&fNTPsb,"psb/D");
387 fMonitorNtuple->Branch("pbs",&fNTPbs,"pbs/D");
388 fMonitorNtuple->Branch("pbb",&fNTPbb,"pbb/D");
389 fMonitorNtuple->Branch("soversb",&fNTSSB,"soversb/D");
390}
391
392////////////////////////////////////////////////////////////////////////////////
393/// default initialization
394
// Default initialization of parameters that used to be user options; any
// user-driven modification happens later in ProcessOptions().
// NOTE(review): the extractor dropped the signature (original 395) and the
// statement under the "minimum requirement" comment (original 398, presumably
// SetSignalReferenceCut) -- confirm against the repository source.
396{
397 // the minimum requirement to declare an event signal-like
399
400 // set variables that used to be options
401 // any modifications are then made in ProcessOptions()
402 fLinQuantile = 0.025; // Quantile of linear terms (remove outliers)
403 fTreeEveFrac = -1.0; // Fraction of events used to train each tree
404 fNCuts = 20; // Number of steps during node cut optimisation
405 fSepTypeS = "GiniIndex"; // Separation criterion for node splitting; see BDT
406 fPruneMethodS = "NONE"; // Pruning method; see BDT
407 fPruneStrength = 3.5; // Pruning strength; see BDT
408 fGDTauMin = 0.0; // Gradient-directed path: min fit threshold (tau)
409 fGDTauMax = 1.0; // Gradient-directed path: max fit threshold (tau)
410 fGDTauScan = 1000; // Gradient-directed path: number of points scanning for best tau
411
412}
413
414////////////////////////////////////////////////////////////////////////////////
415/// write all Events from the Tree into a vector of Events, that are
416/// more easily manipulated.
417/// This method should never be called without existing trainingTree, as it
418/// the vector of events from the ROOT training tree
419
// Deep-copy all events of the dataset into fEventSample (owned; deleted in the
// destructor), determine the per-tree subsample fraction if not set, and
// shuffle the sample.
// NOTE(review): the signature line (original 420) was dropped by the extractor.
421{
422 if (Data()->GetNEvents()==0) Log() << kFATAL << "<Init> Data().TrainingTree() is zero pointer" << Endl;
423
424 Int_t nevents = Data()->GetNEvents();
425 for (Int_t ievt=0; ievt<nevents; ievt++){
426 const Event * ev = GetEvent(ievt);
427 fEventSample.push_back( new Event(*ev));
428 }
// same size-dependent heuristic as in ProcessOptions(), capped at 50%
429 if (fTreeEveFrac<=0) {
430 Double_t n = static_cast<Double_t>(nevents);
431 fTreeEveFrac = min( 0.5, (100.0 +6.0*sqrt(n))/n);
432 }
433 if (fTreeEveFrac>1.0) fTreeEveFrac=1.0;
434 //
// NOTE(review): std::default_random_engine{} is default-seeded, so this shuffle
// is deterministic across runs (reproducible but not random per invocation).
435 std::shuffle(fEventSample.begin(), fEventSample.end(), std::default_random_engine{});
436 //
437 Log() << kDEBUG << "Set sub-sample fraction to " << fTreeEveFrac << Endl;
438}
439
440////////////////////////////////////////////////////////////////////////////////
441
// Main training entry point: builds the event sample, then dispatches to either
// Friedman's external module or the TMVA-internal implementation.
// NOTE(review): the extractor dropped the signature (original 442) and several
// statements (originals 444/447/453/456/459-460, presumably the TrainJFRuleFit/
// TrainTMVARuleFit calls inside the if/else) -- confirm against the repository.
443{
445 // training of rules
446
448
449 // fill the STL Vector with the event sample
450 this->InitEventSample();
451
452 if (fUseRuleFitJF) {
454 }
455 else {
457 }
// drop the per-event rule map: it is only needed during training
458 fRuleFit.GetRuleEnsemblePtr()->ClearRuleMap();
461}
462
463////////////////////////////////////////////////////////////////////////////////
464/// training of rules using TMVA implementation
465
// Train with the TMVA-internal implementation: build the forest/rule ensemble,
// fit the rule coefficients along the gradient-directed path, compute
// importances, and (unless the output file is silent) fill the monitoring
// ntuple with one entry per rule and produce the visualisation histograms.
// NOTE(review): the extractor dropped the signature (original 466) and the
// assignments at originals 509/511 (presumably fNTImportance/fNTCoefficient) --
// confirm against the repository source.
467{
// RuleFit requires unnormalised inputs
468 if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with RuleFit; "
469 << "please remove the option from the configuration string, or "
470 << "use \"!Normalise\""
471 << Endl;
472
473 // timer
474 Timer timer( 1, GetName() );
475
476 // test tree nmin cut -> for debug purposes
477 // the routine will generate trees with stopping cut on N(eve) given by
478 // a fraction between [20,N(eve)-1].
479 //
480 // MakeForestRnd();
481 // exit(1);
482 //
483
484 // Init RuleFit object and create rule ensemble
485 // + make forest & rules
486 fRuleFit.Initialize( this );
487
488 // Make forest of decision trees
489 // if (fRuleFit.GetRuleEnsemble().DoRules()) fRuleFit.MakeForest();
490
491 // Fit the rules
492 Log() << kDEBUG << "Fitting rule coefficients ..." << Endl;
493 fRuleFit.FitCoefficients();
494
495 // Calculate importance
496 Log() << kDEBUG << "Computing rule and variable importance" << Endl;
497 fRuleFit.CalcImportance();
498
499 // Output results and fill monitor ntuple
500 fRuleFit.GetRuleEnsemblePtr()->Print();
501 //
502 if(!IsSilentFile())
503 {
504 Log() << kDEBUG << "Filling rule ntuple" << Endl;
505 UInt_t nrules = fRuleFit.GetRuleEnsemble().GetRulesConst().size();
506 const Rule *rule;
// copy each rule's properties into the branch buffers and commit one entry
507 for (UInt_t i=0; i<nrules; i++ ) {
508 rule = fRuleFit.GetRuleEnsemble().GetRulesConst(i);
510 fNTSupport = rule->GetSupport();
512 fNTType = (rule->IsSignalRule() ? 1:-1 );
513 fNTNvars = rule->GetRuleCut()->GetNvars();
514 fNTNcuts = rule->GetRuleCut()->GetNcuts();
515 fNTPtag = fRuleFit.GetRuleEnsemble().GetRulePTag(i); // should be identical with support
516 fNTPss = fRuleFit.GetRuleEnsemble().GetRulePSS(i);
517 fNTPsb = fRuleFit.GetRuleEnsemble().GetRulePSB(i);
518 fNTPbs = fRuleFit.GetRuleEnsemble().GetRulePBS(i);
519 fNTPbb = fRuleFit.GetRuleEnsemble().GetRulePBB(i);
520 fNTSSB = rule->GetSSB();
521 fMonitorNtuple->Fill();
522 }
523
524 fRuleFit.MakeVisHists();
525 fRuleFit.MakeDebugHists();
526 }
527 Log() << kDEBUG << "Training done" << Endl;
528
529}
530
531////////////////////////////////////////////////////////////////////////////////
532/// training of rules using Jerome Friedmans implementation
533
// Train via Jerome Friedman's external rf_go.exe module: hand the training
// events to the RuleFit worker, run the external program through RuleFitAPI,
// read back the model summary, and compute importances/histograms.
// NOTE(review): the signature line (original 534) was dropped by the extractor.
535{
536 fRuleFit.InitPtrs( this );
537 Data()->SetCurrentType(Types::kTraining);
538 UInt_t nevents = Data()->GetNTrainingEvents();
// collect non-owning pointers to the training events for the worker
539 std::vector<const TMVA::Event*> tmp;
540 for (Long64_t ievt=0; ievt<nevents; ievt++) {
541 const Event *event = GetEvent(ievt);
542 tmp.push_back(event);
543 }
544 fRuleFit.SetTrainingEvents( tmp );
545
// the API object drives the external executable; owned locally and deleted below
546 RuleFitAPI *rfAPI = new RuleFitAPI( this, &fRuleFit, Log().GetMinType() );
547
548 rfAPI->WelcomeMessage();
549
550 // timer
551 Timer timer( 1, GetName() );
552
553 Log() << kINFO << "Training ..." << Endl;
554 rfAPI->TrainRuleFit();
555
556 Log() << kDEBUG << "reading model summary from rf_go.exe output" << Endl;
557 rfAPI->ReadModelSum();
558
559 // fRuleFit.GetRuleEnsemblePtr()->MakeRuleMap();
560
561 Log() << kDEBUG << "calculating rule and variable importance" << Endl;
562 fRuleFit.CalcImportance();
563
564 // Output results and fill monitor ntuple
565 fRuleFit.GetRuleEnsemblePtr()->Print();
566 //
567 if(!IsSilentFile())fRuleFit.MakeVisHists();
568
569 delete rfAPI;
570
571 Log() << kDEBUG << "done training" << Endl;
572}
573
574////////////////////////////////////////////////////////////////////////////////
575/// computes ranking of input variables
576
// Build and return the variable ranking, one entry per input variable, ranked
// by the rule-ensemble variable importance. The Ranking object is stored in
// the fRanking member (owned by the base class machinery).
// NOTE(review): the signature line (original 577) was dropped by the extractor.
578{
579 // create the ranking object
580 fRanking = new Ranking( GetName(), "Importance" );
581
582 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
583 fRanking->AddRank( Rank( GetInputLabel(ivar), fRuleFit.GetRuleEnsemble().GetVarImportance(ivar) ) );
584 }
585
586 return fRanking;
587}
588
589////////////////////////////////////////////////////////////////////////////////
590/// add the rules to XML node
591
592void TMVA::MethodRuleFit::AddWeightsXMLTo( void* parent ) const
593{
594 fRuleFit.GetRuleEnsemble().AddXMLTo( parent );
595}
596
597////////////////////////////////////////////////////////////////////////////////
598/// read rules from an std::istream
599
// Read the rule-ensemble weights from a plain-text weight stream (legacy
// format); delegates entirely to RuleEnsemble::ReadRaw.
// NOTE(review): the signature line (original 600) was dropped by the extractor.
601{
602 fRuleFit.GetRuleEnsemblePtr()->ReadRaw( istr );
603}
604
605////////////////////////////////////////////////////////////////////////////////
606/// read rules from XML node
607
// Read the rule-ensemble weights from the XML weight-file node; delegates to
// RuleEnsemble::ReadFromXML (counterpart of AddWeightsXMLTo).
// NOTE(review): the signature line (original 608) was dropped by the extractor.
609{
610 fRuleFit.GetRuleEnsemblePtr()->ReadFromXML( wghtnode );
611}
612
613////////////////////////////////////////////////////////////////////////////////
614/// returns MVA value for given event
615
// Evaluate the rule+linear model on the current event and return the MVA score.
// No error estimate is available for this method, hence NoErrorCalc.
// NOTE(review): the signature line (original 616) was dropped by the extractor.
617{
618 // cannot determine error
619 NoErrorCalc(err, errUpper);
620
621 return fRuleFit.EvalEvent( *GetEvent() );
622}
623
624////////////////////////////////////////////////////////////////////////////////
625/// write special monitoring histograms to file (here ntuple)
626
// Write the monitoring ntuple (filled during TrainTMVARuleFit) into the
// method's ROOT base directory.
// NOTE(review): the signature line (original 627) was dropped by the extractor.
628{
629 BaseDir()->cd();
630 Log() << kINFO << "Write monitoring ntuple to file: " << BaseDir()->GetPath() << Endl;
631 fMonitorNtuple->Write();
632}
633
634////////////////////////////////////////////////////////////////////////////////
635/// write specific classifier response
636
637void TMVA::MethodRuleFit::MakeClassSpecific( std::ostream& fout, const TString& className ) const
638{
639 Int_t dp = fout.precision();
640 fout << " // not implemented for class: \"" << className << "\"" << std::endl;
641 fout << "};" << std::endl;
642 fout << "void " << className << "::Initialize(){}" << std::endl;
643 fout << "void " << className << "::Clear(){}" << std::endl;
644 fout << "double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const {" << std::endl;
645 fout << " double rval=" << std::setprecision(10) << fRuleFit.GetRuleEnsemble().GetOffset() << ";" << std::endl;
646 MakeClassRuleCuts(fout);
647 MakeClassLinear(fout);
648 fout << " return rval;" << std::endl;
649 fout << "}" << std::endl;
650 fout << std::setprecision(dp);
651}
652
653////////////////////////////////////////////////////////////////////////////////
654/// print out the rule cuts
655
656void TMVA::MethodRuleFit::MakeClassRuleCuts( std::ostream& fout ) const
657{
658 Int_t dp = fout.precision();
659 if (!fRuleFit.GetRuleEnsemble().DoRules()) {
660 fout << " //" << std::endl;
661 fout << " // ==> MODEL CONTAINS NO RULES <==" << std::endl;
662 fout << " //" << std::endl;
663 return;
664 }
665 const RuleEnsemble *rens = &(fRuleFit.GetRuleEnsemble());
666 const std::vector< Rule* > *rules = &(rens->GetRulesConst());
667 const RuleCut *ruleCut;
668 //
669 std::list< std::pair<Double_t,Int_t> > sortedRules;
670 for (UInt_t ir=0; ir<rules->size(); ir++) {
671 sortedRules.push_back( std::pair<Double_t,Int_t>( (*rules)[ir]->GetImportance()/rens->GetImportanceRef(),ir ) );
672 }
673 sortedRules.sort();
674 //
675 fout << " //" << std::endl;
676 fout << " // here follows all rules ordered in importance (most important first)" << std::endl;
677 fout << " // at the end of each line, the relative importance of the rule is given" << std::endl;
678 fout << " //" << std::endl;
679 //
680 for ( std::list< std::pair<double,int> >::reverse_iterator itpair = sortedRules.rbegin();
681 itpair != sortedRules.rend(); ++itpair ) {
682 UInt_t ir = itpair->second;
683 Double_t impr = itpair->first;
684 ruleCut = (*rules)[ir]->GetRuleCut();
685 if (impr<rens->GetImportanceCut()) fout << " //" << std::endl;
686 fout << " if (" << std::flush;
687 for (UInt_t ic=0; ic<ruleCut->GetNvars(); ic++) {
688 Double_t sel = ruleCut->GetSelector(ic);
689 Double_t valmin = ruleCut->GetCutMin(ic);
690 Double_t valmax = ruleCut->GetCutMax(ic);
691 Bool_t domin = ruleCut->GetCutDoMin(ic);
692 Bool_t domax = ruleCut->GetCutDoMax(ic);
693 //
694 if (ic>0) fout << "&&" << std::flush;
695 if (domin) {
696 fout << "(" << std::setprecision(10) << valmin << std::flush;
697 fout << "<inputValues[" << sel << "])" << std::flush;
698 }
699 if (domax) {
700 if (domin) fout << "&&" << std::flush;
701 fout << "(inputValues[" << sel << "]" << std::flush;
702 fout << "<" << std::setprecision(10) << valmax << ")" <<std::flush;
703 }
704 }
705 fout << ") rval+=" << std::setprecision(10) << (*rules)[ir]->GetCoefficient() << ";" << std::flush;
706 fout << " // importance = " << TString::Format("%3.3f",impr) << std::endl;
707 }
708 fout << std::setprecision(dp);
709}
710
711////////////////////////////////////////////////////////////////////////////////
712/// print out the linear terms
713
714void TMVA::MethodRuleFit::MakeClassLinear( std::ostream& fout ) const
715{
716 if (!fRuleFit.GetRuleEnsemble().DoLinear()) {
717 fout << " //" << std::endl;
718 fout << " // ==> MODEL CONTAINS NO LINEAR TERMS <==" << std::endl;
719 fout << " //" << std::endl;
720 return;
721 }
722 fout << " //" << std::endl;
723 fout << " // here follows all linear terms" << std::endl;
724 fout << " // at the end of each line, the relative importance of the term is given" << std::endl;
725 fout << " //" << std::endl;
726 const RuleEnsemble *rens = &(fRuleFit.GetRuleEnsemble());
727 UInt_t nlin = rens->GetNLinear();
728 for (UInt_t il=0; il<nlin; il++) {
729 if (rens->IsLinTermOK(il)) {
730 Double_t norm = rens->GetLinNorm(il);
731 Double_t imp = rens->GetLinImportance(il)/rens->GetImportanceRef();
732 fout << " rval+="
733 // << std::setprecision(10) << rens->GetLinCoefficients(il)*norm << "*std::min(" << setprecision(10) << rens->GetLinDP(il)
734 // << ", std::max( inputValues[" << il << "]," << std::setprecision(10) << rens->GetLinDM(il) << "));"
735 << std::setprecision(10) << rens->GetLinCoefficients(il)*norm
736 << "*std::min( double(" << std::setprecision(10) << rens->GetLinDP(il)
737 << "), std::max( double(inputValues[" << il << "]), double(" << std::setprecision(10) << rens->GetLinDM(il) << ")));"
738 << std::flush;
739 fout << " // importance = " << TString::Format("%3.3f",imp) << std::endl;
740 }
741 }
742}
743
744////////////////////////////////////////////////////////////////////////////////
745/// get help message text
746///
747/// typical length of text line:
748/// "|--------------------------------------------------------------|"
749
751{
752 TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
753 TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
754 TString brk = gConfig().WriteOptionsReference() ? "<br>" : "";
755
756 Log() << Endl;
757 Log() << col << "--- Short description:" << colres << Endl;
758 Log() << Endl;
759 Log() << "This method uses a collection of so called rules to create a" << Endl;
760 Log() << "discriminating scoring function. Each rule consists of a series" << Endl;
761 Log() << "of cuts in parameter space. The ensemble of rules are created" << Endl;
762 Log() << "from a forest of decision trees, trained using the training data." << Endl;
763 Log() << "Each node (apart from the root) corresponds to one rule." << Endl;
764 Log() << "The scoring function is then obtained by linearly combining" << Endl;
765 Log() << "the rules. A fitting procedure is applied to find the optimum" << Endl;
766 Log() << "set of coefficients. The goal is to find a model with few rules" << Endl;
767 Log() << "but with a strong discriminating power." << Endl;
768 Log() << Endl;
769 Log() << col << "--- Performance optimisation:" << colres << Endl;
770 Log() << Endl;
771 Log() << "There are two important considerations to make when optimising:" << Endl;
772 Log() << Endl;
773 Log() << " 1. Topology of the decision tree forest" << brk << Endl;
774 Log() << " 2. Fitting of the coefficients" << Endl;
775 Log() << Endl;
776 Log() << "The maximum complexity of the rules is defined by the size of" << Endl;
777 Log() << "the trees. Large trees will yield many complex rules and capture" << Endl;
778 Log() << "higher order correlations. On the other hand, small trees will" << Endl;
779 Log() << "lead to a smaller ensemble with simple rules, only capable of" << Endl;
780 Log() << "modeling simple structures." << Endl;
781 Log() << "Several parameters exists for controlling the complexity of the" << Endl;
782 Log() << "rule ensemble." << Endl;
783 Log() << Endl;
784 Log() << "The fitting procedure searches for a minimum using a gradient" << Endl;
785 Log() << "directed path. Apart from step size and number of steps, the" << Endl;
786 Log() << "evolution of the path is defined by a cut-off parameter, tau." << Endl;
787 Log() << "This parameter is unknown and depends on the training data." << Endl;
788 Log() << "A large value will tend to give large weights to a few rules." << Endl;
789 Log() << "Similarly, a small value will lead to a large set of rules" << Endl;
790 Log() << "with similar weights." << Endl;
791 Log() << Endl;
792 Log() << "A final point is the model used; rules and/or linear terms." << Endl;
793 Log() << "For a given training sample, the result may improve by adding" << Endl;
794 Log() << "linear terms. If best performance is obtained using only linear" << Endl;
795 Log() << "terms, it is very likely that the Fisher discriminant would be" << Endl;
796 Log() << "a better choice. Ideally the fitting procedure should be able to" << Endl;
797 Log() << "make this choice by giving appropriate weights for either terms." << Endl;
798 Log() << Endl;
799 Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
800 Log() << Endl;
801 Log() << "I. TUNING OF RULE ENSEMBLE:" << Endl;
802 Log() << Endl;
803 Log() << " " << col << "ForestType " << colres
804 << ": Recommended is to use the default \"AdaBoost\"." << brk << Endl;
805 Log() << " " << col << "nTrees " << colres
806 << ": More trees leads to more rules but also slow" << Endl;
807 Log() << " performance. With too few trees the risk is" << Endl;
808 Log() << " that the rule ensemble becomes too simple." << brk << Endl;
809 Log() << " " << col << "fEventsMin " << colres << brk << Endl;
810 Log() << " " << col << "fEventsMax " << colres
811 << ": With a lower min, more large trees will be generated" << Endl;
812 Log() << " leading to more complex rules." << Endl;
813 Log() << " With a higher max, more small trees will be" << Endl;
814 Log() << " generated leading to more simple rules." << Endl;
815 Log() << " By changing this range, the average complexity" << Endl;
816 Log() << " of the rule ensemble can be controlled." << brk << Endl;
817 Log() << " " << col << "RuleMinDist " << colres
818 << ": By increasing the minimum distance between" << Endl;
819 Log() << " rules, fewer and more diverse rules will remain." << Endl;
820 Log() << " Initially it is a good idea to keep this small" << Endl;
821 Log() << " or zero and let the fitting do the selection of" << Endl;
822 Log() << " rules. In order to reduce the ensemble size," << Endl;
823 Log() << " the value can then be increased." << Endl;
824 Log() << Endl;
825 // "|--------------------------------------------------------------|"
826 Log() << "II. TUNING OF THE FITTING:" << Endl;
827 Log() << Endl;
828 Log() << " " << col << "GDPathEveFrac " << colres
829 << ": fraction of events in path evaluation" << Endl;
830 Log() << " Increasing this fraction will improve the path" << Endl;
831 Log() << " finding. However, a too high value will give few" << Endl;
832 Log() << " unique events available for error estimation." << Endl;
833 Log() << " It is recommended to use the default = 0.5." << brk << Endl;
834 Log() << " " << col << "GDTau " << colres
835 << ": cutoff parameter tau" << Endl;
836 Log() << " By default this value is set to -1.0." << Endl;
837 // "|----------------|---------------------------------------------|"
838 Log() << " This means that the cut off parameter is" << Endl;
839 Log() << " automatically estimated. In most cases" << Endl;
840 Log() << " this should be fine. However, you may want" << Endl;
841 Log() << " to fix this value if you already know it" << Endl;
842 Log() << " and want to reduce on training time." << brk << Endl;
843 Log() << " " << col << "GDTauPrec " << colres
844 << ": precision of estimated tau" << Endl;
845 Log() << " Increase this precision to find a more" << Endl;
846 Log() << " optimum cut-off parameter." << brk << Endl;
847 Log() << " " << col << "GDNStep " << colres
848 << ": number of steps in path search" << Endl;
849 Log() << " If the number of steps is too small, then" << Endl;
850 Log() << " the program will give a warning message." << Endl;
851 Log() << Endl;
852 Log() << "III. WARNING MESSAGES" << Endl;
853 Log() << Endl;
854 Log() << col << "Risk(i+1)>=Risk(i) in path" << colres << brk << Endl;
855 Log() << col << "Chaotic behaviour of risk evolution." << colres << Endl;
856 // "|----------------|---------------------------------------------|"
857 Log() << " The error rate was still decreasing at the end" << Endl;
858 Log() << " By construction the Risk should always decrease." << Endl;
859 Log() << " However, if the training sample is too small or" << Endl;
860 Log() << " the model is overtrained, such warnings can" << Endl;
861 Log() << " occur." << Endl;
862 Log() << " The warnings can safely be ignored if only a" << Endl;
863 Log() << " few (<3) occur. If more warnings are generated," << Endl;
864 Log() << " the fitting fails." << Endl;
865 Log() << " A remedy may be to increase the value" << brk << Endl;
866 Log() << " "
867 << col << "GDValidEveFrac" << colres
868 << " to 1.0 (or a larger value)." << brk << Endl;
869 Log() << " In addition, if "
870 << col << "GDPathEveFrac" << colres
871 << " is too high" << Endl;
872 Log() << " the same warnings may occur since the events" << Endl;
873 Log() << " used for error estimation are also used for" << Endl;
874 Log() << " path estimation." << Endl;
875 Log() << " Another possibility is to modify the model - " << Endl;
876 Log() << " See above on tuning the rule ensemble." << Endl;
877 Log() << Endl;
878 Log() << col << "The error rate was still decreasing at the end of the path"
879 << colres << Endl;
880 Log() << " Too few steps in path! Increase "
881 << col << "GDNSteps" << colres << "." << Endl;
882 Log() << Endl;
883 Log() << col << "Reached minimum early in the search" << colres << Endl;
884
885 Log() << " Minimum was found early in the fitting. This" << Endl;
886 Log() << " may indicate that the used step size "
887 << col << "GDStep" << colres << "." << Endl;
888 Log() << " was too large. Reduce it and rerun." << Endl;
889 Log() << " If the results still are not OK, modify the" << Endl;
890 Log() << " model either by modifying the rule ensemble" << Endl;
891 Log() << " or add/remove linear terms" << Endl;
892}
#define REGISTER_METHOD(CLASS)
for example
#define e(i)
Definition RSha256.hxx:103
int Int_t
Signed integer 4 bytes (int).
Definition RtypesCore.h:59
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
Definition RtypesCore.h:60
bool Bool_t
Boolean (0=false, 1=true) (bool).
Definition RtypesCore.h:77
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
Double_t err
Bool_t WriteOptionsReference() const
Definition Config.h:65
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
MsgLogger & Log() const
Implementation of the CrossEntropy as separation criterion.
Class that contains all the data information.
Definition DataSetInfo.h:62
static void SetIsTraining(bool on)
Implementation of a Decision Tree.
Implementation of the GiniIndex as separation criterion.
Definition GiniIndex.h:63
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
Bool_t HasTrainingTree() const
Definition MethodBase.h:516
const char * GetName() const override
Definition MethodBase.h:337
TString GetMethodTypeName() const
Definition MethodBase.h:335
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition MethodBase.h:689
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
void ExitFromTraining()
Definition MethodBase.h:467
UInt_t GetNEvents() const
Definition MethodBase.h:419
const Event * GetEvent() const
Definition MethodBase.h:754
UInt_t GetNvar() const
Definition MethodBase.h:347
Bool_t IsSilentFile() const
Definition MethodBase.h:382
void SetSignalReferenceCut(Double_t cut)
Definition MethodBase.h:367
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
const TString & GetInputLabel(Int_t i) const
Definition MethodBase.h:353
Ranking * fRanking
Definition MethodBase.h:590
DataSet * Data() const
Definition MethodBase.h:412
Bool_t IsNormalised() const
Definition MethodBase.h:499
RuleFit fRuleFit
RuleFit instance.
UInt_t fGDTauScan
GD path: number of points to scan.
Double_t fNTPss
ntuple: rule P(tag s, true s)
TString fForestTypeS
forest generation: how the trees are generated
Double_t fMinimp
rule/linear: minimum importance
TString fRuleFitModuleS
which rulefit module to use
Double_t fLinQuantile
quantile cut to remove outliers - see RuleEnsemble
void GetHelpMessage() const override
get help message text
void ReadWeightsFromXML(void *wghtnode) override
read rules from XML node
Double_t fMinFracNEve
min fraction of number events
Int_t fNTType
ntuple: rule type (+1->signal, -1->bkg)
Bool_t fUseRuleFitJF
if true interface with J.Friedmans RuleFit module
Double_t fGDTauMax
GD path: max threshold fraction [0..1].
void DeclareOptions() override
define the options (their key words) that can be set in the option string know options.
Double_t fGDPathEveFrac
GD path: fraction of subsamples used for the fitting.
Bool_t fUseBoost
use boosted events for forest generation
TMVA::DecisionTree::EPruneMethod fPruneMethod
forest generation: method used for pruning - see DecisionTree
std::vector< DecisionTree * > fForest
the forest
Double_t fMaxFracNEve
ditto max
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
returns MVA value for given event
TString fRFWorkDir
working directory from Friedmans module
Double_t fTreeEveFrac
fraction of events used for training each tree
void MakeClassLinear(std::ostream &) const
print out the linear terms
Double_t fNTPsb
ntuple: rule P(tag s, true b)
Int_t fNTNvars
ntuple: rule number of vars
Int_t fGDNPathSteps
GD path: number of steps.
std::vector< TMVA::Event * > fEventSample
the complete training sample
Double_t fNTPbb
ntuple: rule P(tag b, true b)
Double_t fNTSSB
ntuple: rule S/(S+B)
void TrainJFRuleFit()
training of rules using Jerome Friedmans implementation
Int_t fNTNcuts
ntuple: rule number of cuts
TString fModelTypeS
rule ensemble: which model (rule,linear or both)
Double_t fNTImportance
ntuple: rule importance
Double_t fGDValidEveFrac
GD path: fraction of subsamples used for the fitting.
void InitEventSample(void)
write all Events from the Tree into a vector of Events, that are more easily manipulated.
void MakeClassRuleCuts(std::ostream &) const
print out the rule cuts
Double_t fNTCoefficient
ntuple: rule coefficient
void Init(void) override
default initialization
TString fSepTypeS
forest generation: separation type - see DecisionTree
void InitMonitorNtuple()
initialize the monitoring ntuple
Int_t fNTrees
number of trees in forest
Double_t fNTPtag
ntuple: rule P(tag)
virtual ~MethodRuleFit(void)
destructor
Int_t fRFNendnodes
max number of rules (only Friedmans module)
Double_t fNTPbs
ntuple: rule P(tag b, true s)
void ReadWeightsFromStream(std::istream &istr) override
read rules from an std::istream
TTree * fMonitorNtuple
pointer to monitor rule ntuple
void ProcessOptions() override
process the options specified by the user
void AddWeightsXMLTo(void *parent) const override
add the rules to XML node
MethodRuleFit(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
Double_t fNTSupport
ntuple: rule support
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t) override
RuleFit can handle classification with 2 classes.
Double_t fGDTauMin
GD path: min threshold fraction [0..1].
Double_t fGDTau
GD path: def threshold fraction [0..1].
SeparationBase * fSepType
the separation used in node splitting
const Ranking * CreateRanking() override
computes ranking of input variables
Double_t fGDPathStep
GD path: step size in path.
Double_t fGDTauPrec
GD path: precision of estimated tau.
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
Double_t fPruneStrength
forest generation: prune strength - see DecisionTree
void Train(void) override
Int_t fNCuts
grid used in cut applied in node splitting
void WriteMonitoringHistosToFile(void) const override
write special monitoring histograms to file (here ntuple)
Double_t fGDErrScale
GD path: stop.
TString fPruneMethodS
forest generation: prune method - see DecisionTree
Double_t fRuleMinDist
rule min distance - see RuleEnsemble
Int_t fRFNrules
max number of rules (only Friedmans module)
Bool_t VerifyRange(MsgLogger &mlog, const char *varstr, T &var, const T &vmin, const T &vmax)
void TrainTMVARuleFit()
training of rules using TMVA implementation
Double_t fSignalFraction
scalefactor for bkg events to modify initial s/b fraction in training data
Implementation of the MisClassificationError as separation criterion.
Ranking for variables in method (implementation).
Definition Ranking.h:48
A class describing a 'rule cut'.
Definition RuleCut.h:36
UInt_t GetNvars() const
Definition RuleCut.h:72
Double_t GetCutMin(Int_t is) const
Definition RuleCut.h:74
UInt_t GetSelector(Int_t is) const
Definition RuleCut.h:73
Char_t GetCutDoMin(Int_t is) const
Definition RuleCut.h:76
Char_t GetCutDoMax(Int_t is) const
Definition RuleCut.h:77
UInt_t GetNcuts() const
get number of cuts
Definition RuleCut.cxx:164
Double_t GetCutMax(Int_t is) const
Definition RuleCut.h:75
Double_t GetLinDP(int i) const
Double_t GetLinDM(int i) const
const std::vector< Double_t > & GetLinCoefficients() const
Double_t GetImportanceRef() const
const std::vector< Double_t > & GetLinNorm() const
UInt_t GetNLinear() const
const std::vector< TMVA::Rule * > & GetRulesConst() const
const std::vector< Double_t > & GetLinImportance() const
Bool_t IsLinTermOK(int i) const
J Friedman's RuleFit method.
Definition RuleFitAPI.h:51
Bool_t ReadModelSum()
read model from rulefit.sum
void WelcomeMessage()
welcome message
Implementation of a rule.
Definition Rule.h:50
Double_t GetSupport() const
Definition Rule.h:142
const RuleCut * GetRuleCut() const
Definition Rule.h:139
Bool_t IsSignalRule() const
Definition Rule.h:119
Double_t GetCoefficient() const
Definition Rule.h:141
Double_t GetSSB() const
Definition Rule.h:117
Double_t GetRelImportance() const
Definition Rule.h:102
Implementation of the SdivSqrtSplusB as separation criterion.
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:803
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kClassification
Definition Types.h:127
@ kTraining
Definition Types.h:143
Basic string class.
Definition TString.h:138
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2385
A TTree represents a columnar dataset.
Definition TTree.h:89
const Int_t n
Definition legend1.C:16
create variable transformations
Config & gConfig()
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148