#include "TDirectory.h"
TMVA::MethodRuleFit::MethodRuleFit( const TString& jobName,
                                    const TString& methodTitle,
                                    DataSetInfo& theData,
                                    const TString& theOption,
                                    TDirectory* theTargetDir )
   : MethodBase( jobName, Types::kRuleFit, methodTitle, theData, theOption, theTargetDir )
   , fPruneMethod(TMVA::DecisionTree::kCostComplexityPruning)
for (UInt_t i=0; i<fEventSample.size(); i++) delete fEventSample[i];
for (UInt_t i=0; i<fForest.size();      i++) delete fForest[i];
DeclareOptionRef(fGDTau=-1,           "GDTau",          "Gradient-directed (GD) path: default fit cut-off");
DeclareOptionRef(fGDTauPrec=0.01,     "GDTauPrec",      "GD path: precision of tau");
DeclareOptionRef(fGDPathStep=0.01,    "GDStep",         "GD path: step size");
DeclareOptionRef(fGDNPathSteps=10000, "GDNSteps",       "GD path: number of steps");
DeclareOptionRef(fGDErrScale=1.1,     "GDErrScale",     "Stop scan when error > scale*errmin");
DeclareOptionRef(fLinQuantile,        "LinQuantile",    "Quantile of linear terms (removes outliers)");
DeclareOptionRef(fGDPathEveFrac=0.5,  "GDPathEveFrac",  "Fraction of events used for the path search");
DeclareOptionRef(fGDValidEveFrac=0.5, "GDValidEveFrac", "Fraction of events used for the validation");
DeclareOptionRef(fMinFracNEve=0.1,    "fEventsMin",     "Minimum fraction of events in a splittable node");
DeclareOptionRef(fMaxFracNEve=0.9,    "fEventsMax",     "Maximum fraction of events in a splittable node");
DeclareOptionRef(fNTrees=20,          "nTrees",         "Number of trees in forest.");

DeclareOptionRef(fForestTypeS="AdaBoost", "ForestType", "Method to use for forest generation (AdaBoost or RandomForest)");
AddPreDefVal(TString("AdaBoost"));
AddPreDefVal(TString("Random"));

DeclareOptionRef(fRuleMinDist=0.001,  "RuleMinDist",    "Minimum distance between rules");
DeclareOptionRef(fMinimp=0.01,        "MinImp",         "Minimum rule importance accepted");

DeclareOptionRef(fModelTypeS="ModRuleLinear", "Model",  "Model to be used");
AddPreDefVal(TString("ModRule"));
AddPreDefVal(TString("ModRuleLinear"));
AddPreDefVal(TString("ModLinear"));

DeclareOptionRef(fRuleFitModuleS="RFTMVA", "RuleFitModule", "Which RuleFit module to use");
AddPreDefVal(TString("RFTMVA"));
AddPreDefVal(TString("RFFriedman"));

DeclareOptionRef(fRFWorkDir="./rulefit", "RFWorkDir",   "Friedman's RuleFit module (RFF): working dir");
DeclareOptionRef(fRFNrules=2000,      "RFNrules",       "RFF: Maximum number of rules");
DeclareOptionRef(fRFNendnodes=4,      "RFNendnodes",    "RFF: Average number of end nodes");
if (IgnoreEventsWithNegWeightsInTraining()) {
   Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
         << GetMethodTypeName()
         << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string." << Endl;
}
fRuleFitModuleS.ToLower();
if      (fRuleFitModuleS == "rftmva")     fUseRuleFitJF = kFALSE;
else if (fRuleFitModuleS == "rffriedman") fUseRuleFitJF = kTRUE;
else                                      fUseRuleFitJF = kTRUE;
else if (fSepTypeS == "giniindex")    fSepType = new GiniIndex();
else if (fSepTypeS == "crossentropy") fSepType = new CrossEntropy();
fModelTypeS.ToLower();
if      (fModelTypeS == "modlinear") fRuleFit.SetModelLinear();
else if (fModelTypeS == "modrule")   fRuleFit.SetModelRules();
else                                 fRuleFit.SetModelFull();

fPruneMethodS.ToLower();

fForestTypeS.ToLower();
if      (fForestTypeS == "random")   fUseBoost = kFALSE;
else if (fForestTypeS == "adaboost") fUseBoost = kTRUE;
else                                 fUseBoost = kTRUE;
if (fUseBoost && (!fUseRuleFitJF)) fTreeEveFrac = 1.0;

if (fTreeEveFrac<=0) {
   Int_t nevents = Data()->GetNTrainingEvents();
   Double_t n = static_cast<Double_t>(nevents);
   fTreeEveFrac = min( 0.5, (100.0 + 6.0*sqrt(n))/n );
}
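// Worked example (numbers for illustration only): with n = 10000 training events
// the fraction is (100 + 6*100)/10000 = 0.07, i.e. each tree is grown on about 7%
// of the sample; the 0.5 cap only applies for small samples (roughly n < 460).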
VerifyRange(Log(), "nTrees",         fNTrees,         0,    100000, 20);
VerifyRange(Log(), "MinImp",         fMinimp,         0.0,  1.0,    0.0);
VerifyRange(Log(), "GDTauPrec",      fGDTauPrec,      1e-5, 5e-1);
VerifyRange(Log(), "GDTauMin",       fGDTauMin,       0.0,  1.0);
VerifyRange(Log(), "GDTauMax",       fGDTauMax,       fGDTauMin, 1.0);
VerifyRange(Log(), "GDPathStep",     fGDPathStep,     0.0,  100.0,  0.01);
VerifyRange(Log(), "GDErrScale",     fGDErrScale,     1.0,  100.0,  1.1);
VerifyRange(Log(), "GDPathEveFrac",  fGDPathEveFrac,  0.01, 0.9,    0.5);
VerifyRange(Log(), "GDValidEveFrac", fGDValidEveFrac, 0.01, 1.0-fGDPathEveFrac, 1.0-fGDPathEveFrac);
VerifyRange(Log(), "fEventsMin",     fMinFracNEve,    0.0,  1.0);
VerifyRange(Log(), "fEventsMax",     fMaxFracNEve,    fMinFracNEve, 1.0);
fRuleFit.GetRuleEnsemblePtr()->SetLinQuantile(fLinQuantile);
fRuleFit.GetRuleFitParamsPtr()->SetGDTauRange(fGDTauMin, fGDTauMax);
fRuleFit.GetRuleFitParamsPtr()->SetGDTau(fGDTau);
fRuleFit.GetRuleFitParamsPtr()->SetGDTauPrec(fGDTauPrec);
fRuleFit.GetRuleFitParamsPtr()->SetGDTauScan(fGDTauScan);
fRuleFit.GetRuleFitParamsPtr()->SetGDPathStep(fGDPathStep);
fRuleFit.GetRuleFitParamsPtr()->SetGDNPathSteps(fGDNPathSteps);
fRuleFit.GetRuleFitParamsPtr()->SetGDErrScale(fGDErrScale);
fRuleFit.SetImportanceCut(fMinimp);
fRuleFit.SetRuleMinDist(fRuleMinDist);
Log() << kINFO << "--------------------------------------" << Endl;
Log() << kINFO << "Friedman's RuleFit module is selected." << Endl;
Log() << kINFO << "Only the following options are used:"   << Endl;
Log() << kINFO << "--------------------------------------" << Endl;
fRuleFit.UseImportanceVisHists();

fRuleFit.SetMsgType( Log().GetMinType() );

if (HasTrainingTree()) InitEventSample();
fMonitorNtuple = new TTree("MonitorNtuple_RuleFit","RuleFit variables");
fMonitorNtuple->Branch("importance", &fNTImportance, "importance/D");
fMonitorNtuple->Branch("support",    &fNTSupport,    "support/D");
fMonitorNtuple->Branch("coefficient",&fNTCoefficient,"coefficient/D");
fMonitorNtuple->Branch("ncuts",      &fNTNcuts,      "ncuts/I");
fMonitorNtuple->Branch("nvars",      &fNTNvars,      "nvars/I");
fMonitorNtuple->Branch("type",       &fNTType,       "type/I");
fMonitorNtuple->Branch("ptag",       &fNTPtag,       "ptag/D");
fMonitorNtuple->Branch("pss",        &fNTPss,        "pss/D");
fMonitorNtuple->Branch("psb",        &fNTPsb,        "psb/D");
fMonitorNtuple->Branch("pbs",        &fNTPbs,        "pbs/D");
fMonitorNtuple->Branch("pbb",        &fNTPbb,        "pbb/D");
fMonitorNtuple->Branch("soversb",    &fNTSSB,        "soversb/D");
SetSignalReferenceCut( 0.0 );

fLinQuantile   = 0.025;
fSepTypeS      = "GiniIndex";
fPruneMethodS  = "NONE";
fPruneStrength = 3.5;
if (Data()->GetNEvents()==0) Log() << kFATAL << "<Init> Data().TrainingTree() is zero pointer" << Endl;
UInt_t nevents = Data()->GetNTrainingEvents();
for (UInt_t ievt=0; ievt<nevents; ievt++){
   const Event *ev = GetEvent(ievt);
   fEventSample.push_back(new Event(*ev));
}

if (fTreeEveFrac<=0) {
   Double_t n = static_cast<Double_t>(nevents);
   fTreeEveFrac = min( 0.5, (100.0 + 6.0*sqrt(n))/n );
}
if (fTreeEveFrac>1.0) fTreeEveFrac=1.0;
std::random_shuffle(fEventSample.begin(), fEventSample.end());
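// Note: std::random_shuffle was deprecated in C++14 and removed in C++17;
// building against a modern standard would require std::shuffle with an
// explicitly seeded engine instead.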
Log() << kDEBUG << "Set sub-sample fraction to " << fTreeEveFrac << Endl;
this->InitEventSample();

fRuleFit.GetRuleEnsemblePtr()->ClearRuleMap();
if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with RuleFit; "
                          << "please remove the option from the configuration string, or "
                          << "use \"!Normalise\"" << Endl;

fRuleFit.Initialize( this );
fRuleFit.FitCoefficients();

Log() << kDEBUG << "Computing rule and variable importance" << Endl;
fRuleFit.CalcImportance();

fRuleFit.GetRuleEnsemblePtr()->Print();
UInt_t nrules = fRuleFit.GetRuleEnsemble().GetRulesConst().size();
for (UInt_t i=0; i<nrules; i++ ) {
   fNTPtag = fRuleFit.GetRuleEnsemble().GetRulePTag(i);
   fNTPss  = fRuleFit.GetRuleEnsemble().GetRulePSS(i);
   fNTPsb  = fRuleFit.GetRuleEnsemble().GetRulePSB(i);
   fNTPbs  = fRuleFit.GetRuleEnsemble().GetRulePBS(i);
   fNTPbb  = fRuleFit.GetRuleEnsemble().GetRulePBB(i);
   fMonitorNtuple->Fill();
}
fRuleFit.MakeVisHists();

fRuleFit.MakeDebugHists();
fRuleFit.InitPtrs( this );

UInt_t nevents = Data()->GetNTrainingEvents();
std::vector<const TMVA::Event*> tmp;
for (UInt_t ievt=0; ievt<nevents; ievt++) {
   const Event *event = GetEvent(ievt);
   tmp.push_back(event);
}
fRuleFit.SetTrainingEvents( tmp );
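
// The events handed over here are processed by Friedman's external RuleFit
// binary (rf_go.exe, run inside the RFWorkDir working directory); its model
// summary is read back below.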
Log() << kDEBUG << "reading model summary from rf_go.exe output" << Endl;

Log() << kDEBUG << "calculating rule and variable importance" << Endl;
fRuleFit.CalcImportance();

fRuleFit.GetRuleEnsemblePtr()->Print();

fRuleFit.MakeVisHists();
fRanking = new Ranking( GetName(), "Importance" );

for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
   fRanking->AddRank( Rank( GetInputLabel(ivar), fRuleFit.GetRuleEnsemble().GetVarImportance(ivar) ) );
}
fRuleFit.GetRuleEnsemble().AddXMLTo( parent );

fRuleFit.GetRuleEnsemblePtr()->ReadRaw( istr );

fRuleFit.GetRuleEnsemblePtr()->ReadFromXML( wghtnode );
NoErrorCalc(err, errUpper);

return fRuleFit.EvalEvent( *GetEvent() );
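// The returned score has the usual RuleFit form
//    F(x) = offset + sum_m c_m * r_m(x) + (normalised, clamped linear terms),
// where each rule r_m(x) is 0 or 1 depending on whether the event passes the
// rule's cuts; see MakeClassRuleCuts() and MakeClassLinear() below.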
Log() << kINFO << "Write monitoring ntuple to file: " << BaseDir()->GetPath() << Endl;
fMonitorNtuple->Write();
Int_t dp = fout.precision();
fout << "   // not implemented for class: \"" << className << "\"" << std::endl;
fout << "};" << std::endl;
fout << "void " << className << "::Initialize(){}" << std::endl;
fout << "void " << className << "::Clear(){}" << std::endl;
fout << "double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const {" << std::endl;
fout << "   double rval=" << std::setprecision(10) << fRuleFit.GetRuleEnsemble().GetOffset() << ";" << std::endl;
MakeClassRuleCuts(fout);
MakeClassLinear(fout);
fout << "   return rval;" << std::endl;
fout << "}" << std::endl;
fout << std::setprecision(dp);
Int_t dp = fout.precision();
if (!fRuleFit.GetRuleEnsemble().DoRules()) {
   fout << "   //" << std::endl;
   fout << "   // ==> MODEL CONTAINS NO RULES <==" << std::endl;
   fout << "   //" << std::endl;
   return;
}
const RuleEnsemble *rens = &(fRuleFit.GetRuleEnsemble());
const std::vector< Rule* > *rules = &(rens->GetRulesConst());

std::list< std::pair<Double_t,Int_t> > sortedRules;
for (UInt_t ir=0; ir<rules->size(); ir++) {
   sortedRules.push_back( std::pair<Double_t,Int_t>( (*rules)[ir]->GetImportance()/rens->GetImportanceRef(), ir ) );
}
sortedRules.sort();
fout << "   //" << std::endl;
fout << "   // here follow all rules, ordered by importance (most important first)" << std::endl;
fout << "   // at the end of each line, the relative importance of the rule is given" << std::endl;
fout << "   //" << std::endl;
for ( std::list< std::pair<double,int> >::reverse_iterator itpair = sortedRules.rbegin();
      itpair != sortedRules.rend(); itpair++ ) {
   UInt_t ir = itpair->second;
   Double_t impr = (*rules)[ir]->GetImportance()/rens->GetImportanceRef();
   const RuleCut *ruleCut = (*rules)[ir]->GetRuleCut();
   if (impr<rens->GetImportanceCut()) fout << "   //" << std::endl;
   fout << "   if (" << std::flush;
   UInt_t nc = ruleCut->GetNcuts();
   for (UInt_t ic=0; ic<nc; ic++) {
      UInt_t   sel    = ruleCut->GetSelector(ic);
      Double_t valmin = ruleCut->GetCutMin(ic);
      Double_t valmax = ruleCut->GetCutMax(ic);
      Char_t   domin  = ruleCut->GetCutDoMin(ic);
      Char_t   domax  = ruleCut->GetCutDoMax(ic);
      if (ic>0) fout << "&&" << std::flush;
      if (domin) {
         fout << "(" << std::setprecision(10) << valmin << std::flush;
         fout << "<inputValues[" << sel << "])" << std::flush;
      }
      if (domax) {
         if (domin) fout << "&&" << std::flush;
         fout << "(inputValues[" << sel << "]" << std::flush;
         fout << "<" << std::setprecision(10) << valmax << ")" << std::flush;
      }
   }
   fout << ") rval+=" << std::setprecision(10) << (*rules)[ir]->GetCoefficient() << ";" << std::flush;
   fout << "   // importance = " << Form("%3.3f",impr) << std::endl;
}
fout << std::setprecision(dp);
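
// A single emitted rule might look like this (values are hypothetical):
//    if ((1.345<inputValues[0])&&(inputValues[0]<2.978)) rval+=0.1234;   // importance = 0.456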
if (!fRuleFit.GetRuleEnsemble().DoLinear()) {
   fout << "   //" << std::endl;
   fout << "   // ==> MODEL CONTAINS NO LINEAR TERMS <==" << std::endl;
   fout << "   //" << std::endl;
   return;
}
fout << "   //" << std::endl;
fout << "   // here follow all linear terms" << std::endl;
fout << "   // at the end of each line, the relative importance of the term is given" << std::endl;
fout << "   //" << std::endl;
const RuleEnsemble *rens = &(fRuleFit.GetRuleEnsemble());
UInt_t nlin = rens->GetNLinear();
for (UInt_t il=0; il<nlin; il++) {
   if (!rens->IsLinTermOK(il)) fout << "   //";
   Double_t norm = rens->GetLinNorm()[il];
   Double_t imp  = rens->GetLinImportance()[il]/rens->GetImportanceRef();
   fout << "   rval+=" << std::setprecision(10) << rens->GetLinCoefficients()[il]*norm
        << "*std::min( double(" << std::setprecision(10) << rens->GetLinDP(il)
        << "), std::max( double(inputValues[" << il << "]), double(" << std::setprecision(10) << rens->GetLinDM(il) << ")));";
   fout << "   // importance = " << Form("%3.3f",imp) << std::endl;
}
Log() << col << "--- Short description:" << colres << Endl;
Log() << "This method uses a collection of so-called rules to create a" << Endl;
Log() << "discriminating scoring function. Each rule consists of a series" << Endl;
Log() << "of cuts in parameter space. The ensemble of rules is created" << Endl;
Log() << "from a forest of decision trees, trained using the training data." << Endl;
Log() << "Each node (apart from the root) corresponds to one rule." << Endl;
Log() << "The scoring function is then obtained by linearly combining" << Endl;
Log() << "the rules. A fitting procedure is applied to find the optimum" << Endl;
Log() << "set of coefficients. The goal is to find a model with few rules" << Endl;
Log() << "but with strong discriminating power." << Endl;
Log() << col << "--- Performance optimisation:" << colres << Endl;
Log() << "There are two important considerations to make when optimising:" << Endl;
Log() << "  1. Topology of the decision tree forest" << brk << Endl;
Log() << "  2. Fitting of the coefficients" << Endl;
Log() << "The maximum complexity of the rules is defined by the size of" << Endl;
Log() << "the trees. Large trees will yield many complex rules and capture" << Endl;
Log() << "higher-order correlations. On the other hand, small trees will" << Endl;
Log() << "lead to a smaller ensemble with simple rules, only capable of" << Endl;
Log() << "modeling simple structures." << Endl;
Log() << "Several parameters exist for controlling the complexity of the" << Endl;
Log() << "rule ensemble." << Endl;
Log() << "The fitting procedure searches for a minimum using a gradient-" << Endl;
Log() << "directed path. Apart from step size and number of steps, the" << Endl;
Log() << "evolution of the path is defined by a cut-off parameter, tau." << Endl;
Log() << "This parameter is unknown and depends on the training data." << Endl;
Log() << "A large value will tend to give large weights to a few rules." << Endl;
Log() << "Similarly, a small value will lead to a large set of rules" << Endl;
Log() << "with similar weights." << Endl;
Log() << "A final point is the model used: rules and/or linear terms." << Endl;
Log() << "For a given training sample, the result may improve by adding" << Endl;
Log() << "linear terms. If the best performance is obtained using only linear" << Endl;
Log() << "terms, it is very likely that the Fisher discriminant would be" << Endl;
Log() << "a better choice. Ideally the fitting procedure should be able to" << Endl;
Log() << "make this choice by giving appropriate weights to either kind of term." << Endl;
Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
Log() << "I. TUNING OF RULE ENSEMBLE:" << Endl;
Log() << "   " << col << "ForestType  " << colres
      << ": It is recommended to use the default \"AdaBoost\"." << brk << Endl;
Log() << "   " << col << "nTrees      " << colres
      << ": More trees lead to more rules but also slower" << Endl;
Log() << "                 performance. With too few trees the risk is" << Endl;
Log() << "                 that the rule ensemble becomes too simple." << brk << Endl;
Log() << "   " << col << "fEventsMin  " << colres << brk << Endl;
Log() << "   " << col << "fEventsMax  " << colres
      << ": With a lower min, more large trees will be generated," << Endl;
Log() << "                 leading to more complex rules." << Endl;
Log() << "                 With a higher max, more small trees will be" << Endl;
Log() << "                 generated, leading to simpler rules." << Endl;
Log() << "                 By changing this range, the average complexity" << Endl;
Log() << "                 of the rule ensemble can be controlled." << brk << Endl;
Log() << "   " << col << "RuleMinDist " << colres
      << ": By increasing the minimum distance between" << Endl;
Log() << "                 rules, fewer and more diverse rules will remain." << Endl;
Log() << "                 Initially it is a good idea to keep this small" << Endl;
Log() << "                 or zero and let the fitting do the selection of" << Endl;
Log() << "                 rules. In order to reduce the ensemble size," << Endl;
Log() << "                 the value can then be increased." << Endl;
Log() << "II. TUNING OF THE FITTING:" << Endl;
Log() << "   " << col << "GDPathEveFrac " << colres
      << ": fraction of events used in the path evaluation" << Endl;
Log() << "                 Increasing this fraction will improve the path" << Endl;
Log() << "                 finding. However, a too high value will leave few" << Endl;
Log() << "                 unique events available for error estimation." << Endl;
Log() << "                 It is recommended to use the default = 0.5." << brk << Endl;
Log() << "   " << col << "GDTau         " << colres
      << ": cut-off parameter tau" << Endl;
Log() << "                 By default this value is set to -1.0." << Endl;
Log() << "                 This means that the cut-off parameter is" << Endl;
Log() << "                 automatically estimated. In most cases" << Endl;
Log() << "                 this should be fine. However, you may want" << Endl;
Log() << "                 to fix this value if you already know it" << Endl;
Log() << "                 and want to reduce training time." << brk << Endl;
Log() << "   " << col << "GDTauPrec     " << colres
      << ": precision of the estimated tau" << Endl;
Log() << "                 Increase this precision to find a more" << Endl;
Log() << "                 optimal cut-off parameter." << brk << Endl;
Log() << "   " << col << "GDNSteps      " << colres
      << ": number of steps in the path search" << Endl;
Log() << "                 If the number of steps is too small, then" << Endl;
Log() << "                 the program will give a warning message." << Endl;
Log() << "III. WARNING MESSAGES" << Endl;
Log() << col << "Risk(i+1)>=Risk(i) in path" << colres << brk << Endl;
Log() << col << "Chaotic behaviour of risk evolution." << colres << Endl;
Log() << "    The error rate was still decreasing at the end." << Endl;
Log() << "    By construction the Risk should always decrease." << Endl;
Log() << "    However, if the training sample is too small or" << Endl;
Log() << "    the model is overtrained, such warnings can occur." << Endl;
Log() << "    The warnings can safely be ignored if only a" << Endl;
Log() << "    few (<3) occur. If more warnings are generated," << Endl;
Log() << "    the fitting fails." << Endl;
Log() << "    A remedy may be to increase the value of" << brk << Endl;
Log() << "    " << col << "GDValidEveFrac" << colres << " to 1.0 (or a larger value)." << brk << Endl;
Log() << "    In addition, if " << col << "GDPathEveFrac" << colres << " is too high," << Endl;
Log() << "    the same warnings may occur since the events" << Endl;
Log() << "    used for error estimation are also used for" << Endl;
Log() << "    path estimation." << Endl;
Log() << "    Another possibility is to modify the model -" << Endl;
Log() << "    see above on tuning the rule ensemble." << Endl;
Log() << col << "The error rate was still decreasing at the end of the path" << colres << Endl;
Log() << "    Too few steps in path! Increase " << col << "GDNSteps" << colres << "." << Endl;
Log() << col << "Reached minimum early in the search" << colres << Endl;
Log() << "    The minimum was found early in the fitting. This" << Endl;
Log() << "    may indicate that the step size " << col << "GDStep" << colres << Endl;
Log() << "    was too large. Reduce it and rerun." << Endl;
Log() << "    If the results are still not OK, modify the" << Endl;
Log() << "    model, either by changing the rule ensemble" << Endl;
Log() << "    or by adding/removing linear terms." << Endl;