65 : fVisHistsUseImp(
kTRUE )
80 , fVisHistsUseImp(
kTRUE)
102 fNEveEffTrain = CalcWeightSum( &fTrainingEvents );
111 this->SetMethodBase(
rfbase);
112 fRuleEnsemble.Initialize(
this );
113 fRuleFitParams.SetRuleFit(
this );
125 UInt_t nevents = fMethodRuleFit->Data()->GetNTrainingEvents();
126 std::vector<const TMVA::Event*> tmp;
128 const Event *
event = fMethodRuleFit->GetEvent(
ievt);
129 tmp.push_back(event);
131 SetTrainingEvents( tmp );
140 fRuleEnsemble.MakeModel();
143 fRuleFitParams.Init();
162 fMethodRuleFit =
other.GetMethodRuleFit();
163 fMethodBase =
other.GetMethodBase();
164 fTrainingEvents =
other.GetTrainingEvents();
167 fForest =
other.GetForest();
168 fRuleEnsemble =
other.GetRuleEnsemble();
177 if (
events==0)
return 0.0;
182 sumw += ((*events)[
ie])->GetWeight();
192 fLogger->SetMinType(t);
193 fRuleEnsemble.SetMsgType(t);
194 fRuleFitParams.SetMsgType(t);
203 if (fMethodRuleFit==0) {
204 Log() << kFATAL <<
"RuleFit::BuildTree() - Attempting to build a tree NOT from a MethodRuleFit" <<
Endl;
206 std::vector<const Event *>
evevec;
208 evevec.push_back(fTrainingEventsRndm[
ie]);
212 dt->SetPruneMethod(fMethodRuleFit->GetPruneMethod());
213 dt->SetPruneStrength(fMethodRuleFit->GetPruneStrength());
223 if (fMethodRuleFit==0) {
224 Log() << kFATAL <<
"RuleFit::BuildTree() - Attempting to build a tree NOT from a MethodRuleFit" <<
Endl;
226 Log() << kDEBUG <<
"Creating a forest with " << fMethodRuleFit->GetNTrees() <<
" decision trees" <<
Endl;
227 Log() << kDEBUG <<
"Each tree is built using a random subsample with " << fNTreeSample <<
" events" <<
Endl;
229 Timer timer( fMethodRuleFit->GetNTrees(),
"RuleFit" );
242 for (
Int_t i=0; i<fMethodRuleFit->GetNTrees(); i++) {
252 frnd = 100*
rndGen.Uniform( fMethodRuleFit->GetMinFracNEve(), 0.5*fMethodRuleFit->GetMaxFracNEve() );
256 dt->SetNVars(fMethodBase->GetNvar());
259 if (
dt->GetNNodes()<3) {
267 fForest.push_back(
dt);
272 Log() << kWARNING <<
"------------------------------------------------------------------" <<
Endl;
273 Log() << kWARNING <<
" Failed growing a tree even after " <<
ntriesMax <<
" trials" <<
Endl;
274 Log() << kWARNING <<
" Possible solutions: " <<
Endl;
275 Log() << kWARNING <<
" 1. increase the number of training events" <<
Endl;
276 Log() << kWARNING <<
" 2. set a lower min fraction cut (fEventsMin)" <<
Endl;
277 Log() << kWARNING <<
" 3. maybe also decrease the max fraction cut (fEventsMax)" <<
Endl;
278 Log() << kWARNING <<
" If the above warning occurs rarely only, it can be ignored" <<
Endl;
279 Log() << kWARNING <<
"------------------------------------------------------------------" <<
Endl;
282 Log() << kDEBUG <<
"Built tree with minimum cut at N = " <<
frnd <<
"% events"
283 <<
" => N(nodes) = " << fForest.back()->GetNNodes()
284 <<
" ; n(tries) = " <<
ntries
289 if (
useBoost) RestoreEventWeights();
300 fEventWeights.clear();
301 for (std::vector<const Event*>::iterator
e=fTrainingEvents.begin();
e!=fTrainingEvents.end(); ++
e) {
303 fEventWeights.push_back(
w);
313 if (fEventWeights.size() != fTrainingEvents.size()) {
314 Log() << kERROR <<
"RuleFit::RestoreEventWeights() called without having called SaveEventWeights() before!" <<
Endl;
317 for (std::vector<const Event*>::iterator
e=fTrainingEvents.begin();
e!=fTrainingEvents.end(); ++
e) {
318 (*e)->SetBoostWeight(fEventWeights[
ie]);
335 for (std::vector<const Event*>::iterator
e=fTrainingEvents.begin();
e!=fTrainingEvents.end(); ++
e) {
357 for (std::vector<const Event*>::iterator
e=fTrainingEvents.begin();
e!=fTrainingEvents.end(); ++
e) {
359 (*e)->SetBoostWeight( (*e)->GetBoostWeight() *
boostWeight);
365 for (std::vector<const Event*>::iterator
e=fTrainingEvents.begin();
e!=fTrainingEvents.end(); ++
e) {
366 (*e)->SetBoostWeight( (*e)->GetBoostWeight() *
scale);
390 Log() << kVERBOSE <<
"Nodes in trees: average & std dev = " <<
sumn/
ntrees <<
" , " << sig <<
Endl;
400 Log() << kVERBOSE <<
"Fitting rule/linear terms" <<
Endl;
401 fRuleFitParams.MakeGDPath();
409 Log() << kVERBOSE <<
"Calculating importance" <<
Endl;
410 fRuleEnsemble.CalcImportance();
411 fRuleEnsemble.CleanupRules();
412 fRuleEnsemble.CleanupLinear();
413 fRuleEnsemble.CalcVarImportance();
414 Log() << kVERBOSE <<
"Filling rule statistics" <<
Endl;
415 fRuleEnsemble.RuleResponseStats();
423 return fRuleEnsemble.EvalEvent(
e );
431 if (fMethodRuleFit==0) Log() << kFATAL <<
"RuleFit::SetTrainingEvents - MethodRuleFit not initialized" <<
Endl;
433 if (
neve==0) Log() << kWARNING <<
"An empty sample of training events was given" <<
Endl;
436 fTrainingEvents.clear();
437 fTrainingEventsRndm.clear();
439 fTrainingEvents.push_back(
static_cast< const Event *
>(
el[i]));
440 fTrainingEventsRndm.push_back(
static_cast< const Event *
>(
el[i]));
444 std::shuffle(fTrainingEventsRndm.begin(), fTrainingEventsRndm.end(), fRNGEngine);
447 fNTreeSample =
static_cast<UInt_t>(
neve*fMethodRuleFit->GetTreeEveFrac());
448 Log() << kDEBUG <<
"Number of events per tree : " << fNTreeSample
449 <<
" ( N(events) = " <<
neve <<
" )"
450 <<
" randomly drawn without replacement" <<
Endl;
466 Log() << kWARNING <<
"GetRndmSampleEvents() : requested sub sample size larger than total size (BUG!).";
477 if (
hlist.empty())
return;
486 w =
hs->GetMaximum();
487 wm =
hs->GetMinimum();
558 xc = h2->GetXaxis()->GetBinCenter(
fbin);
560 if (fVisHistsUseImp) {
561 val =
rule->GetImportance();
564 val =
rule->GetCoefficient()*
rule->GetSupport();
566 h2->Fill(
xc,0.5,val*
f);
576 if (!fRuleEnsemble.DoLinear())
return;
582 if (fVisHistsUseImp) {
583 val = fRuleEnsemble.GetLinImportance(
vind);
586 val = fRuleEnsemble.GetLinCoefficients(
vind);
589 xc = h2->GetXaxis()->GetBinCenter(bin);
590 h2->Fill(
xc,0.5,val);
602 if (fVisHistsUseImp) {
603 val =
rule->GetImportance();
606 val =
rule->GetCoefficient()*
rule->GetSupport();
653 xc = h2->GetXaxis()->GetBinCenter(
binx);
664 yc = h2->GetYaxis()->GetBinCenter(
biny);
676 Int_t nvar = fMethodBase->GetNvar();
677 if (
nhists!=nvar) Log() << kFATAL <<
"BUG TRAP: number of hists is not equal the number of variables!" <<
Endl;
679 std::vector<Int_t>
vindex;
685 if (fMethodBase->GetInputTitle(
iv) ==
hstr)
709 if (
ruleimp<fRuleEnsemble.GetImportanceCut())
return;
712 Int_t nvar = fMethodBase->GetNvar();
714 if (
nhists!=
ncorr) Log() << kERROR <<
"BUG TRAP: number of corr hists is not correct! ncorr = "
717 std::vector< std::pair<Int_t,Int_t> >
vindex;
724 iv1 = fMethodBase->DataInfo().FindVarIndex(
var1 );
725 iv2 = fMethodBase->DataInfo().FindVarIndex(
var2 );
729 Log() << kERROR <<
"BUG TRAP: should not be here - failed getting var1 and var2" <<
Endl;
769 "InputVariables_Deco",
770 "InputVariables_PCA",
771 "InputVariables_Gauss",
772 "InputVariables_Gauss_Deco" };
786 Log() << kWARNING <<
"No basedir - BUG??" <<
Endl;
795 Log() << kWARNING <<
"No input variable directory found - BUG?" <<
Endl;
800 Log() << kWARNING <<
"No correlation directory found" <<
Endl;
801 Log() << kWARNING <<
"Check for other warnings related to correlation histograms" <<
Endl;
805 Log() << kWARNING <<
"No rulefit method directory found - BUG?" <<
Endl;
815 Log() << kWARNING <<
"No correlation directory found : " <<
corrDirName <<
Endl;
821 Log() << kDEBUG <<
"Got number of plots = " <<
noPlots <<
Endl;
828 while ((key = (
TKey*)next())) {
834 Log() << kDEBUG <<
"Got histogram : " <<
hname <<
Endl;
837 if (
hname.Contains(
"__S")){
839 htitle.ReplaceAll(
"signal",
"");
841 newname.ReplaceAll(
"__Signal",
"__RF");
842 newname.ReplaceAll(
"__S",
"__RF");
863 if ((
hname.Contains(
"scat_")) && (
hname.Contains(
"_Signal"))) {
864 Log() << kDEBUG <<
"Got histogram (2D) : " <<
hname <<
Endl;
866 htitle.ReplaceAll(
"(Signal)",
"");
868 newname.ReplaceAll(
"_Signal",
"_RF2D");
901 rule = fRuleEnsemble.GetRulesConst(i);
912 rule = fRuleEnsemble.GetRulesConst(i);
930 Log() << kWARNING <<
"<MakeDebugHists> No rulefit method directory found - bug?" <<
Endl;
936 std::vector<Double_t>
fncuts;
937 std::vector<Double_t>
fnvars;
944 ruleA = fRuleEnsemble.GetRulesConst(i);
946 ruleB = fRuleEnsemble.GetRulesConst(
j);
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t wmin
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t wmax
TClass instances represent classes, structs and namespaces in the ROOT type system.
Bool_t InheritsFrom(const char *cl) const override
Return kTRUE if this class inherits from a class with name "classname".
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Describe directory structure in memory.
1-D histogram with a float per channel (see TH1 documentation)
virtual Int_t GetNbinsY() const
virtual Int_t GetNbinsX() const
2-D histogram with a float per channel (see TH1 documentation)
Book space in a file, create I/O buffers, to fill them, (un)compress them.
virtual const char * GetClassName() const
virtual TObject * ReadObj()
To read a TObject* from the file.
Implementation of a Decision Tree.
Virtual base Class for all MVA method.
J Friedman's RuleFit method.
ostringstream derivative to redirect and format output
A class implementing various fits of rule ensembles.
void GetRndmSampleEvents(std::vector< const TMVA::Event * > &evevec, UInt_t nevents)
draw a random subsample of the training events without replacement
Double_t EvalEvent(const Event &e)
evaluate single event
void SetMethodBase(const MethodBase *rfbase)
set MethodBase
void InitPtrs(const TMVA::MethodBase *rfbase)
initialize pointers
void Boost(TMVA::DecisionTree *dt)
Boost the events.
void ForestStatistics()
summary of statistics of all trees
static const Int_t randSEED
void CalcImportance()
calculates the importance of each rule
void SetMsgType(EMsgType t)
set the minimum message type of this class's logger to t and pass the same type on to the rule ensemble and the fit parameters
void Initialize(const TMVA::MethodBase *rfbase)
initialize the parameters of the RuleFit method and make rules
virtual ~RuleFit(void)
destructor
void FillVisHistCorr(const Rule *rule, std::vector< TH2F * > &hlist)
help routine to MakeVisHists() - fills for all correlation plots
std::default_random_engine fRNGEngine
void InitNEveEff()
init effective number of events (using event weights)
void SaveEventWeights()
save event weights - must be done before making the forest
void FillCut(TH2F *h2, const TMVA::Rule *rule, Int_t vind)
Fill cut.
void FillLin(TH2F *h2, Int_t vind)
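fill the histogram for the linear term of variable vind, weighted with either its importance or its coefficient; does nothing if linear terms are not used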
Bool_t GetCorrVars(TString &title, TString &var1, TString &var2)
get first and second variables from title
void MakeForest()
make a forest of decision trees
void FitCoefficients()
Fit the coefficients for the rule ensemble.
void FillCorr(TH2F *h2, const TMVA::Rule *rule, Int_t v1, Int_t v2)
fill rule correlation between vx and vy, weighted with either the importance or the coefficient
void NormVisHists(std::vector< TH2F * > &hlist)
normalize rule importance hists
void RestoreEventWeights()
restore the event boost weights stored by SaveEventWeights() - reports an error if SaveEventWeights() was not called before
void MakeVisHists()
this will create histograms visualizing the rule ensemble
void FillVisHistCut(const Rule *rule, std::vector< TH2F * > &hlist)
help routine to MakeVisHists() - fills for all variables
void BuildTree(TMVA::DecisionTree *dt)
build the decision tree using fNTreeSample events from fTrainingEventsRndm
void SetTrainingEvents(const std::vector< const TMVA::Event * > &el)
set the training events and keep a second, randomly shuffled copy of them
void Copy(const RuleFit &other)
copy method
Double_t CalcWeightSum(const std::vector< const TMVA::Event * > *events, UInt_t neve=0)
calculate the sum of weights
RuleFit(void)
default constructor
void MakeDebugHists()
this will create histograms intended for debugging or for the curious user
Implementation of a rule.
Timing information for training and evaluation of MVA methods.
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
const char * GetName() const override
Returns name of object.
const char * GetTitle() const override
Returns title of object.
Random number generator class based on M.
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
A TTree represents a columnar dataset.
MsgLogger & Endl(MsgLogger &ml)
Double_t Sqrt(Double_t x)
Returns the square root of x.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.