64 : fVisHistsUseImp(
kTRUE )
79 , fVisHistsUseImp(
kTRUE)
101 fNEveEffTrain = CalcWeightSum( &fTrainingEvents );
110 this->SetMethodBase(
rfbase);
111 fRuleEnsemble.Initialize(
this );
112 fRuleFitParams.SetRuleFit(
this );
124 UInt_t nevents = fMethodRuleFit->Data()->GetNTrainingEvents();
125 std::vector<const TMVA::Event*> tmp;
127 const Event *
event = fMethodRuleFit->GetEvent(
ievt);
128 tmp.push_back(event);
130 SetTrainingEvents( tmp );
139 fRuleEnsemble.MakeModel();
142 fRuleFitParams.Init();
161 fMethodRuleFit =
other.GetMethodRuleFit();
162 fMethodBase =
other.GetMethodBase();
163 fTrainingEvents =
other.GetTrainingEvents();
166 fForest =
other.GetForest();
167 fRuleEnsemble =
other.GetRuleEnsemble();
176 if (events==0)
return 0.0;
181 sumw += ((*events)[
ie])->GetWeight();
191 fLogger->SetMinType(t);
192 fRuleEnsemble.SetMsgType(t);
193 fRuleFitParams.SetMsgType(t);
202 if (fMethodRuleFit==0) {
203 Log() << kFATAL <<
"RuleFit::BuildTree() - Attempting to build a tree NOT from a MethodRuleFit" <<
Endl;
205 std::vector<const Event *>
evevec;
207 evevec.push_back(fTrainingEventsRndm[
ie]);
211 dt->SetPruneMethod(fMethodRuleFit->GetPruneMethod());
212 dt->SetPruneStrength(fMethodRuleFit->GetPruneStrength());
222 if (fMethodRuleFit==0) {
223 Log() << kFATAL <<
"RuleFit::BuildTree() - Attempting to build a tree NOT from a MethodRuleFit" <<
Endl;
225 Log() << kDEBUG <<
"Creating a forest with " << fMethodRuleFit->GetNTrees() <<
" decision trees" <<
Endl;
226 Log() << kDEBUG <<
"Each tree is built using a random subsample with " << fNTreeSample <<
" events" <<
Endl;
228 Timer timer( fMethodRuleFit->GetNTrees(),
"RuleFit" );
241 for (
Int_t i=0; i<fMethodRuleFit->GetNTrees(); i++) {
251 frnd = 100*
rndGen.Uniform( fMethodRuleFit->GetMinFracNEve(), 0.5*fMethodRuleFit->GetMaxFracNEve() );
255 dt->SetNVars(fMethodBase->GetNvar());
258 if (
dt->GetNNodes()<3) {
266 fForest.push_back(
dt);
271 Log() << kWARNING <<
"------------------------------------------------------------------" <<
Endl;
272 Log() << kWARNING <<
" Failed growing a tree even after " <<
ntriesMax <<
" trials" <<
Endl;
273 Log() << kWARNING <<
" Possible solutions: " <<
Endl;
274 Log() << kWARNING <<
" 1. increase the number of training events" <<
Endl;
275 Log() << kWARNING <<
" 2. set a lower min fraction cut (fEventsMin)" <<
Endl;
276 Log() << kWARNING <<
" 3. maybe also decrease the max fraction cut (fEventsMax)" <<
Endl;
277 Log() << kWARNING <<
" If the above warning occurs rarely only, it can be ignored" <<
Endl;
278 Log() << kWARNING <<
"------------------------------------------------------------------" <<
Endl;
281 Log() << kDEBUG <<
"Built tree with minimum cut at N = " <<
frnd <<
"% events"
282 <<
" => N(nodes) = " << fForest.back()->GetNNodes()
283 <<
" ; n(tries) = " <<
ntries
288 if (
useBoost) RestoreEventWeights();
299 fEventWeights.clear();
300 for (std::vector<const Event*>::iterator
e=fTrainingEvents.begin();
e!=fTrainingEvents.end(); ++
e) {
302 fEventWeights.push_back(
w);
312 if (fEventWeights.size() != fTrainingEvents.size()) {
313 Log() << kERROR <<
"RuleFit::RestoreEventWeights() called without having called SaveEventWeights() before!" <<
Endl;
316 for (std::vector<const Event*>::iterator
e=fTrainingEvents.begin();
e!=fTrainingEvents.end(); ++
e) {
317 (*e)->SetBoostWeight(fEventWeights[
ie]);
334 for (std::vector<const Event*>::iterator
e=fTrainingEvents.begin();
e!=fTrainingEvents.end(); ++
e) {
356 for (std::vector<const Event*>::iterator
e=fTrainingEvents.begin();
e!=fTrainingEvents.end(); ++
e) {
358 (*e)->SetBoostWeight( (*e)->GetBoostWeight() *
boostWeight);
364 for (std::vector<const Event*>::iterator
e=fTrainingEvents.begin();
e!=fTrainingEvents.end(); ++
e) {
365 (*e)->SetBoostWeight( (*e)->GetBoostWeight() *
scale);
389 Log() << kVERBOSE <<
"Nodes in trees: average & std dev = " <<
sumn/
ntrees <<
" , " << sig <<
Endl;
399 Log() << kVERBOSE <<
"Fitting rule/linear terms" <<
Endl;
400 fRuleFitParams.MakeGDPath();
408 Log() << kVERBOSE <<
"Calculating importance" <<
Endl;
409 fRuleEnsemble.CalcImportance();
410 fRuleEnsemble.CleanupRules();
411 fRuleEnsemble.CleanupLinear();
412 fRuleEnsemble.CalcVarImportance();
413 Log() << kVERBOSE <<
"Filling rule statistics" <<
Endl;
414 fRuleEnsemble.RuleResponseStats();
422 return fRuleEnsemble.EvalEvent(
e );
430 if (fMethodRuleFit==0) Log() << kFATAL <<
"RuleFit::SetTrainingEvents - MethodRuleFit not initialized" <<
Endl;
432 if (
neve==0) Log() << kWARNING <<
"An empty sample of training events was given" <<
Endl;
435 fTrainingEvents.clear();
436 fTrainingEventsRndm.clear();
438 fTrainingEvents.push_back(
static_cast< const Event *
>(
el[i]));
439 fTrainingEventsRndm.push_back(
static_cast< const Event *
>(
el[i]));
443 std::shuffle(fTrainingEventsRndm.begin(), fTrainingEventsRndm.end(), fRNGEngine);
446 fNTreeSample =
static_cast<UInt_t>(
neve*fMethodRuleFit->GetTreeEveFrac());
447 Log() << kDEBUG <<
"Number of events per tree : " << fNTreeSample
448 <<
" ( N(events) = " <<
neve <<
" )"
449 <<
" randomly drawn without replacement" <<
Endl;
465 Log() << kWARNING <<
"GetRndmSampleEvents() : requested sub sample size larger than total size (BUG!).";
476 if (
hlist.empty())
return;
485 w =
hs->GetMaximum();
486 wm =
hs->GetMinimum();
557 xc = h2->GetXaxis()->GetBinCenter(
fbin);
559 if (fVisHistsUseImp) {
560 val =
rule->GetImportance();
563 val =
rule->GetCoefficient()*
rule->GetSupport();
565 h2->Fill(
xc,0.5,val*
f);
575 if (!fRuleEnsemble.DoLinear())
return;
581 if (fVisHistsUseImp) {
582 val = fRuleEnsemble.GetLinImportance(
vind);
585 val = fRuleEnsemble.GetLinCoefficients(
vind);
588 xc = h2->GetXaxis()->GetBinCenter(bin);
589 h2->Fill(
xc,0.5,val);
601 if (fVisHistsUseImp) {
602 val =
rule->GetImportance();
605 val =
rule->GetCoefficient()*
rule->GetSupport();
652 xc = h2->GetXaxis()->GetBinCenter(
binx);
663 yc = h2->GetYaxis()->GetBinCenter(
biny);
675 Int_t nvar = fMethodBase->GetNvar();
676 if (
nhists!=nvar) Log() << kFATAL <<
"BUG TRAP: number of hists is not equal the number of variables!" <<
Endl;
678 std::vector<Int_t>
vindex;
684 if (fMethodBase->GetInputTitle(
iv) ==
hstr)
708 if (
ruleimp<fRuleEnsemble.GetImportanceCut())
return;
711 Int_t nvar = fMethodBase->GetNvar();
713 if (
nhists!=
ncorr) Log() << kERROR <<
"BUG TRAP: number of corr hists is not correct! ncorr = "
716 std::vector< std::pair<Int_t,Int_t> >
vindex;
723 iv1 = fMethodBase->DataInfo().FindVarIndex(
var1 );
724 iv2 = fMethodBase->DataInfo().FindVarIndex(
var2 );
728 Log() << kERROR <<
"BUG TRAP: should not be here - failed getting var1 and var2" <<
Endl;
768 "InputVariables_Deco",
769 "InputVariables_PCA",
770 "InputVariables_Gauss",
771 "InputVariables_Gauss_Deco" };
785 Log() << kWARNING <<
"No basedir - BUG??" <<
Endl;
794 Log() << kWARNING <<
"No input variable directory found - BUG?" <<
Endl;
799 Log() << kWARNING <<
"No correlation directory found" <<
Endl;
800 Log() << kWARNING <<
"Check for other warnings related to correlation histograms" <<
Endl;
804 Log() << kWARNING <<
"No rulefit method directory found - BUG?" <<
Endl;
814 Log() << kWARNING <<
"No correlation directory found : " <<
corrDirName <<
Endl;
820 Log() << kDEBUG <<
"Got number of plots = " <<
noPlots <<
Endl;
827 while ((key = (
TKey*)next())) {
833 Log() << kDEBUG <<
"Got histogram : " <<
hname <<
Endl;
836 if (
hname.Contains(
"__S")){
838 htitle.ReplaceAll(
"signal",
"");
840 newname.ReplaceAll(
"__Signal",
"__RF");
841 newname.ReplaceAll(
"__S",
"__RF");
862 if ((
hname.Contains(
"scat_")) && (
hname.Contains(
"_Signal"))) {
863 Log() << kDEBUG <<
"Got histogram (2D) : " <<
hname <<
Endl;
865 htitle.ReplaceAll(
"(Signal)",
"");
867 newname.ReplaceAll(
"_Signal",
"_RF2D");
900 rule = fRuleEnsemble.GetRulesConst(i);
911 rule = fRuleEnsemble.GetRulesConst(i);
929 Log() << kWARNING <<
"<MakeDebugHists> No rulefit method directory found - bug?" <<
Endl;
935 std::vector<Double_t>
fncuts;
936 std::vector<Double_t>
fnvars;
943 ruleA = fRuleEnsemble.GetRulesConst(i);
945 ruleB = fRuleEnsemble.GetRulesConst(
j);
double Double_t
Double 8 bytes.
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t wmin
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t wmax
TClass instances represent classes, structs and namespaces in the ROOT type system.
Bool_t InheritsFrom(const char *cl) const override
Return kTRUE if this class inherits from a class with name "classname".
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Describe directory structure in memory.
1-D histogram with a float per channel (see TH1 documentation)
virtual Int_t GetNbinsY() const
virtual Int_t GetNbinsX() const
2-D histogram with a float per channel (see TH1 documentation)
Book space in a file, create I/O buffers, to fill them, (un)compress them.
virtual const char * GetClassName() const
virtual TObject * ReadObj()
To read a TObject* from the file.
Implementation of a Decision Tree.
Virtual base class for all MVA methods.
J Friedman's RuleFit method.
ostringstream derivative to redirect and format output
A class implementing various fits of rule ensembles.
void GetRndmSampleEvents(std::vector< const TMVA::Event * > &evevec, UInt_t nevents)
draw a random subsample of the training events without replacement
Double_t EvalEvent(const Event &e)
evaluate single event
void SetMethodBase(const MethodBase *rfbase)
set MethodBase
void InitPtrs(const TMVA::MethodBase *rfbase)
initialize pointers
void Boost(TMVA::DecisionTree *dt)
Boost the events.
void ForestStatistics()
summary of statistics of all trees
static const Int_t randSEED
void CalcImportance()
calculates the importance of each rule
void SetMsgType(EMsgType t)
set the message type t for this class's logger and for its subtools (the rule ensemble and the fit parameters)
void Initialize(const TMVA::MethodBase *rfbase)
initialize the parameters of the RuleFit method and make rules
virtual ~RuleFit(void)
destructor
void FillVisHistCorr(const Rule *rule, std::vector< TH2F * > &hlist)
help routine to MakeVisHists() - fills for all correlation plots
std::default_random_engine fRNGEngine
void InitNEveEff()
init effective number of events (using event weights)
void SaveEventWeights()
save event weights - must be done before making the forest
void FillCut(TH2F *h2, const TMVA::Rule *rule, Int_t vind)
Fill cut.
void FillLin(TH2F *h2, Int_t vind)
fill lin
Bool_t GetCorrVars(TString &title, TString &var1, TString &var2)
get first and second variables from title
void MakeForest()
make a forest of decision trees
void FitCoefficients()
Fit the coefficients for the rule ensemble.
void FillCorr(TH2F *h2, const TMVA::Rule *rule, Int_t v1, Int_t v2)
fill rule correlation between vx and vy, weighted with either the importance or the coefficient
void NormVisHists(std::vector< TH2F * > &hlist)
normalize rule importance hists
void RestoreEventWeights()
restore the event weights previously stored by SaveEventWeights() - logs an error if SaveEventWeights() was not called before
void MakeVisHists()
this will create histograms visualizing the rule ensemble
void FillVisHistCut(const Rule *rule, std::vector< TH2F * > &hlist)
help routine to MakeVisHists() - fills for all variables
void BuildTree(TMVA::DecisionTree *dt)
build the decision tree using fNTreeSample events from fTrainingEventsRndm
void SetTrainingEvents(const std::vector< const TMVA::Event * > &el)
set the training events and keep a randomly shuffled copy of them
void Copy(const RuleFit &other)
copy method
Double_t CalcWeightSum(const std::vector< const TMVA::Event * > *events, UInt_t neve=0)
calculate the sum of weights
RuleFit(void)
default constructor
void MakeDebugHists()
this will create histograms intended rather for debugging or for the curious user
Implementation of a rule.
Timing information for training and evaluation of MVA methods.
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
const char * GetName() const override
Returns name of object.
const char * GetTitle() const override
Returns title of object.
Random number generator class based on M.
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
A TTree represents a columnar dataset.
MsgLogger & Endl(MsgLogger &ml)
Double_t Sqrt(Double_t x)
Returns the square root of x.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.