81 , fGDPathStep ( 0.01 )
82 , fGDNPathSteps ( 1000 )
119 Log() << kFATAL <<
"RuleFitParams::Init() - MethodRuleFit ptr is null" <<
Endl;
167 Log() << kVERBOSE <<
"Path constr. - event index range = [ " <<
fPathIdx1 <<
", " << fPathIdx2 <<
" ]" 169 Log() << kVERBOSE <<
"Error estim. - event index range = [ " <<
fPerfIdx1 <<
", " << fPerfIdx2 <<
" ]" 173 Log() << kDEBUG <<
"Number of rules in ensemble: " <<
fNRules <<
Endl;
175 Log() << kDEBUG <<
"Rules are disabled " <<
Endl;
180 Log() << kDEBUG <<
"Linear terms are disabled " <<
Endl;
188 fGDNtuple=
new TTree(
"MonitorNtuple_RuleFitParams",
"RuleFit path search");
210 std::vector<Double_t> &avsel,
211 std::vector<Double_t> &avrul )
213 UInt_t neve = ind2-ind1+1;
215 Log() << kFATAL <<
"<EvaluateAverage> - no events selected for path search -> BUG!" <<
Endl;
223 const std::vector<UInt_t> *eventRuleMap=0;
230 for (
UInt_t i=ind1; i<ind2+1; i++) {
240 nrules = (*eventRuleMap).size();
243 avrul[(*eventRuleMap)[
r]] += ew;
249 for (
UInt_t i=ind1; i<ind2+1; i++) {
267 avsel[sel] = avsel[sel] / sumew;
271 avrul[
r] = avrul[
r] / sumew;
317 UInt_t neve = ind2-ind1+1;
319 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
324 for (
UInt_t i=ind1; i<ind2+1; i++) {
337 UInt_t neve = ind2-ind1+1;
339 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
344 for (
UInt_t i=ind1; i<ind2+1; i++) {
359 Log() << kWARNING <<
"<Penalty> Using unverified code! Check!" <<
Endl;
456 Log() << kFATAL <<
"BUG! FindGDTau() has been called BEFORE InitGD()." <<
Endl;
458 Log() << kINFO <<
"Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." <<
Endl;
478 Timer timer( nscan,
"RuleFit" );
487 if ( (ip==0) || ((ip+1)%netst==0) ) {
494 doloop = ((ip<nscan) && (fGDNTauTstOK>3));
504 Log() << kERROR <<
"<FindGDTau> number of scanned loops is zero! Should NOT see this message." <<
Endl;
510 Log() << kINFO <<
"Best path found with tau = " <<
Form(
"%4.4f",
fGDTau)
541 Log() << kINFO <<
"GD path scan - the scan stops when the max num. of steps is reached or a min is found" 558 Log() << kVERBOSE <<
"Creating GD path" <<
Endl;
578 std::vector<Double_t> coefsMin;
579 std::vector<Double_t> lincoefsMin;
594 std::vector<Double_t> valx;
595 std::vector<Double_t> valy;
596 std::vector<Double_t> valxy;
608 if (imod>100) imod=100;
619 Log() << kVERBOSE <<
"Obtained initial offset = " << offsetMin <<
Endl;
637 Int_t stopCondition=0;
639 Log() << kINFO <<
"Fitting model..." <<
Endl;
645 if (isVerbose) t0 = clock();
648 tgradvec =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
649 stgradvec += tgradvec;
653 if (isVerbose) t0 = clock();
656 tupgrade =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
657 stupgrade += tupgrade;
661 docheck = ((iloop==0) ||((iloop+1)%imod==0));
678 trisk =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
688 Log() <<
"Risk(i+1)>=Risk(i) in path" <<
Endl;
689 riskFlat=(nbadrisk>3);
691 Log() << kWARNING <<
"Chaotic behaviour of risk evolution" <<
Endl;
692 Log() <<
"--- STOPPING MINIMISATION ---" <<
Endl;
693 Log() <<
"This may be OK if minimum is already found" <<
Endl;
703 if (isVerbose) t0 = clock();
716 tperf =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
735 if (valx.size()==npreg) {
736 valx.erase(valx.begin());
737 valy.erase(valy.begin());
738 valxy.erase(valxy.begin());
749 Log() << kVERBOSE <<
"ParamsIRE : " 751 <<
Form(
"%8d",iloop+1) <<
" " 753 <<
Form(
"%4.4f",riskPerf) <<
" " 771 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
775 else if (endOfLoop) {
779 Log() << kWARNING <<
"BUG TRAP: should not be here - still, this bug is harmless;)" <<
Endl;
793 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
794 Log() << kINFO <<
"Found minimum at step " << indMin+1 <<
" with error = " << errmin <<
Endl;
795 Log() << kINFO <<
"Reason for ending loop: ";
796 switch (stopCondition) {
798 Log() << kINFO <<
"clear minima found";
801 Log() << kINFO <<
"chaotic behaviour of risk";
804 Log() << kINFO <<
"end of loop reached";
807 Log() << kINFO <<
"unknown!";
811 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
815 Log() << kWARNING <<
"Reached minimum early in the search" <<
Endl;
816 Log() <<
"Check results and maybe decrease GDStep size" <<
Endl;
826 Log() << kINFO <<
"The error rate was still decreasing at the end of the path" <<
Endl;
827 Log() << kINFO <<
"Increase number of steps (GDNSteps)." <<
Endl;
838 Log() << kFATAL <<
"BUG TRAP: minimum not found in MakeGDPath()" <<
Endl;
845 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
846 Log() << kVERBOSE <<
"Timing per loop (ms):" <<
Endl;
847 Log() << kVERBOSE <<
" gradvec = " << 1000*stgradvec/iloop <<
Endl;
848 Log() << kVERBOSE <<
" upgrade = " << 1000*stupgrade/iloop <<
Endl;
849 Log() << kVERBOSE <<
" risk = " << 1000*strisk/iloop <<
Endl;
850 Log() << kVERBOSE <<
" perf = " << 1000*stperf/iloop <<
Endl;
851 Log() << kVERBOSE <<
" loop = " << 1000*stloop/iloop <<
Endl;
854 Log() << kVERBOSE <<
" GDPtr = " << 1000*
gGDPtr/iloop <<
Endl;
888 Log() << kWARNING <<
"<CalcFStar> Using unverified code! Check!" <<
Endl;
891 Log() << kFATAL <<
"<CalcFStar> Invalid start/end indices!" <<
Endl;
898 std::vector<Double_t> fstarSorted;
902 const Event&
e = *(*events)[i];
904 fFstar.push_back(fstarVal);
905 fstarSorted.push_back(fstarVal);
909 std::sort( fstarSorted.begin(), fstarSorted.end() );
912 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
928 Log() << kWARNING <<
"<Optimism> Using unverified code! Check!" <<
Endl;
931 Log() << kFATAL <<
"<Optimism> Invalid start/end indices!" <<
Endl;
946 const Event&
e = *(*events)[i];
952 sumyhaty += w*yhat*
y;
957 Double_t cov = sumyhaty - sumyhat*sumy;
969 Log() << kWARNING <<
"<ErrorRateReg> Using unverified code! Check!" <<
Endl;
972 Log() << kFATAL <<
"<ErrorRateReg> Invalid start/end indices!" <<
Endl;
974 if (
fFstar.size()!=neve) {
975 Log() << kFATAL <<
"--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!" 976 <<
" Fstar.size() = " <<
fFstar.size() <<
" , N(events) = " << neve <<
Endl;
991 const Event&
e = *(*events)[i];
1000 return sumdf/sumdfmed;
1013 Log() << kWARNING <<
"<ErrorRateBin> Using unverified code! Check!" <<
Endl;
1016 Log() << kFATAL <<
"<ErrorRateBin> Invalid start/end indices!" <<
Endl;
1027 const Event&
e = *(*events)[i];
1030 signF = (sF>0 ? +1:-1);
1046 std::vector<Double_t> & sFbkg )
1049 std::sort(sFsig.begin(), sFsig.end());
1050 std::sort(sFbkg.begin(), sFbkg.end());
1051 const Double_t minsig = sFsig.front();
1052 const Double_t minbkg = sFbkg.front();
1053 const Double_t maxsig = sFsig.back();
1054 const Double_t maxbkg = sFbkg.back();
1055 const Double_t minf = std::min(minsig,minbkg);
1056 const Double_t maxf = std::max(maxsig,maxbkg);
1059 const Int_t np = std::min((nsig+nbkg)/4,50);
1060 const Double_t df = (maxf-minf)/(np-1);
1065 std::vector<Double_t>::const_iterator indit;
1080 for (
Int_t i=0; i<np; i++) {
1082 indit = std::find_if(sFsig.begin(), sFsig.end(),
1083 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1084 nesig = sFsig.end()-indit;
1087 indit = std::find_if(sFbkg.begin(), sFbkg.end(),
1088 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1089 nrbkg = indit-sFbkg.begin();
1101 area += 0.5*(1+rejb)*effs;
1114 Log() << kWARNING <<
"<ErrorRateRoc> Should not be used in the current version! Check!" <<
Endl;
1117 Log() << kFATAL <<
"<ErrorRateRoc> Invalid start/end indices!" <<
Endl;
1124 std::vector<Double_t> sFsig;
1125 std::vector<Double_t> sFbkg;
1132 const Event&
e = *(*events)[i];
1135 sFsig.push_back(sF);
1140 sFbkg.push_back(sF);
1145 fsigave = sumfsig/sFsig.size();
1146 fbkgave = sumfbkg/sFbkg.size();
1162 Log() << kWARNING <<
"<ErrorRateRocTst> Should not be used in the current version! Check!" <<
Endl;
1165 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1173 std::vector< std::vector<Double_t> > sFsig;
1174 std::vector< std::vector<Double_t> > sFbkg;
1186 sFsig[itau].push_back(sF);
1189 sFbkg[itau].push_back(sF);
1210 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1226 if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
1227 if (fGDErrTst[itau]<minx) {
1228 minx=fGDErrTst[itau];
1250 Log() << kVERBOSE <<
"TAU: " 1268 Log() << kFATAL <<
"<MakeTstGradientVector> Invalid start/end indices!" <<
Endl;
1292 const std::vector<UInt_t> *eventRuleMap=0;
1299 const Event *
e = (*events)[i];
1303 nrules = (*eventRuleMap).size();
1316 for (
UInt_t ir=0; ir<nrules; ir++) {
1317 rind = (*eventRuleMap)[ir];
1347 Double_t maxv = (maxr>maxl ? maxr:maxl);
1361 if (TMath::Abs(val)>=cthresh) {
1368 if (TMath::Abs(val)>=cthresh) {
1390 Log() << kFATAL <<
"<MakeGradientVector> Invalid start/end indices!" <<
Endl;
1410 const std::vector<UInt_t> *eventRuleMap=0;
1416 const Event *
e = (*events)[i];
1425 nrules = (*eventRuleMap).size();
1430 for (
UInt_t ir=0; ir<nrules; ir++) {
1431 rind = (*eventRuleMap)[ir];
1455 Double_t maxv = (maxr>maxl ? maxr:maxl);
1463 useRThresh = cthresh;
1464 useLThresh = cthresh;
1537 Log() << kFATAL <<
"<CalcAverageTruth> Invalid start/end indices!" <<
Endl;
1550 Log() << kVERBOSE <<
"Effective number of signal / background = " << ensig <<
" / " << enbkg <<
Endl;
static long int sum(long int i)
MsgLogger & Endl(MsgLogger &ml)
std::vector< std::vector< Double_t > > fGDCoefLinTst
std::vector< Double_t > fAverageSelectorPath
const std::vector< Double_t > & GetLinNorm() const
const std::vector< const TMVA::Event *> & GetTrainingEvents() const
Double_t EvalLinEventRaw(UInt_t vind, const Event &e, Bool_t norm) const
void MakeGradientVector()
make gradient vector
UInt_t GetNLinear() const
void EvaluateAveragePerf()
virtual Int_t Fill()
Fill all branches.
void FillCoefficients()
helper function to store the rule coefficients in local arrays
EMsgType GetMinType() const
const std::vector< UInt_t > & GetEventRuleMap(UInt_t evtidx) const
Double_t RiskPath() const
const std::vector< TMVA::Rule * > & GetRulesConst() const
std::vector< std::vector< Double_t > > fGradVecTst
void EvaluateAveragePath()
std::vector< Double_t > fAverageRulePath
Double_t GetGDValidEveFrac() const
Short_t Min(Short_t a, Short_t b)
std::vector< Double_t > fGDTauVec
std::vector< Double_t > fFstar
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
const std::vector< Double_t > & GetLinCoefficients() const
Double_t GetEventLinearValNorm(UInt_t i) const
void SetLinCoefficients(const std::vector< Double_t > &v)
void MakeTstGradientVector()
make test gradient vector for all tau same algorithm as MakeGradientVector()
#define rprev(otri1, otri2)
std::vector< std::vector< Double_t > > fGDCoefTst
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
Double_t GetEventRuleVal(UInt_t i) const
MsgLogger & Log() const
message logger
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
Double_t FStar() const
We want to estimate F* = argmin Eyx( L(y,F(x)) ), min wrt F(x); F(x) = FL(x) + FR(x) ...
Double_t CalcAverageTruth()
calculate the average truth
DataSetInfo & DataInfo() const
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters It uses a binary estimate of (y-F*(x)) (y...
void SetMinType(EMsgType minType)
RuleEnsemble * GetRuleEnsemblePtr()
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
void SetMsgType(EMsgType t)
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Double_t LossFunction(const Event &e) const
Implementation of squared-error ramp loss function (eq 39,40 in ref 1). This is used for binary Classification...
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Double_t GetGDPathEveFrac() const
std::vector< Char_t > fGDErrTstOK
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters This code is pretty messy at the moment...
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
Double_t CoefficientRadius()
Calculates sqrt(Sum(a_i^2)), i=1..N (NOTE do not include a0)
std::vector< Double_t > fGDOfsTst
void SetOffset(Double_t v=0.0)
void ClearCoefficients(Double_t val=0)
const TMVA::Event * GetRuleMapEvent(UInt_t evtidx) const
Double_t Optimism()
implementation of eq.
char * Form(const char *fmt,...)
Double_t Penalty() const
This is the "lasso" penalty To be used for regression.
void ClearLinCoefficients(Double_t val=0)
Bool_t IsRuleMapOK() const
void SetLinCoefficient(UInt_t i, Double_t v)
void SetCoefficients(const std::vector< Double_t > &v)
set all rule coefficients
void InitNtuple()
initializes the ntuple
Double_t GetTrainingEventWeight(UInt_t i) const
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
static constexpr double s
virtual ~RuleFitParams()
destructor
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
std::vector< std::vector< Double_t > > fGradVecLinTst
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
ostringstream derivative to redirect and format output
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
Estimates the error rate with the current set of parameters.
RuleFitParams()
constructor
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
std::vector< Double_t > fGradVecLin
Double_t GetOffset() const
std::vector< Double_t > fGradVec
Double_t EvalLinEvent() const
Short_t Max(Short_t a, Short_t b)
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO : rewrite bad dependency on EvaluateAverage() ! ...
RuleEnsemble * fRuleEnsemble
std::vector< TMVA::Rule * > & GetRules()
Bool_t IsSignal(const Event *ev) const
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
void GetCoefficients(std::vector< Double_t > &v)
Retrieve all rule coefficients.
void InitGD()
Initialize GD path search.
A TTree object has a header with a name and a title.
Double_t EvalEvent() const
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus TODO: do not need inde...
const MethodRuleFit * GetMethodRuleFit() const
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
std::vector< Double_t > fGDErrTst
Double_t Sqrt(Double_t x)
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
void MakeGDPath()
The following finds the gradient directed path in parameter space.
Double_t RiskPerf() const
Timing information for training and evaluation of MVA methods.
Int_t Type(const Event *e) const
void CalcTstAverageResponse()
calc average response for all test paths - TODO: see comment under CalcAverageResponse() note that 0 ...