117 if (
fRuleFit->GetMethodRuleFit()==0) {
118 Log() << kFATAL <<
"RuleFitParams::Init() - MethodRuleFit ptr is null" <<
Endl;
166 Log() << kVERBOSE <<
"Path constr. - event index range = [ " <<
fPathIdx1 <<
", " <<
fPathIdx2 <<
" ]"
168 Log() << kVERBOSE <<
"Error estim. - event index range = [ " <<
fPerfIdx1 <<
", " <<
fPerfIdx2 <<
" ]"
172 Log() << kDEBUG <<
"Number of rules in ensemble: " <<
fNRules <<
Endl;
174 Log() << kDEBUG <<
"Rules are disabled " <<
Endl;
179 Log() << kDEBUG <<
"Linear terms are disabled " <<
Endl;
187 fGDNtuple=
new TTree(
"MonitorNtuple_RuleFitParams",
"RuleFit path search");
209 std::vector<Double_t> &avsel,
210 std::vector<Double_t> &avrul )
212 UInt_t neve = ind2-ind1+1;
214 Log() << kFATAL <<
"<EvaluateAverage> - no events selected for path search -> BUG!" <<
Endl;
222 const std::vector<UInt_t> *eventRuleMap=0;
229 for (
UInt_t i=ind1; i<ind2+1; i++) {
230 ew =
fRuleFit->GetTrainingEventWeight(i);
239 nrules = (*eventRuleMap).size();
242 avrul[(*eventRuleMap)[
r]] += ew;
247 const std::vector<const Event *> *events = &(
fRuleFit->GetTrainingEvents());
248 for (
UInt_t i=ind1; i<ind2+1; i++) {
249 ew =
fRuleFit->GetTrainingEventWeight(i);
266 avsel[sel] = avsel[sel] / sumew;
270 avrul[
r] = avrul[
r] / sumew;
283 return diff*diff*
e.GetWeight();
295 return diff*diff*
fRuleFit->GetTrainingEventWeight(evtidx);
308 return diff*diff*
fRuleFit->GetTrainingEventWeight(evtidx);
316 UInt_t neve = ind2-ind1+1;
318 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
323 for (
UInt_t i=ind1; i<ind2+1; i++) {
336 UInt_t neve = ind2-ind1+1;
338 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
343 for (
UInt_t i=ind1; i<ind2+1; i++) {
358 Log() << kWARNING <<
"<Penalty> Using unverified code! Check!" <<
Endl;
360 const std::vector<Double_t> *lincoeff = & (
fRuleEnsemble->GetLinCoefficients());
455 Log() << kFATAL <<
"BUG! FindGDTau() has been called BEFORE InitGD()." <<
Endl;
457 Log() << kINFO <<
"Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." <<
Endl;
477 Timer timer( nscan,
"RuleFit" );
486 if ( (ip==0) || ((ip+1)%netst==0) ) {
495 if (
Log().GetMinType()>kVERBOSE)
503 Log() << kERROR <<
"<FindGDTau> number of scanned loops is zero! Should NOT see this message." <<
Endl;
509 Log() << kINFO <<
"Best path found with tau = " <<
Form(
"%4.4f",
fGDTau)
540 Log() << kINFO <<
"GD path scan - the scan stops when the max num. of steps is reached or a min is found"
546 const Bool_t isVerbose = (
Log().GetMinType()<=kVERBOSE);
547 const Bool_t isDebug = (
Log().GetMinType()<=kDEBUG);
557 Log() << kVERBOSE <<
"Creating GD path" <<
Endl;
577 std::vector<Double_t> coefsMin;
578 std::vector<Double_t> lincoefsMin;
593 std::vector<Double_t> valx;
594 std::vector<Double_t> valy;
595 std::vector<Double_t> valxy;
607 if (imod>100) imod=100;
618 Log() << kVERBOSE <<
"Obtained initial offset = " << offsetMin <<
Endl;
636 Int_t stopCondition=0;
638 Log() << kINFO <<
"Fitting model..." <<
Endl;
644 if (isVerbose) t0 = clock();
647 tgradvec =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
648 stgradvec += tgradvec;
652 if (isVerbose) t0 = clock();
655 tupgrade =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
656 stupgrade += tupgrade;
660 docheck = ((iloop==0) ||((iloop+1)%imod==0));
677 trisk =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
687 Log() <<
"Risk(i+1)>=Risk(i) in path" <<
Endl;
688 riskFlat=(nbadrisk>3);
690 Log() << kWARNING <<
"Chaotic behaviour of risk evolution" <<
Endl;
691 Log() <<
"--- STOPPING MINIMISATION ---" <<
Endl;
692 Log() <<
"This may be OK if minimum is already found" <<
Endl;
702 if (isVerbose) t0 = clock();
715 tperf =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
734 if (valx.size()==npreg) {
735 valx.erase(valx.begin());
736 valy.erase(valy.begin());
737 valxy.erase(valxy.begin());
748 Log() << kVERBOSE <<
"ParamsIRE : "
750 <<
Form(
"%8d",iloop+1) <<
" "
752 <<
Form(
"%4.4f",riskPerf) <<
" "
770 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
774 else if (endOfLoop) {
778 Log() << kWARNING <<
"BUG TRAP: should not be here - still, this bug is harmless;)" <<
Endl;
792 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
793 Log() << kINFO <<
"Found minimum at step " << indMin+1 <<
" with error = " << errmin <<
Endl;
794 Log() << kINFO <<
"Reason for ending loop: ";
795 switch (stopCondition) {
797 Log() << kINFO <<
"clear minima found";
800 Log() << kINFO <<
"chaotic behaviour of risk";
803 Log() << kINFO <<
"end of loop reached";
806 Log() << kINFO <<
"unknown!";
810 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
814 Log() << kWARNING <<
"Reached minimum early in the search" <<
Endl;
815 Log() <<
"Check results and maybe decrease GDStep size" <<
Endl;
825 Log() << kINFO <<
"The error rate was still decreasing at the end of the path" <<
Endl;
826 Log() << kINFO <<
"Increase number of steps (GDNSteps)." <<
Endl;
837 Log() << kFATAL <<
"BUG TRAP: minimum not found in MakeGDPath()" <<
Endl;
844 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
845 Log() << kVERBOSE <<
"Timing per loop (ms):" <<
Endl;
846 Log() << kVERBOSE <<
" gradvec = " << 1000*stgradvec/iloop <<
Endl;
847 Log() << kVERBOSE <<
" upgrade = " << 1000*stupgrade/iloop <<
Endl;
848 Log() << kVERBOSE <<
" risk = " << 1000*strisk/iloop <<
Endl;
849 Log() << kVERBOSE <<
" perf = " << 1000*stperf/iloop <<
Endl;
850 Log() << kVERBOSE <<
" loop = " << 1000*stloop/iloop <<
Endl;
853 Log() << kVERBOSE <<
" GDPtr = " << 1000*
gGDPtr/iloop <<
Endl;
887 Log() << kWARNING <<
"<CalcFStar> Using unverified code! Check!" <<
Endl;
890 Log() << kFATAL <<
"<CalcFStar> Invalid start/end indices!" <<
Endl;
894 const std::vector<const Event *> *events = &(
fRuleFit->GetTrainingEvents());
897 std::vector<Double_t> fstarSorted;
901 const Event&
e = *(*events)[i];
903 fFstar.push_back(fstarVal);
904 fstarSorted.push_back(fstarVal);
908 std::sort( fstarSorted.begin(), fstarSorted.end() );
911 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
927 Log() << kWARNING <<
"<Optimism> Using unverified code! Check!" <<
Endl;
930 Log() << kFATAL <<
"<Optimism> Invalid start/end indices!" <<
Endl;
933 const std::vector<const Event *> *events = &(
fRuleFit->GetTrainingEvents());
944 const Event&
e = *(*events)[i];
946 y = (
fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e) ? 1.0:-1.0);
950 sumyhaty += w*yhat*
y;
954 Double_t cov = sumyhaty - sumyhat*sumy;
966 Log() << kWARNING <<
"<ErrorRateReg> Using unverified code! Check!" <<
Endl;
969 Log() << kFATAL <<
"<ErrorRateReg> Invalid start/end indices!" <<
Endl;
971 if (
fFstar.size()!=neve) {
972 Log() << kFATAL <<
"--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
973 <<
" Fstar.size() = " <<
fFstar.size() <<
" , N(events) = " << neve <<
Endl;
978 const std::vector<const Event *> *events = &(
fRuleFit->GetTrainingEvents());
988 const Event&
e = *(*events)[i];
997 return sumdf/sumdfmed;
1010 Log() << kWARNING <<
"<ErrorRateBin> Using unverified code! Check!" <<
Endl;
1013 Log() << kFATAL <<
"<ErrorRateBin> Invalid start/end indices!" <<
Endl;
1016 const std::vector<const Event *> *events = &(
fRuleFit->GetTrainingEvents());
1024 const Event&
e = *(*events)[i];
1027 signF = (sF>0 ? +1:-1);
1029 signy = (
fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e) ? +1:-1);
1043 std::vector<Double_t> & sFbkg )
1046 std::sort(sFsig.begin(), sFsig.end());
1047 std::sort(sFbkg.begin(), sFbkg.end());
1048 const Double_t minsig = sFsig.front();
1049 const Double_t minbkg = sFbkg.front();
1050 const Double_t maxsig = sFsig.back();
1051 const Double_t maxbkg = sFbkg.back();
1052 const Double_t minf = std::min(minsig,minbkg);
1053 const Double_t maxf = std::max(maxsig,maxbkg);
1056 const Int_t np = std::min((nsig+nbkg)/4,50);
1057 const Double_t df = (maxf-minf)/(np-1);
1062 std::vector<Double_t>::const_iterator indit;
1076 for (
Int_t i=0; i<np; i++) {
1078 indit = std::find_if(sFsig.begin(), sFsig.end(),
1079 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1080 nesig = sFsig.end()-indit;
1082 indit = std::find_if(sFbkg.begin(), sFbkg.end(),
1083 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1084 nrbkg = indit-sFbkg.begin();
1096 area += 0.5*(1+rejb)*effs;
1109 Log() << kWARNING <<
"<ErrorRateRoc> Should not be used in the current version! Check!" <<
Endl;
1112 Log() << kFATAL <<
"<ErrorRateRoc> Invalid start/end indices!" <<
Endl;
1115 const std::vector<const Event *> *events = &(
fRuleFit->GetTrainingEvents());
1119 std::vector<Double_t> sFsig;
1120 std::vector<Double_t> sFbkg;
1127 const Event&
e = *(*events)[i];
1129 if (
fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e)) {
1130 sFsig.push_back(sF);
1135 sFbkg.push_back(sF);
1140 fsigave = sumfsig/sFsig.size();
1141 fbkgave = sumfbkg/sFbkg.size();
1157 Log() << kWARNING <<
"<ErrorRateRocTst> Should not be used in the current version! Check!" <<
Endl;
1160 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1164 const std::vector<const Event *> *events = &(
fRuleFit->GetTrainingEvents());
1168 std::vector< std::vector<Double_t> > sFsig;
1169 std::vector< std::vector<Double_t> > sFbkg;
1180 if (
fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
1181 sFsig[itau].push_back(sF);
1184 sFbkg[itau].push_back(sF);
1205 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1245 Log() << kVERBOSE <<
"TAU: "
1263 Log() << kFATAL <<
"<MakeTstGradientVector> Invalid start/end indices!" <<
Endl;
1269 const std::vector<const Event *> *events = &(
fRuleFit->GetTrainingEvents());
1287 const std::vector<UInt_t> *eventRuleMap=0;
1293 const Event *
e = (*events)[i];
1297 nrules = (*eventRuleMap).size();
1306 y = (
fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e)?1.0:-1.0);
1307 r = norm*(
y - sF) *
fRuleFit->GetTrainingEventWeight(i);
1309 for (
UInt_t ir=0; ir<nrules; ir++) {
1310 rind = (*eventRuleMap)[ir];
1340 Double_t maxv = (maxr>maxl ? maxr:maxl);
1383 Log() << kFATAL <<
"<MakeGradientVector> Invalid start/end indices!" <<
Endl;
1389 const std::vector<const Event *> *events = &(
fRuleFit->GetTrainingEvents());
1403 const std::vector<UInt_t> *eventRuleMap=0;
1409 const Event *
e = (*events)[i];
1418 nrules = (*eventRuleMap).size();
1420 y = (
fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e)?1.0:-1.0);
1421 r = norm*(
y - sF) *
fRuleFit->GetTrainingEventWeight(i);
1423 for (
UInt_t ir=0; ir<nrules; ir++) {
1424 rind = (*eventRuleMap)[ir];
1448 Double_t maxv = (maxr>maxl ? maxr:maxl);
1456 useRThresh = cthresh;
1457 useLThresh = cthresh;
1530 Log() << kFATAL <<
"<CalcAverageTruth> Invalid start/end indices!" <<
Endl;
1536 const std::vector<const Event *> *events = &(
fRuleFit->GetTrainingEvents());
1539 if (
fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
1541 sum += ew*(
fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
1543 Log() << kVERBOSE <<
"Effective number of signal / background = " << ensig <<
" / " << enbkg <<
Endl;
1551 return (
fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e) ? 1:-1);
int Int_t
Signed integer 4 bytes (int).
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
bool Bool_t
Boolean (0=false, 1=true) (bool).
double Double_t
Double 8 bytes.
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
ostringstream derivative to redirect and format output
void CalcTstAverageResponse()
calc average response for all test paths - TODO: see comment under CalcAverageResponse() note that 0 ...
void MakeGDPath()
The following finds the gradient directed path in parameter space.
std::vector< std::vector< Double_t > > fGDCoefLinTst
linear coeffs - one per tau
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
RuleFit * fRuleFit
rule fit
Double_t fGDTau
selected threshold parameter (tau in eq 26, ref 1)
Double_t fNTCoefRad
GD path: 'radius' of all rulecoeffs.
Double_t * fNTLinCoeff
GD path: linear coefficients.
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
Int_t fGDNPathSteps
number of path steps
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
Double_t Optimism()
implementation of eq.
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
UInt_t fPerfIdx2
last event index for performance evaluation
std::vector< Double_t > fAverageSelectorPath
average of each variable over the range fPathIdx1,2
void EvaluateAveragePerf()
UInt_t fGDTauScan
number scan for tau-paths
std::vector< Double_t > fGradVecLin
gradient vector - dimension = number of variables
Double_t fsigrms
Rms of F(sig).
Double_t fNTRisk
GD path: risk.
virtual ~RuleFitParams()
destructor
RuleFitParams()
constructor
Double_t * fNTCoeff
GD path: rule coefficients.
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO: rewrite bad dependency on EvaluateAverage() !
Double_t fNEveEffPerf
idem for Perf events
void SetMsgType(EMsgType t)
Double_t Penalty() const
This is the "lasso" penalty To be used for regression.
std::vector< std::vector< Double_t > > fGradVecLinTst
gradient vector, linear terms - one per tau
Double_t fNEveEffPath
sum of weights for Path events
std::vector< std::vector< Double_t > > fGDCoefTst
rule coeffs - one per tau
Double_t RiskPath() const
MsgLogger * fLogger
! message logger
void FillCoefficients()
helper function to store the rule coefficients in local arrays
std::vector< Double_t > fGDTauVec
the tau's
Double_t LossFunction(const Event &e) const
Implementation of squared-error ramp loss function (eq 39,40 in ref 1) This is used for binary Classi...
Double_t fsigave
Average of current signal score function F(sig).
UInt_t fPathIdx2
last event index for path search
void InitGD()
Initialize GD path search.
UInt_t fPathIdx1
first event index for path search
void EvaluateAveragePath()
Double_t fNTNuval
GD path: value of nu.
Int_t Type(const Event *e) const
TTree * fGDNtuple
Gradient path ntuple, contains params for each step along the path.
std::vector< Double_t > fGDErrTst
error rates per tau
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
std::vector< Double_t > fGradVec
gradient vector - dimension = number of rules in ensemble
Double_t fGDTauMax
max threshold parameter (tau in eq 26, ref 1)
std::vector< std::vector< Double_t > > fGradVecTst
gradient vector - one per tau
std::vector< Char_t > fGDErrTstOK
error rate is sufficiently low <-- stores boolean
RuleEnsemble * fRuleEnsemble
rule ensemble
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
void MakeTstGradientVector()
make test gradient vector for all tau same algorithm as MakeGradientVector()
Double_t fbkgave
Average of F(bkg).
Double_t CalcAverageTruth()
calculate the average truth
Double_t fFstarMedian
median value of F*() using
Double_t fbkgrms
Rms of F(bkg).
Double_t fAverageTruth
average truth, ie sum(y)/N, y=+-1
std::vector< Double_t > fFstar
vector of F*() - filled in CalcFStar()
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus TODO: do not need inde...
Double_t fGDErrScale
stop scan at error = scale*errmin
Double_t RiskPerf() const
void MakeGradientVector()
make gradient vector
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
Double_t fGDTauMin
min threshold parameter (tau in eq 26, ref 1)
UInt_t fNLinear
number of linear terms
void InitNtuple()
initializes the ntuple
UInt_t fPerfIdx1
first event index for performance evaluation
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters It uses a binary estimate of (y-F*(x)) (y...
Double_t fGDTauPrec
precision in tau
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters This code is pretty messy at the moment.
std::vector< Double_t > fGDOfsTst
offset per tau
Double_t fNTOffset
GD path: model offset.
UInt_t fGDNTau
number of tau-paths - calculated in SetGDTauPrec
Double_t fGDPathStep
step size along path (delta nu in eq 22, ref 1)
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
UInt_t fNRules
number of rules
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
Estimates the error rate with the current set of parameters.
UInt_t fGDNTauTstOK
number of tau in the test-phase that are ok
Double_t fNTErrorRate
GD path: error rate (or performance).
std::vector< Double_t > fAverageRulePath
average of each rule, same range
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
A TTree represents a columnar dataset.
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Sqrt(Double_t x)
Returns the square root of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
static uint64_t sum(uint64_t i)