80 , fGDPathStep ( 0.01 )
81 , fGDNPathSteps ( 1000 )
106 if (fNTCoeff) {
delete [] fNTCoeff; fNTCoeff =
nullptr; }
107 if (fNTLinCoeff) {
delete [] fNTLinCoeff; fNTLinCoeff =
nullptr; }
116 if (fRuleFit==0)
return;
117 if (fRuleFit->GetMethodRuleFit()==0) {
118 Log() << kFATAL <<
"RuleFitParams::Init() - MethodRuleFit ptr is null" <<
Endl;
120 UInt_t neve = fRuleFit->GetTrainingEvents().size();
122 fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
123 fNRules = fRuleEnsemble->GetNRules();
124 fNLinear = fRuleEnsemble->GetNLinear();
133 fPerfIdx2 =
static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
138 ofs = neve - fPerfIdx2 - 1;
148 fPathIdx2 =
static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
157 for (
UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
158 fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
162 for (
UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
163 fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
166 Log() << kVERBOSE <<
"Path constr. - event index range = [ " << fPathIdx1 <<
", " << fPathIdx2 <<
" ]"
167 <<
", effective N(events) = " << fNEveEffPath <<
Endl;
168 Log() << kVERBOSE <<
"Error estim. - event index range = [ " << fPerfIdx1 <<
", " << fPerfIdx2 <<
" ]"
169 <<
", effective N(events) = " << fNEveEffPerf <<
Endl;
171 if (fRuleEnsemble->DoRules())
172 Log() << kDEBUG <<
"Number of rules in ensemble: " << fNRules <<
Endl;
174 Log() << kDEBUG <<
"Rules are disabled " <<
Endl;
176 if (fRuleEnsemble->DoLinear())
177 Log() << kDEBUG <<
"Number of linear terms: " << fNLinear <<
Endl;
179 Log() << kDEBUG <<
"Linear terms are disabled " <<
Endl;
187 fGDNtuple=
new TTree(
"MonitorNtuple_RuleFitParams",
"RuleFit path search");
188 fGDNtuple->Branch(
"risk", &fNTRisk,
"risk/D");
189 fGDNtuple->Branch(
"error", &fNTErrorRate,
"error/D");
190 fGDNtuple->Branch(
"nuval", &fNTNuval,
"nuval/D");
191 fGDNtuple->Branch(
"coefrad", &fNTCoefRad,
"coefrad/D");
192 fGDNtuple->Branch(
"offset", &fNTOffset,
"offset/D");
194 fNTCoeff = (fNRules >0 ?
new Double_t[fNRules] : 0);
195 fNTLinCoeff = (fNLinear>0 ?
new Double_t[fNLinear] : 0);
197 for (
UInt_t i=0; i<fNRules; i++) {
198 fGDNtuple->Branch(
Form(
"a%d",i+1),&fNTCoeff[i],
Form(
"a%d/D",i+1));
200 for (
UInt_t i=0; i<fNLinear; i++) {
201 fGDNtuple->Branch(
Form(
"b%d",i+1),&fNTLinCoeff[i],
Form(
"b%d/D",i+1));
209 std::vector<Double_t> &avsel,
210 std::vector<Double_t> &avrul )
212 UInt_t neve = ind2-ind1+1;
214 Log() << kFATAL <<
"<EvaluateAverage> - no events selected for path search -> BUG!" <<
Endl;
220 if (fNLinear>0) avsel.resize(fNLinear,0);
221 if (fNRules>0) avrul.resize(fNRules,0);
222 const std::vector<UInt_t> *eventRuleMap=0;
228 if (fRuleEnsemble->IsRuleMapOK()) {
229 for (
UInt_t i=ind1; i<ind2+1; i++) {
230 ew = fRuleFit->GetTrainingEventWeight(i);
232 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
233 avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
237 if (fRuleEnsemble->DoRules()) {
238 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
239 nrules = (*eventRuleMap).size();
242 avrul[(*eventRuleMap)[
r]] += ew;
247 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
248 for (
UInt_t i=ind1; i<ind2+1; i++) {
249 ew = fRuleFit->GetTrainingEventWeight(i);
252 fRuleEnsemble->EvalLinEvent(*((*events)[i]));
253 fRuleEnsemble->EvalEvent(*((*events)[i]));
255 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
256 avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
260 avrul[
r] += ew*fRuleEnsemble->GetEventRuleVal(
r);
265 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
266 avsel[sel] = avsel[sel] / sumew;
270 avrul[
r] = avrul[
r] / sumew;
281 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e)?1:-1) -
h;
283 return diff*diff*
e.GetWeight();
293 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) -
h;
295 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
304 Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
306 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) -
h;
308 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
316 UInt_t neve = ind2-ind1+1;
318 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
323 for (
UInt_t i=ind1; i<ind2+1; i++) {
336 UInt_t neve = ind2-ind1+1;
338 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
343 for (
UInt_t i=ind1; i<ind2+1; i++) {
358 Log() << kWARNING <<
"<Penalty> Using unverified code! Check!" <<
Endl;
360 const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
361 for (
UInt_t i=0; i<fNRules; i++) {
362 rval +=
TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
364 for (
UInt_t i=0; i<fNLinear; i++) {
391 fGDTauVec.resize( fGDNTau );
393 fGDTauVec[0] = fGDTau;
398 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
399 fGDTauVec[itau] =
static_cast<Double_t>(itau)*dtau + fGDTauMin;
400 if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
408 fGradVecLinTst.clear();
413 fGDCoefLinTst.clear();
417 fGDCoefTst.resize(fGDNTau);
418 fGradVec.resize(fNRules,0);
419 fGradVecTst.resize(fGDNTau);
420 for (
UInt_t i=0; i<fGDNTau; i++) {
421 fGradVecTst[i].resize(fNRules,0);
422 fGDCoefTst[i].resize(fNRules,0);
427 fGDCoefLinTst.resize(fGDNTau);
428 fGradVecLin.resize(fNLinear,0);
429 fGradVecLinTst.resize(fGDNTau);
430 for (
UInt_t i=0; i<fGDNTau; i++) {
431 fGradVecLinTst[i].resize(fNLinear,0);
432 fGDCoefLinTst[i].resize(fNLinear,0);
437 fGDErrTst.resize(fGDNTau,0);
438 fGDErrTstOK.resize(fGDNTau,
kTRUE);
439 fGDOfsTst.resize(fGDNTau,0);
440 fGDNTauTstOK = fGDNTau;
451 if (fGDNTau<2)
return 0;
452 if (fGDTauScan==0)
return 0;
454 if (fGDOfsTst.size()<1)
455 Log() << kFATAL <<
"BUG! FindGDTau() has been called BEFORE InitGD()." <<
Endl;
457 Log() << kINFO <<
"Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." <<
Endl;
460 UInt_t nscan = fGDTauScan;
477 Timer timer( nscan,
"RuleFit" );
480 MakeTstGradientVector();
482 UpdateTstCoefficients();
486 if ( (ip==0) || ((ip+1)%netst==0) ) {
488 itauMin = RiskPerfTst();
489 Log() << kVERBOSE <<
Form(
"%4d",ip+1) <<
". tau = " <<
Form(
"%4.4f",fGDTauVec[itauMin])
490 <<
" => error rate = " << fGDErrTst[itauMin] <<
Endl;
493 doloop = ((ip<nscan) && (fGDNTauTstOK>3));
495 if (Log().GetMinType()>kVERBOSE)
503 Log() << kERROR <<
"<FindGDTau> number of scanned loops is zero! Should NOT see this message." <<
Endl;
505 fGDTau = fGDTauVec[itauMin];
506 fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
507 fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
508 fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
509 Log() << kINFO <<
"Best path found with tau = " <<
Form(
"%4.4f",fGDTau)
540 Log() << kINFO <<
"GD path scan - the scan stops when the max num. of steps is reached or a min is found"
542 Log() << kVERBOSE <<
"Number of events used per path step = " << fPathIdx2-fPathIdx1+1 <<
Endl;
543 Log() << kVERBOSE <<
"Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 <<
Endl;
546 const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
547 const Bool_t isDebug = (Log().GetMinType()<=kDEBUG);
553 EvaluateAveragePath();
554 EvaluateAveragePerf();
557 Log() << kVERBOSE <<
"Creating GD path" <<
Endl;
558 Log() << kVERBOSE <<
" N(steps) = " << fGDNPathSteps <<
Endl;
559 Log() << kVERBOSE <<
" step = " << fGDPathStep <<
Endl;
560 Log() << kVERBOSE <<
" N(tau) = " << fGDNTau <<
Endl;
561 Log() << kVERBOSE <<
" N(tau steps) = " << fGDTauScan <<
Endl;
562 Log() << kVERBOSE <<
" tau range = [ " << fGDTauVec[0] <<
" , " << fGDTauVec[fGDNTau-1] <<
" ]" <<
Endl;
565 if (isDebug) InitNtuple();
577 std::vector<Double_t> coefsMin;
578 std::vector<Double_t> lincoefsMin;
593 std::vector<Double_t> valx;
594 std::vector<Double_t> valy;
595 std::vector<Double_t> valxy;
605 int imod = fGDNPathSteps/100;
606 if (imod<100) imod = std::min(100,fGDNPathSteps);
607 if (imod>100) imod=100;
610 fAverageTruth = -CalcAverageTruth();
611 offsetMin = fAverageTruth;
612 fRuleEnsemble->SetOffset(offsetMin);
613 fRuleEnsemble->ClearCoefficients(0);
614 fRuleEnsemble->ClearLinCoefficients(0);
615 for (
UInt_t i=0; i<fGDOfsTst.size(); i++) {
616 fGDOfsTst[i] = offsetMin;
618 Log() << kVERBOSE <<
"Obtained initial offset = " << offsetMin <<
Endl;
621 Int_t nprescan = FindGDTau();
636 Int_t stopCondition=0;
638 Log() << kINFO <<
"Fitting model..." <<
Endl;
640 Timer timer( fGDNPathSteps,
"RuleFit" );
644 if (isVerbose) t0 = clock();
645 MakeGradientVector();
647 tgradvec =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
648 stgradvec += tgradvec;
652 if (isVerbose) t0 = clock();
653 UpdateCoefficients();
655 tupgrade =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
656 stupgrade += tupgrade;
660 docheck = ((iloop==0) ||((iloop+1)%imod==0));
666 fNTNuval =
Double_t(iloop)*fGDPathStep;
671 if (isDebug) FillCoefficients();
672 fNTCoefRad = fRuleEnsemble->CoefficientRadius();
676 fNTRisk = RiskPath();
677 trisk =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
684 if (fNTRisk>=
rprev) {
687 Log() <<
"Risk(i+1)>=Risk(i) in path" <<
Endl;
688 riskFlat=(nbadrisk>3);
690 Log() << kWARNING <<
"Chaotic behaviour of risk evolution" <<
Endl;
691 Log() <<
"--- STOPPING MINIMISATION ---" <<
Endl;
692 Log() <<
"This may be OK if minimum is already found" <<
Endl;
702 if (isVerbose) t0 = clock();
712 fNTErrorRate = errroc;
715 tperf =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
722 if (fNTErrorRate<=errmin) {
723 errmin = fNTErrorRate;
726 fRuleEnsemble->GetCoefficients(coefsMin);
727 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
728 offsetMin = fRuleEnsemble->GetOffset();
730 if ( fNTErrorRate > fGDErrScale*errmin) found =
kTRUE;
734 if (valx.size()==npreg) {
735 valx.erase(valx.begin());
736 valy.erase(valy.begin());
737 valxy.erase(valxy.begin());
739 valx.push_back(fNTNuval);
740 valy.push_back(fNTErrorRate);
741 valxy.push_back(fNTErrorRate*fNTNuval);
746 if (isDebug) fGDNtuple->Fill();
748 Log() << kVERBOSE <<
"ParamsIRE : "
750 <<
Form(
"%8d",iloop+1) <<
" "
751 <<
Form(
"%4.4f",fNTRisk) <<
" "
752 <<
Form(
"%4.4f",riskPerf) <<
" "
753 <<
Form(
"%4.4f",fNTRisk+riskPerf) <<
" "
769 Bool_t endOfLoop = (iloop==fGDNPathSteps);
770 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
774 else if (endOfLoop) {
778 Log() << kWARNING <<
"BUG TRAP: should not be here - still, this bug is harmless;)" <<
Endl;
779 errmin = fNTErrorRate;
782 fRuleEnsemble->GetCoefficients(coefsMin);
783 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
784 offsetMin = fRuleEnsemble->GetOffset();
791 Log() << kINFO <<
"Minimisation elapsed time : " << timer.
GetElapsedTime() <<
" " <<
Endl;
792 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
793 Log() << kINFO <<
"Found minimum at step " << indMin+1 <<
" with error = " << errmin <<
Endl;
794 Log() << kINFO <<
"Reason for ending loop: ";
795 switch (stopCondition) {
797 Log() << kINFO <<
"clear minima found";
800 Log() << kINFO <<
"chaotic behaviour of risk";
803 Log() << kINFO <<
"end of loop reached";
806 Log() << kINFO <<
"unknown!";
810 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
814 Log() << kWARNING <<
"Reached minimum early in the search" <<
Endl;
815 Log() <<
"Check results and maybe decrease GDStep size" <<
Endl;
825 Log() << kINFO <<
"The error rate was still decreasing at the end of the path" <<
Endl;
826 Log() << kINFO <<
"Increase number of steps (GDNSteps)." <<
Endl;
832 fRuleEnsemble->SetCoefficients( coefsMin );
833 fRuleEnsemble->SetLinCoefficients( lincoefsMin );
834 fRuleEnsemble->SetOffset( offsetMin );
837 Log() << kFATAL <<
"BUG TRAP: minimum not found in MakeGDPath()" <<
Endl;
844 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
845 Log() << kVERBOSE <<
"Timing per loop (ms):" <<
Endl;
846 Log() << kVERBOSE <<
" gradvec = " << 1000*stgradvec/iloop <<
Endl;
847 Log() << kVERBOSE <<
" upgrade = " << 1000*stupgrade/iloop <<
Endl;
848 Log() << kVERBOSE <<
" risk = " << 1000*strisk/iloop <<
Endl;
849 Log() << kVERBOSE <<
" perf = " << 1000*stperf/iloop <<
Endl;
850 Log() << kVERBOSE <<
" loop = " << 1000*stloop/iloop <<
Endl;
852 Log() << kVERBOSE <<
" GDInit = " << 1000*
gGDInit/iloop <<
Endl;
853 Log() << kVERBOSE <<
" GDPtr = " << 1000*
gGDPtr/iloop <<
Endl;
854 Log() << kVERBOSE <<
" GDEval = " << 1000*
gGDEval/iloop <<
Endl;
855 Log() << kVERBOSE <<
" GDEvalRule = " << 1000*
gGDEvalRule/iloop <<
Endl;
856 Log() << kVERBOSE <<
" GDNorm = " << 1000*
gGDNorm/iloop <<
Endl;
857 Log() << kVERBOSE <<
" GDRuleLoop = " << 1000*
gGDRuleLoop/iloop <<
Endl;
858 Log() << kVERBOSE <<
" GDLinLoop = " << 1000*
gGDLinLoop/iloop <<
Endl;
862 if (isDebug) fGDNtuple->
Write();
870 fNTOffset = fRuleEnsemble->GetOffset();
872 for (
UInt_t i=0; i<fNRules; i++) {
873 fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
875 for (
UInt_t i=0; i<fNLinear; i++) {
876 fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
887 Log() << kWARNING <<
"<CalcFStar> Using unverified code! Check!" <<
Endl;
888 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
890 Log() << kFATAL <<
"<CalcFStar> Invalid start/end indices!" <<
Endl;
894 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
897 std::vector<Double_t> fstarSorted;
900 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
901 const Event&
e = *(*events)[i];
902 fstarVal = fRuleEnsemble->FStar(
e);
903 fFstar.push_back(fstarVal);
904 fstarSorted.push_back(fstarVal);
908 std::sort( fstarSorted.begin(), fstarSorted.end() );
911 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
914 fFstarMedian = fstarSorted[ind];
927 Log() << kWARNING <<
"<Optimism> Using unverified code! Check!" <<
Endl;
928 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
930 Log() << kFATAL <<
"<Optimism> Invalid start/end indices!" <<
Endl;
933 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
943 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
944 const Event&
e = *(*events)[i];
945 yhat = fRuleEnsemble->EvalEvent(i);
946 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e) ? 1.0:-1.0);
947 w = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf;
950 sumyhaty += w*yhat*
y;
954 Double_t cov = sumyhaty - sumyhat*sumy;
966 Log() << kWARNING <<
"<ErrorRateReg> Using unverified code! Check!" <<
Endl;
967 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
969 Log() << kFATAL <<
"<ErrorRateReg> Invalid start/end indices!" <<
Endl;
971 if (fFstar.size()!=neve) {
972 Log() << kFATAL <<
"--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
973 <<
" Fstar.size() = " << fFstar.size() <<
" , N(events) = " << neve <<
Endl;
978 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
987 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
988 const Event&
e = *(*events)[i];
989 sF = fRuleEnsemble->EvalEvent(
e );
991 sumdf +=
TMath::Abs(fFstar[i-fPerfIdx1] - sF);
992 sumdfmed +=
TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
997 return sumdf/sumdfmed;
1010 Log() << kWARNING <<
"<ErrorRateBin> Using unverified code! Check!" <<
Endl;
1011 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1013 Log() << kFATAL <<
"<ErrorRateBin> Invalid start/end indices!" <<
Endl;
1016 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1023 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1024 const Event&
e = *(*events)[i];
1025 sF = fRuleEnsemble->EvalEvent(
e );
1027 signF = (sF>0 ? +1:-1);
1029 signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e) ? +1:-1);
1043 std::vector<Double_t> & sFbkg )
1046 std::sort(sFsig.begin(), sFsig.end());
1047 std::sort(sFbkg.begin(), sFbkg.end());
1048 const Double_t minsig = sFsig.front();
1049 const Double_t minbkg = sFbkg.front();
1050 const Double_t maxsig = sFsig.back();
1051 const Double_t maxbkg = sFbkg.back();
1052 const Double_t minf = std::min(minsig,minbkg);
1053 const Double_t maxf = std::max(maxsig,maxbkg);
1056 const Int_t np = std::min((nsig+nbkg)/4,50);
1057 const Double_t df = (maxf-minf)/(np-1);
1062 std::vector<Double_t>::const_iterator indit;
1077 for (
Int_t i=0; i<np; i++) {
1079 indit = std::find_if(sFsig.begin(), sFsig.end(),
1080 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1081 nesig = sFsig.end()-indit;
1084 indit = std::find_if(sFbkg.begin(), sFbkg.end(),
1085 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1086 nrbkg = indit-sFbkg.begin();
1098 area += 0.5*(1+rejb)*effs;
1111 Log() << kWARNING <<
"<ErrorRateRoc> Should not be used in the current version! Check!" <<
Endl;
1112 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1114 Log() << kFATAL <<
"<ErrorRateRoc> Invalid start/end indices!" <<
Endl;
1117 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1121 std::vector<Double_t> sFsig;
1122 std::vector<Double_t> sFbkg;
1128 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1129 const Event&
e = *(*events)[i];
1130 sF = fRuleEnsemble->EvalEvent(i);
1131 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e)) {
1132 sFsig.push_back(sF);
1137 sFbkg.push_back(sF);
1142 fsigave = sumfsig/sFsig.size();
1143 fbkgave = sumfbkg/sFbkg.size();
1147 return ErrorRateRocRaw( sFsig, sFbkg );
1159 Log() << kWARNING <<
"<ErrorRateRocTst> Should not be used in the current version! Check!" <<
Endl;
1160 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1162 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1166 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1170 std::vector< std::vector<Double_t> > sFsig;
1171 std::vector< std::vector<Double_t> > sFbkg;
1173 sFsig.resize( fGDNTau );
1174 sFbkg.resize( fGDNTau );
1177 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1178 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1181 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1182 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
1183 sFsig[itau].push_back(sF);
1186 sFbkg[itau].push_back(sF);
1192 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1193 err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
1194 fGDErrTst[itau] = err;
1205 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1207 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1217 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1218 if (fGDErrTstOK[itau]) {
1220 fGDErrTst[itau] = RiskPerf(itau);
1221 sumx += fGDErrTst[itau];
1222 sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
1223 if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
1224 if (fGDErrTst[itau]<minx) {
1225 minx=fGDErrTst[itau];
1235 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1236 if (fGDErrTstOK[itau]) {
1237 if (fGDErrTst[itau] > maxacc) {
1238 fGDErrTstOK[itau] =
kFALSE;
1247 Log() << kVERBOSE <<
"TAU: "
1263 UInt_t neve = fPathIdx1-fPathIdx2+1;
1265 Log() << kFATAL <<
"<MakeTstGradientVector> Invalid start/end indices!" <<
Endl;
1271 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1274 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1275 if (fGDErrTstOK[itau]) {
1276 for (
UInt_t ir=0; ir<fNRules; ir++) {
1277 fGradVecTst[itau][ir]=0;
1279 for (
UInt_t il=0; il<fNLinear; il++) {
1280 fGradVecLinTst[itau][il]=0;
1289 const std::vector<UInt_t> *eventRuleMap=0;
1295 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1296 const Event *
e = (*events)[i];
1298 if (fRuleEnsemble->DoRules()) {
1299 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1300 nrules = (*eventRuleMap).size();
1302 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1305 if (fGDErrTstOK[itau]) {
1306 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1310 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e)?1.0:-1.0);
1311 r = norm*(
y - sF) * fRuleFit->GetTrainingEventWeight(i);
1313 for (
UInt_t ir=0; ir<nrules; ir++) {
1314 rind = (*eventRuleMap)[ir];
1315 fGradVecTst[itau][rind] +=
r;
1318 for (
UInt_t il=0; il<fNLinear; il++) {
1319 fGradVecLinTst[itau][il] +=
r*fRuleEnsemble->EvalLinEventRaw( il,i,
kTRUE );
1335 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1336 if (fGDErrTstOK[itau]) {
1338 maxr = ( (fNRules>0 ?
1339 TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(),
AbsValue()))):0) );
1340 maxl = ( (fNLinear>0 ?
1341 TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(),
AbsValue()))):0) );
1344 Double_t maxv = (maxr>maxl ? maxr:maxl);
1345 cthresh = maxv * fGDTauVec[itau];
1355 for (
UInt_t i=0; i<fNRules; i++) {
1356 val = fGradVecTst[itau][i];
1359 fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
1363 for (
UInt_t i=0; i<fNLinear; i++) {
1364 val = fGradVecLinTst[itau][i];
1366 fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
1373 CalcTstAverageResponse();
1385 UInt_t neve = fPathIdx2-fPathIdx1+1;
1387 Log() << kFATAL <<
"<MakeGradientVector> Invalid start/end indices!" <<
Endl;
1391 const Double_t norm = 2.0/fNEveEffPath;
1393 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1396 for (
UInt_t ir=0; ir<fNRules; ir++) {
1399 for (
UInt_t il=0; il<fNLinear; il++) {
1407 const std::vector<UInt_t> *eventRuleMap=0;
1412 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1413 const Event *
e = (*events)[i];
1416 sF = fRuleEnsemble->EvalEvent( i );
1420 if (fRuleEnsemble->DoRules()) {
1421 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1422 nrules = (*eventRuleMap).size();
1424 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e)?1.0:-1.0);
1425 r = norm*(
y - sF) * fRuleFit->GetTrainingEventWeight(i);
1427 for (
UInt_t ir=0; ir<nrules; ir++) {
1428 rind = (*eventRuleMap)[ir];
1429 fGradVec[rind] +=
r;
1434 for (
UInt_t il=0; il<fNLinear; il++) {
1435 fGradVecLin[il] +=
r*fRuleEnsemble->EvalLinEventRaw( il, i,
kTRUE );
1447 Double_t maxr = ( (fRuleEnsemble->DoRules() ?
1449 Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
1450 TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(),
AbsValue()))):0) );
1452 Double_t maxv = (maxr>maxl ? maxr:maxl);
1460 useRThresh = cthresh;
1461 useLThresh = cthresh;
1469 for (
UInt_t i=0; i<fGradVec.size(); i++) {
1472 coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
1473 fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
1478 for (
UInt_t i=0; i<fGradVecLin.size(); i++) {
1479 lval = fGradVecLin[i];
1481 lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
1482 fRuleEnsemble->SetLinCoefficient(i,lcoef);
1486 Double_t offset = CalcAverageResponse();
1487 fRuleEnsemble->SetOffset( offset );
1497 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1498 if (fGDErrTstOK[itau]) {
1499 fGDOfsTst[itau] = 0;
1500 for (
UInt_t s=0; s<fNLinear; s++) {
1501 fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
1504 fGDOfsTst[itau] -= fGDCoefTst[itau][
r] * fAverageRulePath[
r];
1519 for (
UInt_t s=0; s<fNLinear; s++) {
1520 ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
1523 ofs -= fRuleEnsemble->GetRules(
r)->GetCoefficient() * fAverageRulePath[
r];
1533 if (fPathIdx2<=fPathIdx1) {
1534 Log() << kFATAL <<
"<CalcAverageTruth> Invalid start/end indices!" <<
Endl;
1540 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1541 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1542 Double_t ew = fRuleFit->GetTrainingEventWeight(i);
1543 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
1545 sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
1547 Log() << kVERBOSE <<
"Effective number of signal / background = " << ensig <<
" / " << enbkg <<
Endl;
1549 return sum/fNEveEffPath;
1555 return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e) ? 1:-1);
1561 fLogger->SetMinType(t);
char * Form(const char *fmt,...)
ostringstream derivative to redirect and format output
void CalcTstAverageResponse()
calc average response for all test paths - TODO: see comment under CalcAverageResponse() note that 0 ...
void MakeGDPath()
The following finds the gradient directed path in parameter space.
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
Double_t Optimism()
implementation of eq.
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
virtual ~RuleFitParams()
destructor
RuleFitParams()
constructor
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO : rewrite bad dependency on EvaluateAverage() !
void SetMsgType(EMsgType t)
Double_t Penalty() const
This is the "lasso" penalty To be used for regression.
void FillCoefficients()
helper function to store the rule coefficients in local arrays
Double_t LossFunction(const Event &e) const
Implementation of squared-error ramp loss function (eq 39,40 in ref 1) This is used for binary Classification.
void InitGD()
Initialize GD path search.
Int_t Type(const Event *e) const
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
void MakeTstGradientVector()
make test gradient vector for all tau same algorithm as MakeGradientVector()
Double_t CalcAverageTruth()
calculate the average truth
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus TODO: do not need inde...
void MakeGradientVector()
make gradient vector
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
void InitNtuple()
initializes the ntuple
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters It uses a binary estimate of (y-F*(x)) (y...
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters This code is pretty messy at the moment.
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
Estimates the error rate with the current set of parameters.
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
A TTree represents a columnar dataset.
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Double_t Sqrt(Double_t x)
Short_t Min(Short_t a, Short_t b)
static uint64_t sum(uint64_t i)
#define rprev(otri1, otri2)