81 , fGDPathStep ( 0.01 )
82 , fGDNPathSteps ( 1000 )
107 if (fNTCoeff) {
delete fNTCoeff; fNTCoeff = 0; }
108 if (fNTLinCoeff) {
delete fNTLinCoeff;fNTLinCoeff = 0; }
117 if (fRuleFit==0)
return;
118 if (fRuleFit->GetMethodRuleFit()==0) {
119 Log() << kFATAL <<
"RuleFitParams::Init() - MethodRuleFit ptr is null" <<
Endl;
121 UInt_t neve = fRuleFit->GetTrainingEvents().size();
123 fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
124 fNRules = fRuleEnsemble->GetNRules();
125 fNLinear = fRuleEnsemble->GetNLinear();
134 fPerfIdx2 =
static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
139 ofs = neve - fPerfIdx2 - 1;
149 fPathIdx2 =
static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
158 for (
UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
159 fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
163 for (
UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
164 fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
167 Log() << kVERBOSE <<
"Path constr. - event index range = [ " << fPathIdx1 <<
", " << fPathIdx2 <<
" ]"
168 <<
", effective N(events) = " << fNEveEffPath <<
Endl;
169 Log() << kVERBOSE <<
"Error estim. - event index range = [ " << fPerfIdx1 <<
", " << fPerfIdx2 <<
" ]"
170 <<
", effective N(events) = " << fNEveEffPerf <<
Endl;
172 if (fRuleEnsemble->DoRules())
173 Log() << kDEBUG <<
"Number of rules in ensemble: " << fNRules <<
Endl;
175 Log() << kDEBUG <<
"Rules are disabled " <<
Endl;
177 if (fRuleEnsemble->DoLinear())
178 Log() << kDEBUG <<
"Number of linear terms: " << fNLinear <<
Endl;
180 Log() << kDEBUG <<
"Linear terms are disabled " <<
Endl;
188 fGDNtuple=
new TTree(
"MonitorNtuple_RuleFitParams",
"RuleFit path search");
189 fGDNtuple->Branch(
"risk", &fNTRisk,
"risk/D");
190 fGDNtuple->Branch(
"error", &fNTErrorRate,
"error/D");
191 fGDNtuple->Branch(
"nuval", &fNTNuval,
"nuval/D");
192 fGDNtuple->Branch(
"coefrad", &fNTCoefRad,
"coefrad/D");
193 fGDNtuple->Branch(
"offset", &fNTOffset,
"offset/D");
195 fNTCoeff = (fNRules >0 ?
new Double_t[fNRules] : 0);
196 fNTLinCoeff = (fNLinear>0 ?
new Double_t[fNLinear] : 0);
198 for (
UInt_t i=0; i<fNRules; i++) {
199 fGDNtuple->Branch(
Form(
"a%d",i+1),&fNTCoeff[i],
Form(
"a%d/D",i+1));
201 for (
UInt_t i=0; i<fNLinear; i++) {
202 fGDNtuple->Branch(
Form(
"b%d",i+1),&fNTLinCoeff[i],
Form(
"b%d/D",i+1));
210 std::vector<Double_t> &avsel,
211 std::vector<Double_t> &avrul )
213 UInt_t neve = ind2-ind1+1;
215 Log() << kFATAL <<
"<EvaluateAverage> - no events selected for path search -> BUG!" <<
Endl;
221 if (fNLinear>0) avsel.resize(fNLinear,0);
222 if (fNRules>0) avrul.resize(fNRules,0);
223 const std::vector<UInt_t> *eventRuleMap=0;
229 if (fRuleEnsemble->IsRuleMapOK()) {
230 for (
UInt_t i=ind1; i<ind2+1; i++) {
231 ew = fRuleFit->GetTrainingEventWeight(i);
233 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
234 avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
238 if (fRuleEnsemble->DoRules()) {
239 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
240 nrules = (*eventRuleMap).size();
243 avrul[(*eventRuleMap)[
r]] += ew;
248 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
249 for (
UInt_t i=ind1; i<ind2+1; i++) {
250 ew = fRuleFit->GetTrainingEventWeight(i);
253 fRuleEnsemble->EvalLinEvent(*((*events)[i]));
254 fRuleEnsemble->EvalEvent(*((*events)[i]));
256 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
257 avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
261 avrul[
r] += ew*fRuleEnsemble->GetEventRuleVal(
r);
266 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
267 avsel[sel] = avsel[sel] / sumew;
271 avrul[
r] = avrul[
r] / sumew;
282 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e)?1:-1) -
h;
284 return diff*diff*
e.GetWeight();
294 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) -
h;
296 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
305 Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
307 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) -
h;
309 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
317 UInt_t neve = ind2-ind1+1;
319 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
324 for (
UInt_t i=ind1; i<ind2+1; i++) {
337 UInt_t neve = ind2-ind1+1;
339 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
344 for (
UInt_t i=ind1; i<ind2+1; i++) {
359 Log() << kWARNING <<
"<Penalty> Using unverified code! Check!" <<
Endl;
361 const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
362 for (
UInt_t i=0; i<fNRules; i++) {
363 rval +=
TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
365 for (
UInt_t i=0; i<fNLinear; i++) {
392 fGDTauVec.resize( fGDNTau );
394 fGDTauVec[0] = fGDTau;
399 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
400 fGDTauVec[itau] =
static_cast<Double_t>(itau)*dtau + fGDTauMin;
401 if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
409 fGradVecLinTst.clear();
414 fGDCoefLinTst.clear();
418 fGDCoefTst.resize(fGDNTau);
419 fGradVec.resize(fNRules,0);
420 fGradVecTst.resize(fGDNTau);
421 for (
UInt_t i=0; i<fGDNTau; i++) {
422 fGradVecTst[i].resize(fNRules,0);
423 fGDCoefTst[i].resize(fNRules,0);
428 fGDCoefLinTst.resize(fGDNTau);
429 fGradVecLin.resize(fNLinear,0);
430 fGradVecLinTst.resize(fGDNTau);
431 for (
UInt_t i=0; i<fGDNTau; i++) {
432 fGradVecLinTst[i].resize(fNLinear,0);
433 fGDCoefLinTst[i].resize(fNLinear,0);
438 fGDErrTst.resize(fGDNTau,0);
439 fGDErrTstOK.resize(fGDNTau,
kTRUE);
440 fGDOfsTst.resize(fGDNTau,0);
441 fGDNTauTstOK = fGDNTau;
452 if (fGDNTau<2)
return 0;
453 if (fGDTauScan==0)
return 0;
455 if (fGDOfsTst.size()<1)
456 Log() << kFATAL <<
"BUG! FindGDTau() has been called BEFORE InitGD()." <<
Endl;
458 Log() << kINFO <<
"Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." <<
Endl;
461 UInt_t nscan = fGDTauScan;
478 Timer timer( nscan,
"RuleFit" );
481 MakeTstGradientVector();
483 UpdateTstCoefficients();
487 if ( (ip==0) || ((ip+1)%netst==0) ) {
489 itauMin = RiskPerfTst();
490 Log() << kVERBOSE <<
Form(
"%4d",ip+1) <<
". tau = " <<
Form(
"%4.4f",fGDTauVec[itauMin])
491 <<
" => error rate = " << fGDErrTst[itauMin] <<
Endl;
494 doloop = ((ip<nscan) && (fGDNTauTstOK>3));
496 if (
Log().GetMinType()>kVERBOSE)
504 Log() << kERROR <<
"<FindGDTau> number of scanned loops is zero! Should NOT see this message." <<
Endl;
506 fGDTau = fGDTauVec[itauMin];
507 fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
508 fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
509 fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
510 Log() << kINFO <<
"Best path found with tau = " <<
Form(
"%4.4f",fGDTau)
541 Log() << kINFO <<
"GD path scan - the scan stops when the max num. of steps is reached or a min is found"
543 Log() << kVERBOSE <<
"Number of events used per path step = " << fPathIdx2-fPathIdx1+1 <<
Endl;
544 Log() << kVERBOSE <<
"Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 <<
Endl;
547 const Bool_t isVerbose = (
Log().GetMinType()<=kVERBOSE);
548 const Bool_t isDebug = (
Log().GetMinType()<=kDEBUG);
554 EvaluateAveragePath();
555 EvaluateAveragePerf();
558 Log() << kVERBOSE <<
"Creating GD path" <<
Endl;
559 Log() << kVERBOSE <<
" N(steps) = " << fGDNPathSteps <<
Endl;
560 Log() << kVERBOSE <<
" step = " << fGDPathStep <<
Endl;
561 Log() << kVERBOSE <<
" N(tau) = " << fGDNTau <<
Endl;
562 Log() << kVERBOSE <<
" N(tau steps) = " << fGDTauScan <<
Endl;
563 Log() << kVERBOSE <<
" tau range = [ " << fGDTauVec[0] <<
" , " << fGDTauVec[fGDNTau-1] <<
" ]" <<
Endl;
566 if (isDebug) InitNtuple();
578 std::vector<Double_t> coefsMin;
579 std::vector<Double_t> lincoefsMin;
594 std::vector<Double_t> valx;
595 std::vector<Double_t> valy;
596 std::vector<Double_t> valxy;
606 int imod = fGDNPathSteps/100;
607 if (imod<100) imod = std::min(100,fGDNPathSteps);
608 if (imod>100) imod=100;
611 fAverageTruth = -CalcAverageTruth();
612 offsetMin = fAverageTruth;
613 fRuleEnsemble->SetOffset(offsetMin);
614 fRuleEnsemble->ClearCoefficients(0);
615 fRuleEnsemble->ClearLinCoefficients(0);
616 for (
UInt_t i=0; i<fGDOfsTst.size(); i++) {
617 fGDOfsTst[i] = offsetMin;
619 Log() << kVERBOSE <<
"Obtained initial offset = " << offsetMin <<
Endl;
622 Int_t nprescan = FindGDTau();
637 Int_t stopCondition=0;
639 Log() << kINFO <<
"Fitting model..." <<
Endl;
641 Timer timer( fGDNPathSteps,
"RuleFit" );
645 if (isVerbose) t0 = clock();
646 MakeGradientVector();
648 tgradvec =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
649 stgradvec += tgradvec;
653 if (isVerbose) t0 = clock();
654 UpdateCoefficients();
656 tupgrade =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
657 stupgrade += tupgrade;
661 docheck = ((iloop==0) ||((iloop+1)%imod==0));
667 fNTNuval =
Double_t(iloop)*fGDPathStep;
672 if (isDebug) FillCoefficients();
673 fNTCoefRad = fRuleEnsemble->CoefficientRadius();
677 fNTRisk = RiskPath();
678 trisk =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
685 if (fNTRisk>=
rprev) {
688 Log() <<
"Risk(i+1)>=Risk(i) in path" <<
Endl;
689 riskFlat=(nbadrisk>3);
691 Log() << kWARNING <<
"Chaotic behaviour of risk evolution" <<
Endl;
692 Log() <<
"--- STOPPING MINIMISATION ---" <<
Endl;
693 Log() <<
"This may be OK if minimum is already found" <<
Endl;
703 if (isVerbose) t0 = clock();
713 fNTErrorRate = errroc;
716 tperf =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
723 if (fNTErrorRate<=errmin) {
724 errmin = fNTErrorRate;
727 fRuleEnsemble->GetCoefficients(coefsMin);
728 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
729 offsetMin = fRuleEnsemble->GetOffset();
731 if ( fNTErrorRate > fGDErrScale*errmin) found =
kTRUE;
735 if (valx.size()==npreg) {
736 valx.erase(valx.begin());
737 valy.erase(valy.begin());
738 valxy.erase(valxy.begin());
740 valx.push_back(fNTNuval);
741 valy.push_back(fNTErrorRate);
742 valxy.push_back(fNTErrorRate*fNTNuval);
747 if (isDebug) fGDNtuple->Fill();
749 Log() << kVERBOSE <<
"ParamsIRE : "
751 <<
Form(
"%8d",iloop+1) <<
" "
752 <<
Form(
"%4.4f",fNTRisk) <<
" "
753 <<
Form(
"%4.4f",riskPerf) <<
" "
754 <<
Form(
"%4.4f",fNTRisk+riskPerf) <<
" "
770 Bool_t endOfLoop = (iloop==fGDNPathSteps);
771 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
775 else if (endOfLoop) {
779 Log() << kWARNING <<
"BUG TRAP: should not be here - still, this bug is harmless;)" <<
Endl;
780 errmin = fNTErrorRate;
783 fRuleEnsemble->GetCoefficients(coefsMin);
784 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
785 offsetMin = fRuleEnsemble->GetOffset();
793 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
794 Log() << kINFO <<
"Found minimum at step " << indMin+1 <<
" with error = " << errmin <<
Endl;
795 Log() << kINFO <<
"Reason for ending loop: ";
796 switch (stopCondition) {
798 Log() << kINFO <<
"clear minima found";
801 Log() << kINFO <<
"chaotic behaviour of risk";
804 Log() << kINFO <<
"end of loop reached";
807 Log() << kINFO <<
"unknown!";
811 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
815 Log() << kWARNING <<
"Reached minimum early in the search" <<
Endl;
816 Log() <<
"Check results and maybe decrease GDStep size" <<
Endl;
826 Log() << kINFO <<
"The error rate was still decreasing at the end of the path" <<
Endl;
827 Log() << kINFO <<
"Increase number of steps (GDNSteps)." <<
Endl;
833 fRuleEnsemble->SetCoefficients( coefsMin );
834 fRuleEnsemble->SetLinCoefficients( lincoefsMin );
835 fRuleEnsemble->SetOffset( offsetMin );
838 Log() << kFATAL <<
"BUG TRAP: minimum not found in MakeGDPath()" <<
Endl;
845 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
846 Log() << kVERBOSE <<
"Timing per loop (ms):" <<
Endl;
847 Log() << kVERBOSE <<
" gradvec = " << 1000*stgradvec/iloop <<
Endl;
848 Log() << kVERBOSE <<
" upgrade = " << 1000*stupgrade/iloop <<
Endl;
849 Log() << kVERBOSE <<
" risk = " << 1000*strisk/iloop <<
Endl;
850 Log() << kVERBOSE <<
" perf = " << 1000*stperf/iloop <<
Endl;
851 Log() << kVERBOSE <<
" loop = " << 1000*stloop/iloop <<
Endl;
854 Log() << kVERBOSE <<
" GDPtr = " << 1000*
gGDPtr/iloop <<
Endl;
863 if (isDebug) fGDNtuple->
Write();
871 fNTOffset = fRuleEnsemble->GetOffset();
873 for (
UInt_t i=0; i<fNRules; i++) {
874 fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
876 for (
UInt_t i=0; i<fNLinear; i++) {
877 fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
888 Log() << kWARNING <<
"<CalcFStar> Using unverified code! Check!" <<
Endl;
889 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
891 Log() << kFATAL <<
"<CalcFStar> Invalid start/end indices!" <<
Endl;
895 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
898 std::vector<Double_t> fstarSorted;
901 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
902 const Event&
e = *(*events)[i];
903 fstarVal = fRuleEnsemble->FStar(
e);
904 fFstar.push_back(fstarVal);
905 fstarSorted.push_back(fstarVal);
909 std::sort( fstarSorted.begin(), fstarSorted.end() );
912 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
915 fFstarMedian = fstarSorted[ind];
928 Log() << kWARNING <<
"<Optimism> Using unverified code! Check!" <<
Endl;
929 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
931 Log() << kFATAL <<
"<Optimism> Invalid start/end indices!" <<
Endl;
934 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
945 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
946 const Event&
e = *(*events)[i];
947 yhat = fRuleEnsemble->EvalEvent(i);
948 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e) ? 1.0:-1.0);
949 w = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf;
952 sumyhaty += w*yhat*
y;
957 Double_t cov = sumyhaty - sumyhat*sumy;
969 Log() << kWARNING <<
"<ErrorRateReg> Using unverified code! Check!" <<
Endl;
970 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
972 Log() << kFATAL <<
"<ErrorRateReg> Invalid start/end indices!" <<
Endl;
974 if (fFstar.size()!=neve) {
975 Log() << kFATAL <<
"--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
976 <<
" Fstar.size() = " << fFstar.size() <<
" , N(events) = " << neve <<
Endl;
981 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
990 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
991 const Event&
e = *(*events)[i];
992 sF = fRuleEnsemble->EvalEvent(
e );
994 sumdf +=
TMath::Abs(fFstar[i-fPerfIdx1] - sF);
995 sumdfmed +=
TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
1000 return sumdf/sumdfmed;
1013 Log() << kWARNING <<
"<ErrorRateBin> Using unverified code! Check!" <<
Endl;
1014 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1016 Log() << kFATAL <<
"<ErrorRateBin> Invalid start/end indices!" <<
Endl;
1019 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1026 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1027 const Event&
e = *(*events)[i];
1028 sF = fRuleEnsemble->EvalEvent(
e );
1030 signF = (sF>0 ? +1:-1);
1032 signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e) ? +1:-1);
1046 std::vector<Double_t> & sFbkg )
1049 std::sort(sFsig.begin(), sFsig.end());
1050 std::sort(sFbkg.begin(), sFbkg.end());
1051 const Double_t minsig = sFsig.front();
1052 const Double_t minbkg = sFbkg.front();
1053 const Double_t maxsig = sFsig.back();
1054 const Double_t maxbkg = sFbkg.back();
1055 const Double_t minf = std::min(minsig,minbkg);
1056 const Double_t maxf = std::max(maxsig,maxbkg);
1059 const Int_t np = std::min((nsig+nbkg)/4,50);
1060 const Double_t df = (maxf-minf)/(np-1);
1065 std::vector<Double_t>::const_iterator indit;
1080 for (
Int_t i=0; i<np; i++) {
1082 indit = std::find_if(sFsig.begin(), sFsig.end(),
1083 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1084 nesig = sFsig.end()-indit;
1087 indit = std::find_if(sFbkg.begin(), sFbkg.end(),
1088 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1089 nrbkg = indit-sFbkg.begin();
1101 area += 0.5*(1+rejb)*effs;
1114 Log() << kWARNING <<
"<ErrorRateRoc> Should not be used in the current version! Check!" <<
Endl;
1115 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1117 Log() << kFATAL <<
"<ErrorRateRoc> Invalid start/end indices!" <<
Endl;
1120 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1124 std::vector<Double_t> sFsig;
1125 std::vector<Double_t> sFbkg;
1131 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1132 const Event&
e = *(*events)[i];
1133 sF = fRuleEnsemble->EvalEvent(i);
1134 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e)) {
1135 sFsig.push_back(sF);
1140 sFbkg.push_back(sF);
1145 fsigave = sumfsig/sFsig.size();
1146 fbkgave = sumfbkg/sFbkg.size();
1150 return ErrorRateRocRaw( sFsig, sFbkg );
1162 Log() << kWARNING <<
"<ErrorRateRocTst> Should not be used in the current version! Check!" <<
Endl;
1163 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1165 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1169 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1173 std::vector< std::vector<Double_t> > sFsig;
1174 std::vector< std::vector<Double_t> > sFbkg;
1176 sFsig.resize( fGDNTau );
1177 sFbkg.resize( fGDNTau );
1180 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1181 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1184 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1185 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
1186 sFsig[itau].push_back(sF);
1189 sFbkg[itau].push_back(sF);
1195 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1196 err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
1197 fGDErrTst[itau] = err;
1208 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1210 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1220 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1221 if (fGDErrTstOK[itau]) {
1223 fGDErrTst[itau] = RiskPerf(itau);
1224 sumx += fGDErrTst[itau];
1225 sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
1226 if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
1227 if (fGDErrTst[itau]<minx) {
1228 minx=fGDErrTst[itau];
1238 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1239 if (fGDErrTstOK[itau]) {
1240 if (fGDErrTst[itau] > maxacc) {
1241 fGDErrTstOK[itau] =
kFALSE;
1250 Log() << kVERBOSE <<
"TAU: "
1266 UInt_t neve = fPathIdx1-fPathIdx2+1;
1268 Log() << kFATAL <<
"<MakeTstGradientVector> Invalid start/end indices!" <<
Endl;
1274 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1277 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1278 if (fGDErrTstOK[itau]) {
1279 for (
UInt_t ir=0; ir<fNRules; ir++) {
1280 fGradVecTst[itau][ir]=0;
1282 for (
UInt_t il=0; il<fNLinear; il++) {
1283 fGradVecLinTst[itau][il]=0;
1292 const std::vector<UInt_t> *eventRuleMap=0;
1298 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1299 const Event *
e = (*events)[i];
1301 if (fRuleEnsemble->DoRules()) {
1302 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1303 nrules = (*eventRuleMap).size();
1305 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1308 if (fGDErrTstOK[itau]) {
1309 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1313 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e)?1.0:-1.0);
1314 r = norm*(
y - sF) * fRuleFit->GetTrainingEventWeight(i);
1316 for (
UInt_t ir=0; ir<nrules; ir++) {
1317 rind = (*eventRuleMap)[ir];
1318 fGradVecTst[itau][rind] +=
r;
1321 for (
UInt_t il=0; il<fNLinear; il++) {
1322 fGradVecLinTst[itau][il] +=
r*fRuleEnsemble->EvalLinEventRaw( il,i,
kTRUE );
1338 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1339 if (fGDErrTstOK[itau]) {
1341 maxr = ( (fNRules>0 ?
1342 TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(),
AbsValue()))):0) );
1343 maxl = ( (fNLinear>0 ?
1344 TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(),
AbsValue()))):0) );
1347 Double_t maxv = (maxr>maxl ? maxr:maxl);
1348 cthresh = maxv * fGDTauVec[itau];
1358 for (
UInt_t i=0; i<fNRules; i++) {
1359 val = fGradVecTst[itau][i];
1362 fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
1366 for (
UInt_t i=0; i<fNLinear; i++) {
1367 val = fGradVecLinTst[itau][i];
1369 fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
1376 CalcTstAverageResponse();
1388 UInt_t neve = fPathIdx2-fPathIdx1+1;
1390 Log() << kFATAL <<
"<MakeGradientVector> Invalid start/end indices!" <<
Endl;
1394 const Double_t norm = 2.0/fNEveEffPath;
1396 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1399 for (
UInt_t ir=0; ir<fNRules; ir++) {
1402 for (
UInt_t il=0; il<fNLinear; il++) {
1410 const std::vector<UInt_t> *eventRuleMap=0;
1415 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1416 const Event *
e = (*events)[i];
1419 sF = fRuleEnsemble->EvalEvent( i );
1423 if (fRuleEnsemble->DoRules()) {
1424 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1425 nrules = (*eventRuleMap).size();
1427 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e)?1.0:-1.0);
1428 r = norm*(
y - sF) * fRuleFit->GetTrainingEventWeight(i);
1430 for (
UInt_t ir=0; ir<nrules; ir++) {
1431 rind = (*eventRuleMap)[ir];
1432 fGradVec[rind] +=
r;
1437 for (
UInt_t il=0; il<fNLinear; il++) {
1438 fGradVecLin[il] +=
r*fRuleEnsemble->EvalLinEventRaw( il, i,
kTRUE );
1450 Double_t maxr = ( (fRuleEnsemble->DoRules() ?
1452 Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
1453 TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(),
AbsValue()))):0) );
1455 Double_t maxv = (maxr>maxl ? maxr:maxl);
1463 useRThresh = cthresh;
1464 useLThresh = cthresh;
1472 for (
UInt_t i=0; i<fGradVec.size(); i++) {
1475 coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
1476 fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
1481 for (
UInt_t i=0; i<fGradVecLin.size(); i++) {
1482 lval = fGradVecLin[i];
1484 lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
1485 fRuleEnsemble->SetLinCoefficient(i,lcoef);
1489 Double_t offset = CalcAverageResponse();
1490 fRuleEnsemble->SetOffset( offset );
1500 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1501 if (fGDErrTstOK[itau]) {
1502 fGDOfsTst[itau] = 0;
1504 fGDOfsTst[itau] -= fGDCoefLinTst[itau][
s] * fAverageSelectorPath[
s];
1507 fGDOfsTst[itau] -= fGDCoefTst[itau][
r] * fAverageRulePath[
r];
1523 ofs -= fRuleEnsemble->GetLinCoefficients(
s) * fAverageSelectorPath[
s];
1526 ofs -= fRuleEnsemble->GetRules(
r)->GetCoefficient() * fAverageRulePath[
r];
1536 if (fPathIdx2<=fPathIdx1) {
1537 Log() << kFATAL <<
"<CalcAverageTruth> Invalid start/end indices!" <<
Endl;
1543 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1544 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1545 Double_t ew = fRuleFit->GetTrainingEventWeight(i);
1546 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
1548 sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
1550 Log() << kVERBOSE <<
"Effective number of signal / background = " << ensig <<
" / " << enbkg <<
Endl;
1552 return sum/fNEveEffPath;
1558 return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e) ? 1:-1);
1564 fLogger->SetMinType(t);
char * Form(const char *fmt,...)
ostringstream derivative to redirect and format output
void CalcTstAverageResponse()
calc average response for all test paths - TODO: see comment under CalcAverageResponse() note that 0 ...
void MakeGDPath()
The following finds the gradient directed path in parameter space.
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
Double_t Optimism()
implementation of eq.
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
virtual ~RuleFitParams()
destructor
RuleFitParams()
constructor
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO : rewrite bad dependancy on EvaluateAverage() !
void SetMsgType(EMsgType t)
Double_t Penalty() const
This is the "lasso" penalty To be used for regression.
void FillCoefficients()
helper function to store the rule coefficients in local arrays
Double_t LossFunction(const Event &e) const
Implementation of squared-error ramp loss function (eq 39,40 in ref 1) This is used for binary Classi...
void InitGD()
Initialize GD path search.
Int_t Type(const Event *e) const
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
void MakeTstGradientVector()
make test gradient vector for all tau same algorithm as MakeGradientVector()
Double_t CalcAverageTruth()
calculate the average truth
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus TODO: do not need inde...
void MakeGradientVector()
make gradient vector
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
void InitNtuple()
initializes the ntuple
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters It uses a binary estimate of (y-F*(x)) (y...
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters This code is pretty messy at the moment.
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
Estimates the error rate with the current set of parameters.
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
A TTree represents a columnar dataset.
static constexpr double s
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Double_t Sqrt(Double_t x)
Short_t Min(Short_t a, Short_t b)
static long int sum(long int i)
#define rprev(otri1, otri2)