// RuleFitParams constructor (excerpt of the initializer list): default GD path parameters
, fGDPathStep   ( 0.01 )
, fGDNPathSteps ( 1000 )

// RuleFitParams destructor
if (fNTCoeff)    { delete [] fNTCoeff;    fNTCoeff    = 0; } // new[] allocation requires delete []
if (fNTLinCoeff) { delete [] fNTLinCoeff; fNTLinCoeff = 0; }

// RuleFitParams::Init() - initializes all parameters from the RuleEnsemble and the training tree
if (fRuleFit==0) return;
if (fRuleFit->GetMethodRuleFit()==0) {
   Log() << kFATAL << "RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
}
UInt_t neve = fRuleFit->GetTrainingEvents().size();

fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
fNRules       = fRuleEnsemble->GetNRules();
fNLinear      = fRuleEnsemble->GetNLinear();
// validation (error-estimation) sample: taken from the end of the training set
fPerfIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
ofs = neve - fPerfIdx2 - 1;
// path-search sample: taken from the beginning of the training set
fPathIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
// effective number of events = sum of training weights in each sub-sample
for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
   fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
}
for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
   fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
}
Log() << kVERBOSE << "Path constr. - event index range = [ " << fPathIdx1 << ", " << fPathIdx2 << " ]"
      << ", effective N(events) = " << fNEveEffPath << Endl;
Log() << kVERBOSE << "Error estim. - event index range = [ " << fPerfIdx1 << ", " << fPerfIdx2 << " ]"
      << ", effective N(events) = " << fNEveEffPerf << Endl;
if (fRuleEnsemble->DoRules())
   Log() << kDEBUG << "Number of rules in ensemble: " << fNRules << Endl;
if (fRuleEnsemble->DoLinear())
   Log() << kDEBUG << "Number of linear terms: " << fNLinear << Endl;

// RuleFitParams::InitNtuple() - books the monitoring ntuple for the path search
fGDNtuple = new TTree("MonitorNtuple_RuleFitParams","RuleFit path search");
fGDNtuple->Branch("risk",    &fNTRisk,      "risk/D");
fGDNtuple->Branch("error",   &fNTErrorRate, "error/D");
fGDNtuple->Branch("nuval",   &fNTNuval,     "nuval/D");
fGDNtuple->Branch("coefrad", &fNTCoefRad,   "coefrad/D");
fGDNtuple->Branch("offset",  &fNTOffset,    "offset/D");

fNTCoeff    = (fNRules >0 ? new Double_t[fNRules]  : 0);
fNTLinCoeff = (fNLinear>0 ? new Double_t[fNLinear] : 0);
// one branch per rule coefficient (a1..aN) and per linear coefficient (b1..bN)
for (UInt_t i=0; i<fNRules; i++) {
   fGDNtuple->Branch(Form("a%d",i+1), &fNTCoeff[i],    Form("a%d/D",i+1));
}
for (UInt_t i=0; i<fNLinear; i++) {
   fGDNtuple->Branch(Form("b%d",i+1), &fNTLinCoeff[i], Form("b%d/D",i+1));
}

// RuleFitParams::EvaluateAverage() - weighted average of each linear term and rule response in [ind1,ind2]
void TMVA::RuleFitParams::EvaluateAverage( UInt_t ind1, UInt_t ind2,
                                           std::vector<Double_t> &avsel,
                                           std::vector<Double_t> &avrul )
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
   }
   if (fNLinear>0) avsel.resize(fNLinear,0);
   if (fNRules>0)  avrul.resize(fNRules,0);
   const std::vector<UInt_t> *eventRuleMap = 0;
   if (fRuleEnsemble->IsRuleMapOK()) { // the event-rule map has been made
      for (UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         for (UInt_t sel=0; sel<fNLinear; sel++) {
            avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
         }
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         for (UInt_t r=0; r<nrules; r++) {
            avrul[(*eventRuleMap)[r]] += ew;
         }
      }
   }
   else { // no rule map yet - evaluate each event explicitly
      const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
      for (UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         fRuleEnsemble->EvalLinEvent(*((*events)[i])); // caches the linear responses
         fRuleEnsemble->EvalEvent(*((*events)[i]));    // caches the rule responses
         for (UInt_t sel=0; sel<fNLinear; sel++) {
            avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
         }
         for (UInt_t r=0; r<fNRules; r++) {
            avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
         }
      }
   }
   // normalise by the summed event weights to obtain the weighted averages
   for (UInt_t sel=0; sel<fNLinear; sel++) {
      avsel[sel] = avsel[sel] / sumew;
   }
   for (UInt_t r=0; r<fNRules; r++) {
      avrul[r] = avrul[r] / sumew;
   }

// RuleFitParams::LossFunction( const Event& e ) - squared-error ramp loss
Double_t h    = TMath::Max( -1.0, TMath::Min( 1.0, fRuleEnsemble->EvalEvent( e ) ) );
Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1 : -1) - h;

// RuleFitParams::LossFunction( UInt_t evtidx ) - same loss, using the cached rule map
Double_t h    = TMath::Max( -1.0, TMath::Min( 1.0, fRuleEnsemble->EvalEvent( evtidx ) ) );
Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx )) ? 1 : -1) - h;
return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);

// RuleFitParams::LossFunction( UInt_t evtidx, UInt_t itau ) - loss for one of the scanned taus
Double_t e    = fRuleEnsemble->EvalEvent( evtidx, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
Double_t h    = TMath::Max( -1.0, TMath::Min( 1.0, e ) );
Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx )) ? 1 : -1) - h;
return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
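
// A minimal standalone sketch (added for illustration, not part of the
// original source) of the clamped squared-error "ramp" loss computed by the
// three LossFunction() overloads above: the ensemble response F is clamped to
// [-1,1] before being compared with the +-1 truth label y, and the squared
// difference is weighted by the event weight w (Friedman & Popescu's loss).
static Double_t rampLossSketch( Double_t y, Double_t F, Double_t w )
{
   Double_t h    = TMath::Max( -1.0, TMath::Min( 1.0, F ) ); // clamped response
   Double_t diff = y - h;
   return diff*diff*w;                                       // event-weighted squared error
}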

// RuleFitParams::Risk( ind1, ind2, neff ) - risk assessment: average loss over [ind1,ind2]
UInt_t neve = ind2-ind1+1;
if (neve<1) {
   Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
}
Double_t rval = 0;
for (UInt_t i=ind1; i<ind2+1; i++) {
   rval += LossFunction(i);
}
return rval/neff;

// RuleFitParams::Risk( ind1, ind2, neff, itau ) - the same, for one of the scanned taus
UInt_t neve = ind2-ind1+1;
if (neve<1) {
   Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
}
Double_t rval = 0;
for (UInt_t i=ind1; i<ind2+1; i++) {
   rval += LossFunction(i,itau);
}
return rval/neff;

// RuleFitParams::Penalty() - the "lasso" penalty
Log() << kWARNING << "<Penalty> Using unverified code! Check!" << Endl;
Double_t rval = 0;
const std::vector<Double_t> *lincoeff = &(fRuleEnsemble->GetLinCoefficients());
for (UInt_t i=0; i<fNRules; i++) {
   rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
}
for (UInt_t i=0; i<fNLinear; i++) {
   rval += TMath::Abs((*lincoeff)[i]);
}
return rval;
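// Note (added): the quantity accumulated above is the L1 ("lasso") norm
//
//    P = sum_k |a_k| + sum_l |b_l|
//
// over all rule coefficients a_k and linear-term coefficients b_l. It is this
// norm that the tau threshold effectively constrains along the GD path.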

// RuleFitParams::InitGD() - initialize the GD path search
fGDTauVec.resize( fGDNTau );
fGDTauVec[0] = fGDTau;   // single tau: use the configured value
// otherwise scan an equidistant grid in [fGDTauMin, fGDTauMax]
Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
for (UInt_t itau=0; itau<fGDNTau; itau++) {
   fGDTauVec[itau] = static_cast<Double_t>(itau)*dtau + fGDTauMin;
   if (fGDTauVec[itau]>1.0) fGDTauVec[itau] = 1.0;
}
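// Worked example (added): with fGDTauMin=0.0, fGDTauMax=1.0 and fGDNTau=5,
// dtau=0.25 and the scanned grid is tau = {0.00, 0.25, 0.50, 0.75, 1.00};
// the clamp to 1.0 only matters if fGDTauMax is configured above unity.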
fGradVecLinTst.clear();
fGDCoefLinTst.clear();

// rule terms: one gradient/coefficient vector per scanned tau
fGDCoefTst.resize(fGDNTau);
fGradVec.resize(fNRules,0);
fGradVecTst.resize(fGDNTau);
for (UInt_t i=0; i<fGDNTau; i++) {
   fGradVecTst[i].resize(fNRules,0);
   fGDCoefTst[i].resize(fNRules,0);
}

// linear terms
fGDCoefLinTst.resize(fGDNTau);
fGradVecLin.resize(fNLinear,0);
fGradVecLinTst.resize(fGDNTau);
for (UInt_t i=0; i<fGDNTau; i++) {
   fGradVecLinTst[i].resize(fNLinear,0);
   fGDCoefLinTst[i].resize(fNLinear,0);
}

// per-tau error estimates, flags and offsets
fGDErrTst.resize(fGDNTau,0);
fGDErrTstOK.resize(fGDNTau,kTRUE);
fGDOfsTst.resize(fGDNTau,0);
fGDNTauTstOK = fGDNTau;

// RuleFitParams::FindGDTau() - finds the cutoff parameter tau by scanning several paths
if (fGDNTau<2)     return 0;
if (fGDTauScan==0) return 0;
if (fGDOfsTst.size()<1)
   Log() << kFATAL << "BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;

Log() << kINFO << "Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;

UInt_t nscan = fGDTauScan;
// ...
MakeTstGradientVector();
UpdateTstCoefficients();
// evaluate the error periodically (every netst steps), not on every step
if ( (ip==0) || ((ip+1)%netst==0) ) {
   itauMin = RiskPerfTst();
   Log() << kVERBOSE << "tau = " << fGDTauVec[itauMin]   // head of log line abbreviated in this excerpt
         << " => error rate = " << fGDErrTst[itauMin] << Endl;
}
doloop = ((ip<nscan) && (fGDNTauTstOK>3));
// ...
Log() << kERROR << "<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;

fGDTau = fGDTauVec[itauMin];
fRuleEnsemble->SetCoefficients(    fGDCoefTst[itauMin] );
fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
fRuleEnsemble->SetOffset(          fGDOfsTst[itauMin] );
Log() << kINFO << "Best path found with tau = " << Form("%4.4f",fGDTau) << Endl;

// RuleFitParams::MakeGDPath() - finds the gradient-directed path in parameter space
Log() << kINFO << "GD path scan - the scan stops when the max num. of steps is reached or a min is found" << Endl;
Log() << kVERBOSE << "Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
Log() << kVERBOSE << "Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;
EvaluateAveragePath();
EvaluateAveragePerf();
Log() << kVERBOSE << " tau range = [ " << fGDTauVec[0] << " , " << fGDTauVec[fGDNTau-1] << " ]" << Endl;
if (isDebug) InitNtuple();
std::vector<Double_t> coefsMin;
std::vector<Double_t> lincoefsMin;
// ...
// sliding window used to estimate the trend of the error rate
std::vector<Double_t> valx;
std::vector<Double_t> valy;
std::vector<Double_t> valxy;

// check the error rate roughly every imod-th step
int imod = fGDNPathSteps/100;
if (imod<100) imod = std::min(100,fGDNPathSteps);
if (imod>100) imod = 100;
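// Worked example (added): with the default fGDNPathSteps=1000 this gives
// imod = 1000/100 = 10, which the two clamps above raise to
// min(100,1000) = 100 - i.e. the error rate is estimated every 100th step.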
// start the path from the average truth, with all coefficients at zero
fAverageTruth = -CalcAverageTruth();
offsetMin     = fAverageTruth;
fRuleEnsemble->SetOffset(offsetMin);
fRuleEnsemble->ClearCoefficients(0);
fRuleEnsemble->ClearLinCoefficients(0);
for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
   fGDOfsTst[i] = offsetMin;
}
Log() << kVERBOSE << "Obtained initial offset = " << offsetMin << Endl;

Int_t nprescan = FindGDTau();
Int_t stopCondition = 0;
// ...
// step 1: compute the gradient of the risk
if (isVerbose) t0 = clock();
MakeGradientVector();
tgradvec  = Double_t(clock()-t0)/CLOCKS_PER_SEC;
stgradvec += tgradvec;

// step 2: update the coefficients along the gradient
if (isVerbose) t0 = clock();
UpdateCoefficients();
tupgrade  = Double_t(clock()-t0)/CLOCKS_PER_SEC;
stupgrade += tupgrade;

// check the error rate on the first step and on every imod-th step thereafter
docheck = ((iloop==0) || ((iloop+1)%imod==0));
fNTNuval   = Double_t(iloop)*fGDPathStep;   // position along the path
if (isDebug) FillCoefficients();
fNTCoefRad = fRuleEnsemble->CoefficientRadius();

fNTRisk = RiskPath();
trisk   = Double_t(clock()-t0)/CLOCKS_PER_SEC;

// the risk should fall monotonically along the path; count the exceptions
if (fNTRisk>=rprev) {
   // ...
   riskFlat = (nbadrisk>3);
   // ...
   Log() << kWARNING << "This may be OK if minimum is already found" << Endl;
}
if (isVerbose) t0 = clock();
// ...
fNTErrorRate = errroc;
tperf        = Double_t(clock()-t0)/CLOCKS_PER_SEC;

// keep the coefficients of the smallest error rate seen so far
if (fNTErrorRate<=errmin) {
   errmin = fNTErrorRate;
   // ...
   fRuleEnsemble->GetCoefficients(coefsMin);
   lincoefsMin = fRuleEnsemble->GetLinCoefficients();
   offsetMin   = fRuleEnsemble->GetOffset();
}
if ( fNTErrorRate > fGDErrScale*errmin ) found = kTRUE;   // minimum has been passed

// keep a sliding window of the last npreg (nu, error) points
if (valx.size()==npreg) {
   valx.erase(valx.begin());
   valy.erase(valy.begin());
   valxy.erase(valxy.begin());
}
valx.push_back(fNTNuval);
valy.push_back(fNTErrorRate);
valxy.push_back(fNTErrorRate*fNTNuval);
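// Note (added): from such windowed sums a least-squares slope,
//    slope = (n*sum(xy) - sum(x)*sum(y)) / (n*sum(x^2) - sum(x)^2),
// can be estimated to test whether the error rate is still falling when the
// end of the path is reached (cf. the kINFO messages further down).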
if (isDebug) fGDNtuple->Fill();
Log() << kVERBOSE                                  // progress line; head abbreviated in this excerpt
      << Form("%8d",   iloop+1)          << " "
      << Form("%4.4f", fNTRisk)          << " "
      << Form("%4.4f", riskPerf)         << " "
      << Form("%4.4f", fNTRisk+riskPerf) << " "
      // ...
Bool_t endOfLoop = (iloop==fGDNPathSteps);
if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
   if (riskFlat) {
      stopCondition = 1;
   }
   else if (endOfLoop) {
      stopCondition = 2;
   }
   // reached only if no minimum was recorded - fall back on the current point
   Log() << kWARNING << "BUG TRAP: should not be here - still, this bug is harmless ;)" << Endl;
   errmin = fNTErrorRate;
   fRuleEnsemble->GetCoefficients(coefsMin);
   lincoefsMin = fRuleEnsemble->GetLinCoefficients();
   offsetMin   = fRuleEnsemble->GetOffset();
}
Log() << kINFO << "----------------------------------------------------------------" << Endl;
Log() << kINFO << "Found minimum at step " << indMin+1 << " with error = " << errmin << Endl;
Log() << kINFO << "Reason for ending loop: ";
switch (stopCondition) {
case 0:
   Log() << kINFO << "clear minimum found";
   break;
case 1:
   Log() << kINFO << "chaotic behaviour of risk";
   break;
case 2:
   Log() << kINFO << "end of loop reached";
   break;
// ...
}
Log() << kINFO << "----------------------------------------------------------------" << Endl;
Log() << kWARNING << "Check results and maybe decrease GDStep size" << Endl;
// ...
Log() << kINFO << "The error rate was still decreasing at the end of the path" << Endl;
Log() << kINFO << "Increase the number of steps (GDNSteps)." << Endl;
// ...
// restore the best point found along the path
fRuleEnsemble->SetCoefficients( coefsMin );
fRuleEnsemble->SetLinCoefficients( lincoefsMin );
fRuleEnsemble->SetOffset( offsetMin );
// ...
Log() << kFATAL << "BUG TRAP: minimum not found in MakeGDPath()" << Endl;
Double_t stloop = strisk + stupgrade + stgradvec + stperf;
// ...
if (isDebug) fGDNtuple->Write();

// RuleFitParams::FillCoefficients() - stores the current coefficients in the monitoring arrays
fNTOffset = fRuleEnsemble->GetOffset();
for (UInt_t i=0; i<fNRules; i++) {
   fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
}
for (UInt_t i=0; i<fNLinear; i++) {
   fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
}

// RuleFitParams::CalcFStar() - estimates F* (optimum scoring function) for all validation events
Log() << kWARNING << "<CalcFStar> Using unverified code! Check!" << Endl;
UInt_t neve = fPerfIdx2-fPerfIdx1+1;
if (neve<1) {
   Log() << kFATAL << "<CalcFStar> Invalid start/end indices!" << Endl;
}

const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
std::vector<Double_t> fstarSorted;
for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
   const Event& e = *(*events)[i];
   fstarVal = fRuleEnsemble->FStar(e);
   fFstar.push_back(fstarVal);
   fstarSorted.push_back(fstarVal);
}
// the median of F*
std::sort( fstarSorted.begin(), fstarSorted.end() );
UInt_t ind = neve/2;
if ((neve&1)==0) { // even number of events: average the two central values
   fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
}
else {             // odd: take the central value
   fFstarMedian = fstarSorted[ind];
}

// RuleFitParams::Optimism() - estimates the optimism (over-fit) of the training error
Log() << kWARNING << "<Optimism> Using unverified code! Check!" << Endl;
UInt_t neve = fPerfIdx2-fPerfIdx1+1;
if (neve<1) {
   Log() << kFATAL << "<Optimism> Invalid start/end indices!" << Endl;
}
const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
   const Event& e = *(*events)[i];
   yhat = fRuleEnsemble->EvalEvent(i);                                          // model response
   y    = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0 : -1.0); // truth
   w    = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf;                     // normalized weight
   sumyhaty += w*yhat*y;
   // ...
}
Double_t cov = sumyhaty - sumyhat*sumy;
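// Note (added): since the weights w are normalized to sum to one, cov is the
// weighted covariance between the model response and the truth,
//    cov = <yhat*y> - <yhat>*<y>,
// the standard ingredient of the optimism estimate this method implements.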

// RuleFitParams::ErrorRateReg() - regression-style error estimate relative to F*
Log() << kWARNING << "<ErrorRateReg> Using unverified code! Check!" << Endl;
UInt_t neve = fPerfIdx2-fPerfIdx1+1;
if (neve<1) {
   Log() << kFATAL << "<ErrorRateReg> Invalid start/end indices!" << Endl;
}
if (fFstar.size()!=neve) {
   Log() << kFATAL << "--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
         << " Fstar.size() = " << fFstar.size() << " , N(events) = " << neve << Endl;
}
const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
   const Event& e = *(*events)[i];
   sF = fRuleEnsemble->EvalEvent( e );
   sumdf    += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
   sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
}
return sumdf/sumdfmed;
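// Note (added): the returned ratio compares the mean |F* - F| to the mean
// |F* - median(F*)|: a model that reproduces the optimum scoring function F*
// exactly gives 0, while a constant response near the median gives ~1.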

// RuleFitParams::ErrorRateBin() - binary estimate based on sign(F) vs. sign(y)
Log() << kWARNING << "<ErrorRateBin> Using unverified code! Check!" << Endl;
UInt_t neve = fPerfIdx2-fPerfIdx1+1;
if (neve<1) {
   Log() << kFATAL << "<ErrorRateBin> Invalid start/end indices!" << Endl;
}
const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
   const Event& e = *(*events)[i];
   sF    = fRuleEnsemble->EvalEvent( e );
   signF = (sF>0 ? +1 : -1);
   signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1 : -1);
   // ...
}

// RuleFitParams::ErrorRateRocRaw() - ROC-based error from the sorted response distributions
Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
                                               std::vector<Double_t> & sFbkg )
{
   std::sort(sFsig.begin(), sFsig.end());
   std::sort(sFbkg.begin(), sFbkg.end());
   const Double_t minsig = sFsig.front();
   const Double_t minbkg = sFbkg.front();
   const Double_t maxsig = sFsig.back();
   const Double_t maxbkg = sFbkg.back();
   // ...
   const Double_t df = (maxf-minf)/(np-1);
   std::vector<Double_t>::const_iterator indit;
   for (Int_t i=0; i<np; i++) {
      // number of signal events with response >= fcut;
      // std::bind2nd was removed in C++17 - on a sorted vector std::lower_bound is equivalent and faster
      indit = std::lower_bound( sFsig.begin(), sFsig.end(), fcut );
      nesig = sFsig.end()-indit;
      // number of background events with response < fcut
      indit = std::lower_bound( sFbkg.begin(), sFbkg.end(), fcut );
      nrbkg = indit-sFbkg.begin();
      // ...
      area += 0.5*(1+rejb)*effs;
   }
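// Note (added): nesig/nrbkg give the signal efficiency and background
// rejection at each scanned cut, and the 0.5*(1+rejb)*effs terms accumulate
// an approximate area under the efficiency-vs-rejection curve; the error
// rate is then presumably reported as one minus this area.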

// RuleFitParams::ErrorRateRoc() - estimates the error rate with the current set of parameters
Log() << kWARNING << "<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
UInt_t neve = fPerfIdx2-fPerfIdx1+1;
if (neve<1) {
   Log() << kFATAL << "<ErrorRateRoc> Invalid start/end indices!" << Endl;
}
const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
std::vector<Double_t> sFsig;
std::vector<Double_t> sFbkg;
for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
   const Event& e = *(*events)[i];
   sF = fRuleEnsemble->EvalEvent(i);
   if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
      sFsig.push_back(sF);
      sumfsig += sF;
   }
   else {
      sFbkg.push_back(sF);
      sumfbkg += sF;
   }
}
fsigave = sumfsig/sFsig.size();
fbkgave = sumfbkg/sFbkg.size();
return ErrorRateRocRaw( sFsig, sFbkg );

// RuleFitParams::ErrorRateRocTst() - the same estimate, computed for every scanned tau
Log() << kWARNING << "<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
UInt_t neve = fPerfIdx2-fPerfIdx1+1;
if (neve<1) {
   Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
}
const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
std::vector< std::vector<Double_t> > sFsig;
std::vector< std::vector<Double_t> > sFbkg;
sFsig.resize( fGDNTau );
sFbkg.resize( fGDNTau );
for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      // evaluate the event with the coefficient set of each scanned tau
      sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
         sFsig[itau].push_back(sF);
      }
      else {
         sFbkg[itau].push_back(sF);
      }
   }
}
for (UInt_t itau=0; itau<fGDNTau; itau++) {
   err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
   fGDErrTst[itau] = err;
}

// RuleFitParams::RiskPerfTst() - validation risk for every scanned tau; returns the index of the best tau
UInt_t neve = fPerfIdx2-fPerfIdx1+1;
if (neve<1) {
   Log() << kFATAL << "<RiskPerfTst> Invalid start/end indices!" << Endl;
}
for (UInt_t itau=0; itau<fGDNTau; itau++) {
   if (fGDErrTstOK[itau]) {
      fGDErrTst[itau] = RiskPerf(itau);
      sumx  += fGDErrTst[itau];
      sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
      if (fGDErrTst[itau]>maxx) maxx = fGDErrTst[itau];
      if (fGDErrTst[itau]<minx) {
         minx = fGDErrTst[itau];
         // ...
      }
   }
}
// disable the taus whose error lies far above the current minimum
for (UInt_t itau=0; itau<fGDNTau; itau++) {
   if (fGDErrTstOK[itau]) {
      if (fGDErrTst[itau] > maxacc) {
         fGDErrTstOK[itau] = kFALSE;
         // ...
      }
   }
}
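// Note (added): taus whose validation risk exceeds maxacc (a cut derived from
// the running minimum and the spread accumulated above) are flagged not-OK
// and drop out of the scan; fGDNTauTstOK shrinks accordingly until the loop
// in FindGDTau() terminates.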

// RuleFitParams::MakeTstGradientVector() - gradient vectors for all taus (same algorithm as MakeGradientVector())
UInt_t neve = fPathIdx2-fPathIdx1+1;   // fixed: was fPathIdx1-fPathIdx2+1, which underflows
if (neve<1) {
   Log() << kFATAL << "<MakeTstGradientVector> Invalid start/end indices!" << Endl;
}
const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

// reset the gradients of all still-active taus
for (UInt_t itau=0; itau<fGDNTau; itau++) {
   if (fGDErrTstOK[itau]) {
      for (UInt_t ir=0; ir<fNRules; ir++) {
         fGradVecTst[itau][ir] = 0;
      }
      for (UInt_t il=0; il<fNLinear; il++) {
         fGradVecLinTst[itau][il] = 0;
      }
   }
}
const std::vector<UInt_t> *eventRuleMap = 0;
for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
   const Event *e = (*events)[i];
   if (fRuleEnsemble->DoRules()) {
      eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
      nrules = (*eventRuleMap).size();
   }
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // only events inside the ramp (|F|<1) have a non-zero gradient
         y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1.0 : -1.0);
         r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
         for (UInt_t ir=0; ir<nrules; ir++) {
            rind = (*eventRuleMap)[ir];
            fGradVecTst[itau][rind] += r;
         }
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
         }
      }
   }
}
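// Note (added): for the ramp loss L = w*(y - F)^2 (inside |F|<1), the negative
// gradient w.r.t. a rule coefficient a_k is proportional to
//    sum_i w_i * (y_i - F(x_i)) * r_k(x_i),
// and since r_k(x_i) is 0 or 1 each event simply adds r to the entry of every
// rule that fires on it; linear terms are weighted by the raw variable value
// instead. The factor norm carries the overall normalisation.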

// RuleFitParams::UpdateTstCoefficients() - coefficient update for all taus
for (UInt_t itau=0; itau<fGDNTau; itau++) {
   if (fGDErrTstOK[itau]) {
      // find the largest gradient (in magnitude) among rules and linear terms
      maxr = ( fNRules>0 ?
               TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue() ))) : 0 );
      maxl = ( fNLinear>0 ?
               TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue() ))) : 0 );
      Double_t maxv = (maxr>maxl ? maxr : maxl);
      cthresh = maxv * fGDTauVec[itau];
      // update only the coefficients whose gradient passes the tau threshold
      for (UInt_t i=0; i<fNRules; i++) {
         val = fGradVecTst[itau][i];
         if (TMath::Abs(val)>=cthresh) {
            fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
         }
      }
      for (UInt_t i=0; i<fNLinear; i++) {
         val = fGradVecLinTst[itau][i];
         if (TMath::Abs(val)>=cthresh) {
            fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
         }
      }
   }
}
// recompute the offsets for all taus
CalcTstAverageResponse();
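// Note (added): the tau threshold interpolates between two regimes - with
// tau=0 every coefficient moves on each step (ridge-like behaviour), while
// with tau=1 only the coefficient(s) carrying the largest gradient move,
// giving the sparse, lasso-like paths that RuleFit favours.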

// RuleFitParams::MakeGradientVector() - gradient of the risk for the current coefficients
UInt_t neve = fPathIdx2-fPathIdx1+1;
if (neve<1) {
   Log() << kFATAL << "<MakeGradientVector> Invalid start/end indices!" << Endl;
}
const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

// reset the gradients
for (UInt_t ir=0; ir<fNRules; ir++) {
   fGradVec[ir] = 0;
}
for (UInt_t il=0; il<fNLinear; il++) {
   fGradVecLin[il] = 0;
}

const std::vector<UInt_t> *eventRuleMap = 0;
for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
   const Event *e = (*events)[i];
   sF = fRuleEnsemble->EvalEvent( i );
   // only events inside the ramp (|F|<1) contribute to the gradient
   if (fRuleEnsemble->DoRules()) {
      eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
      nrules = (*eventRuleMap).size();
   }
   y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1.0 : -1.0);
   r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
   for (UInt_t ir=0; ir<nrules; ir++) {
      rind = (*eventRuleMap)[ir];
      fGradVec[rind] += r;
   }
   for (UInt_t il=0; il<fNLinear; il++) {
      fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
   }
}

// RuleFitParams::UpdateCoefficients() - establish the maximum gradient and update rules, linear terms and offset
Double_t maxr = ( fRuleEnsemble->DoRules() ?
                  TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue() ))) : 0 );
Double_t maxl = ( fRuleEnsemble->DoLinear() ?
                  TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue() ))) : 0 );
Double_t maxv = (maxr>maxl ? maxr : maxl);
// ...
useRThresh = cthresh;
useLThresh = cthresh;
for (UInt_t i=0; i<fGradVec.size(); i++) {
   gval = fGradVec[i];
   if (TMath::Abs(gval)>=useRThresh) {
      coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
      fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
   }
}
for (UInt_t i=0; i<fGradVecLin.size(); i++) {
   lval = fGradVecLin[i];
   if (TMath::Abs(lval)>=useLThresh) {
      lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
      fRuleEnsemble->SetLinCoefficient(i,lcoef);
   }
}
// recalculate the offset after the coefficient update
Double_t offset = CalcAverageResponse();
fRuleEnsemble->SetOffset( offset );

// RuleFitParams::CalcTstAverageResponse() - offsets for all test paths
for (UInt_t itau=0; itau<fGDNTau; itau++) {
   if (fGDErrTstOK[itau]) {
      fGDOfsTst[itau] = 0;
      for (UInt_t s=0; s<fNLinear; s++) {
         fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
      }
      for (UInt_t r=0; r<fNRules; r++) {
         fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
      }
   }
}

// RuleFitParams::CalcAverageResponse() - offset for the current coefficients
for (UInt_t s=0; s<fNLinear; s++) {
   ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
}
for (UInt_t r=0; r<fNRules; r++) {
   ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
}
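// Note (added): both offset calculations implement the centering
//    offset = -( sum_l b_l*<x_l> + sum_k a_k*<r_k> ),
// with <x_l> and <r_k> the weighted averages cached by EvaluateAverage(), so
// that the ensemble response averages to zero over the path sample.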

// RuleFitParams::CalcAverageTruth() - weighted average truth over the path sample
if (fPathIdx2<=fPathIdx1) {
   Log() << kFATAL << "<CalcAverageTruth> Invalid start/end indices!" << Endl;
}
const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
   Double_t ew = fRuleFit->GetTrainingEventWeight(i);
   if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
   else                                                                 enbkg += ew;
   sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i]) ? 1.0 : -1.0);
}
Log() << kVERBOSE << "Effective number of signal / background = " << ensig << " / " << enbkg << Endl;
return sum/fNEveEffPath;

// RuleFitParams::Type() - returns +1 for signal and -1 for background
return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1 : -1);

// RuleFitParams::SetMsgType()
fLogger->SetMinType(t);