Logo ROOT  
Reference Guide
RuleFitParams.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RuleFitParams *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation *
12 * *
13 * Authors (alphabetical): *
14 * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
15 * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
16 * *
17 * Copyright (c) 2005: *
18 * CERN, Switzerland *
19 * Iowa State U. *
20 * MPI-K Heidelberg, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27/*! \class TMVA::RuleFitParams
28\ingroup TMVA
29A class doing the actual fitting of a linear model using rules as base functions.
30*/
31#include "TMVA/RuleFitParams.h"
32
33#include "TMVA/DataSetInfo.h"
34#include "TMVA/MethodRuleFit.h"
35#include "TMVA/MsgLogger.h"
36#include "TMVA/RuleEnsemble.h"
37#include "TMVA/RuleFit.h"
38#include "TMVA/Timer.h"
39#include "TMVA/Tools.h"
40#include "TMVA/Types.h"
41
42#include "TTree.h"
43#include "TMath.h"
44
45#include <iomanip>
46#include <numeric>
47#include <algorithm>
48#include <functional>
49
52
60
61////////////////////////////////////////////////////////////////////////////////
62/// constructor
63
65 : fRuleFit ( 0 )
66 , fRuleEnsemble ( 0 )
67 , fNRules ( 0 )
68 , fNLinear ( 0 )
69 , fPathIdx1 ( 0 )
70 , fPathIdx2 ( 0 )
71 , fPerfIdx1 ( 0 )
72 , fPerfIdx2 ( 0 )
73 , fGDNTauTstOK( 0 )
74 , fGDNTau ( 51 )
75 , fGDTauPrec ( 0.02 )
76 , fGDTauScan ( 1000 )
77 , fGDTauMin ( 0.0 )
78 , fGDTauMax ( 1.0 )
79 , fGDTau ( -1.0 )
80 , fGDPathStep ( 0.01 )
81 , fGDNPathSteps ( 1000 )
82 , fGDErrScale ( 1.1 )
83 , fAverageTruth( 0 )
84 , fFstarMedian ( 0 )
85 , fGDNtuple ( 0 )
86 , fNTRisk ( 0 )
87 , fNTErrorRate ( 0 )
88 , fNTNuval ( 0 )
89 , fNTCoefRad ( 0 )
90 , fNTOffset ( 0 )
91 , fNTCoeff ( 0 )
92 , fNTLinCoeff ( 0 )
93 , fsigave( 0 )
94 , fsigrms( 0 )
95 , fbkgave( 0 )
96 , fbkgrms( 0 )
97 , fLogger( new MsgLogger("RuleFit") )
98{
99 Init();
100}
101////////////////////////////////////////////////////////////////////////////////
102/// destructor
103
105{
106 if (fNTCoeff) { delete [] fNTCoeff; fNTCoeff = nullptr; }
107 if (fNTLinCoeff) { delete [] fNTLinCoeff; fNTLinCoeff = nullptr; }
108 delete fLogger;
109}
110
111////////////////////////////////////////////////////////////////////////////////
112/// Initializes all parameters using the RuleEnsemble and the training tree
113
115{
116 if (fRuleFit==0) return;
117 if (fRuleFit->GetMethodRuleFit()==0) {
118 Log() << kFATAL << "RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
119 }
120 UInt_t neve = fRuleFit->GetTrainingEvents().size();
121 //
122 fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
123 fNRules = fRuleEnsemble->GetNRules();
124 fNLinear = fRuleEnsemble->GetNLinear();
125
126 //
127 // Fraction of events used for validation should be close of unity..
128 // Always selection from the END
129 //
130 UInt_t ofs;
131 fPerfIdx1 = 0;
132 if (neve>1) {
133 fPerfIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
134 }
135 else {
136 fPerfIdx2 = 0;
137 }
138 ofs = neve - fPerfIdx2 - 1;
139 fPerfIdx1 += ofs;
140 fPerfIdx2 += ofs;
141 //
142 // Fraction of events used for the path search can be allowed to be a smaller value, say 0.5
143 // Alwas select events from the BEGINNING.
144 // This means that the validation and search samples will not overlap if both fractions are <0.5.
145 //
146 fPathIdx1 = 0;
147 if (neve>1) {
148 fPathIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
149 }
150 else {
151 fPathIdx2 = 0;
152 }
153 //
154 // summarize weights
155 //
156 fNEveEffPath = 0;;
157 for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
158 fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
159 }
160
161 fNEveEffPerf=0;
162 for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
163 fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
164 }
165 //
166 Log() << kVERBOSE << "Path constr. - event index range = [ " << fPathIdx1 << ", " << fPathIdx2 << " ]"
167 << ", effective N(events) = " << fNEveEffPath << Endl;
168 Log() << kVERBOSE << "Error estim. - event index range = [ " << fPerfIdx1 << ", " << fPerfIdx2 << " ]"
169 << ", effective N(events) = " << fNEveEffPerf << Endl;
170 //
171 if (fRuleEnsemble->DoRules())
172 Log() << kDEBUG << "Number of rules in ensemble: " << fNRules << Endl;
173 else
174 Log() << kDEBUG << "Rules are disabled " << Endl;
175
176 if (fRuleEnsemble->DoLinear())
177 Log() << kDEBUG << "Number of linear terms: " << fNLinear << Endl;
178 else
179 Log() << kDEBUG << "Linear terms are disabled " << Endl;
180}
181
182////////////////////////////////////////////////////////////////////////////////
183/// initializes the ntuple
184
186{
187 fGDNtuple= new TTree("MonitorNtuple_RuleFitParams","RuleFit path search");
188 fGDNtuple->Branch("risk", &fNTRisk, "risk/D");
189 fGDNtuple->Branch("error", &fNTErrorRate,"error/D");
190 fGDNtuple->Branch("nuval", &fNTNuval, "nuval/D");
191 fGDNtuple->Branch("coefrad", &fNTCoefRad, "coefrad/D");
192 fGDNtuple->Branch("offset", &fNTOffset, "offset/D");
193 //
194 fNTCoeff = (fNRules >0 ? new Double_t[fNRules] : 0);
195 fNTLinCoeff = (fNLinear>0 ? new Double_t[fNLinear] : 0);
196
197 for (UInt_t i=0; i<fNRules; i++) {
198 fGDNtuple->Branch(Form("a%d",i+1),&fNTCoeff[i],Form("a%d/D",i+1));
199 }
200 for (UInt_t i=0; i<fNLinear; i++) {
201 fGDNtuple->Branch(Form("b%d",i+1),&fNTLinCoeff[i],Form("b%d/D",i+1));
202 }
203}
204
205////////////////////////////////////////////////////////////////////////////////
206/// evaluate the average of each variable and f(x) in the given range
207
209 std::vector<Double_t> &avsel,
210 std::vector<Double_t> &avrul )
211{
212 UInt_t neve = ind2-ind1+1;
213 if (neve<1) {
214 Log() << kFATAL << "<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
215 }
216 //
217 avsel.clear();
218 avrul.clear();
219 //
220 if (fNLinear>0) avsel.resize(fNLinear,0);
221 if (fNRules>0) avrul.resize(fNRules,0);
222 const std::vector<UInt_t> *eventRuleMap=0;
223 Double_t ew;
224 Double_t sumew=0;
225 //
226 // Loop over events and calculate average of linear terms (normalised) and rule response.
227 //
228 if (fRuleEnsemble->IsRuleMapOK()) { // MakeRuleMap() has been called
229 for ( UInt_t i=ind1; i<ind2+1; i++) {
230 ew = fRuleFit->GetTrainingEventWeight(i);
231 sumew += ew;
232 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
233 avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
234 }
235 // loop over rules
236 UInt_t nrules=0;
237 if (fRuleEnsemble->DoRules()) {
238 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
239 nrules = (*eventRuleMap).size();
240 }
241 for (UInt_t r=0; r<nrules; r++) {
242 avrul[(*eventRuleMap)[r]] += ew;
243 }
244 }
245 }
246 else { // MakeRuleMap() has not yet been called
247 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
248 for ( UInt_t i=ind1; i<ind2+1; i++) {
249 ew = fRuleFit->GetTrainingEventWeight(i);
250 sumew += ew;
251 // first cache rule/lin response
252 /* Double_t val = */ fRuleEnsemble->EvalLinEvent(*((*events)[i]));
253 /* val = */ fRuleEnsemble->EvalEvent(*((*events)[i]));
254 // loop over linear terms
255 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
256 avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
257 }
258 // loop over rules
259 for (UInt_t r=0; r<fNRules; r++) {
260 avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
261 }
262 }
263 }
264 // average variable
265 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
266 avsel[sel] = avsel[sel] / sumew;
267 }
268 // average rule response, excl coeff
269 for (UInt_t r=0; r<fNRules; r++) {
270 avrul[r] = avrul[r] / sumew;
271 }
272}
273
274////////////////////////////////////////////////////////////////////////////////
275/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1)
276/// This is used for binary Classifications where y = {+1,-1} for (sig,bkg)
277
279{
280 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( e )) );
281 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
282 //
283 return diff*diff*e.GetWeight();
284}
285
286////////////////////////////////////////////////////////////////////////////////
287/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1)
288/// This is used for binary Classifications where y = {+1,-1} for (sig,bkg)
289
291{
292 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( evtidx )) );
293 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
294 //
295 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
296}
297
298////////////////////////////////////////////////////////////////////////////////
299/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1)
300/// This is used for binary Classifications where y = {+1,-1} for (sig,bkg)
301
303{
304 Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
305 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,e) );
306 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
307 //
308 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
309}
310
311////////////////////////////////////////////////////////////////////////////////
312/// risk assessment
313
315{
316 UInt_t neve = ind2-ind1+1;
317 if (neve<1) {
318 Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
319 }
320 Double_t rval=0;
321 //
322 // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
323 for ( UInt_t i=ind1; i<ind2+1; i++) {
324 rval += LossFunction(i);
325 }
326 rval = rval/neff;
327
328 return rval;
329}
330
331////////////////////////////////////////////////////////////////////////////////
332/// risk assessment for tau model `<itau>`
333
335{
336 UInt_t neve = ind2-ind1+1;
337 if (neve<1) {
338 Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
339 }
340 Double_t rval=0;
341 //
342 // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
343 for ( UInt_t i=ind1; i<ind2+1; i++) {
344 rval += LossFunction(i,itau);
345 }
346 rval = rval/neff;
347
348 return rval;
349}
350
351////////////////////////////////////////////////////////////////////////////////
352/// This is the "lasso" penalty
353/// To be used for regression.
354/// --- NOT USED ---
355
357{
358 Log() << kWARNING << "<Penalty> Using unverified code! Check!" << Endl;
359 Double_t rval=0;
360 const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
361 for (UInt_t i=0; i<fNRules; i++) {
362 rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
363 }
364 for (UInt_t i=0; i<fNLinear; i++) {
365 rval += TMath::Abs((*lincoeff)[i]);
366 }
367 return rval;
368}
369
370////////////////////////////////////////////////////////////////////////////////
371/// Initialize GD path search
372
374{
375 if (fGDNTau<2) {
376 fGDNTau = 1;
377 fGDTauScan = 0;
378 }
379 if (fGDTau<0.0) {
380 // fGDNTau = 50; already set in MethodRuleFit
381 fGDTauScan = 1000;
382 fGDTauMin = 0.0;
383 fGDTauMax = 1.0;
384 }
385 else {
386 fGDNTau = 1;
387 fGDTauScan = 0;
388 }
389 // set all taus
390 fGDTauVec.clear();
391 fGDTauVec.resize( fGDNTau );
392 if (fGDNTau==1) {
393 fGDTauVec[0] = fGDTau;
394 }
395 else {
396 // set tau vector - TODO: make a smarter choice of range in tau
397 Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
398 for (UInt_t itau=0; itau<fGDNTau; itau++) {
399 fGDTauVec[itau] = static_cast<Double_t>(itau)*dtau + fGDTauMin;
400 if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
401 }
402 }
403 // initialize path search vectors
404
405 fGradVec.clear();
406 fGradVecLin.clear();
407 fGradVecTst.clear();
408 fGradVecLinTst.clear();
409 fGDErrTst.clear();
410 fGDErrTstOK.clear();
411 fGDOfsTst.clear();
412 fGDCoefTst.clear();
413 fGDCoefLinTst.clear();
414 //
415 // rules
416 //
417 fGDCoefTst.resize(fGDNTau);
418 fGradVec.resize(fNRules,0);
419 fGradVecTst.resize(fGDNTau);
420 for (UInt_t i=0; i<fGDNTau; i++) {
421 fGradVecTst[i].resize(fNRules,0);
422 fGDCoefTst[i].resize(fNRules,0);
423 }
424 //
425 // linear terms
426 //
427 fGDCoefLinTst.resize(fGDNTau);
428 fGradVecLin.resize(fNLinear,0);
429 fGradVecLinTst.resize(fGDNTau);
430 for (UInt_t i=0; i<fGDNTau; i++) {
431 fGradVecLinTst[i].resize(fNLinear,0);
432 fGDCoefLinTst[i].resize(fNLinear,0);
433 }
434 //
435 // error, coefs etc
436 //
437 fGDErrTst.resize(fGDNTau,0);
438 fGDErrTstOK.resize(fGDNTau,kTRUE);
439 fGDOfsTst.resize(fGDNTau,0);
440 fGDNTauTstOK = fGDNTau;
441 //
442 // calculate average selectors and rule responses for the path sample size
443 //
444}
445
446////////////////////////////////////////////////////////////////////////////////
447/// This finds the cutoff parameter tau by scanning several different paths
448
450{
451 if (fGDNTau<2) return 0;
452 if (fGDTauScan==0) return 0;
453
454 if (fGDOfsTst.size()<1)
455 Log() << kFATAL << "BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;
456 //
457 Log() << kINFO << "Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;
458 //
459 // Find how many points to scan and how often to calculate the error
460 UInt_t nscan = fGDTauScan; //std::min(static_cast<Int_t>(fGDTauScan),fGDNPathSteps);
461 UInt_t netst = std::min(nscan,UInt_t(100));
462 UInt_t nscanned=0;
463 //
464 //--------------------
465 // loop over the paths
466 //--------------------
467 // The number of MAXIMUM loops is given by nscan.
468 // At each loop, the paths being far away from the minimum
469 // are rejected. Hence at each check (every netst events), the number
470 // of paths searched will be reduced.
471 // The maximum 'distance' from the minimum error rate is
472 // 1 sigma. See RiskPerfTst() for details.
473 //
474 Bool_t doloop=kTRUE;
475 UInt_t ip=0;
476 UInt_t itauMin=0;
477 Timer timer( nscan, "RuleFit" );
478 while (doloop) {
479 // make gradvec
480 MakeTstGradientVector();
481 // update coefs
482 UpdateTstCoefficients();
483 // estimate error and do the sum
484 // do this at index=0, netst-1, 2*netst-1 ...
485 nscanned++;
486 if ( (ip==0) || ((ip+1)%netst==0) ) {
487 // ErrorRateRocTst( );
488 itauMin = RiskPerfTst();
489 Log() << kVERBOSE << Form("%4d",ip+1) << ". tau = " << Form("%4.4f",fGDTauVec[itauMin])
490 << " => error rate = " << fGDErrTst[itauMin] << Endl;
491 }
492 ip++;
493 doloop = ((ip<nscan) && (fGDNTauTstOK>3));
495 if (Log().GetMinType()>kVERBOSE)
496 timer.DrawProgressBar(ip);
497 }
498 //
499 // Set tau and coefs
500 // Downscale tau slightly in order to avoid numerical problems
501 //
502 if (nscanned==0) {
503 Log() << kERROR << "<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;
504 }
505 fGDTau = fGDTauVec[itauMin];
506 fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
507 fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
508 fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
509 Log() << kINFO << "Best path found with tau = " << Form("%4.4f",fGDTau)
510 << " after " << timer.GetElapsedTime() << " " << Endl;
511
512 return nscan;
513}
514
515////////////////////////////////////////////////////////////////////////////////
516/// The following finds the gradient directed path in parameter space.
517/// More work is needed... FT, 24/9/2006
518///
519/// The algorithm is currently as follows (if not otherwise stated, the sample
520/// used below is [fPathIdx1,fPathIdx2]):
521///
522/// 1. Set offset to -average(y(true)) and all coefs=0 => average of F(x)==0
523/// 2. FindGDTau() : start scanning using several paths defined by different tau
524/// choose the tau yielding the best path
525/// 3. start the scanning the chosen path
526/// 4. check error rate at a given frequency
527/// data used for check: [fPerfIdx1,fPerfIdx2]
528/// 5. stop when either of the following conditions are fullfilled:
529/// 1. loop index==fGDNPathSteps
530/// 2. error > fGDErrScale*errmin
531/// 3. only in DEBUG mode: risk is not monotonously decreasing
532///
533/// The algorithm will warn if:
534/// 1. the error rate was still decreasing when loop finished -> increase fGDNPathSteps!
535/// 2. minimum was found at an early stage -> decrease fGDPathStep
536/// 3. DEBUG: risk > previous risk -> entered chaotic region (regularization is too small)
537
539{
540 Log() << kINFO << "GD path scan - the scan stops when the max num. of steps is reached or a min is found"
541 << Endl;
542 Log() << kVERBOSE << "Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
543 Log() << kVERBOSE << "Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;
544
545 // check if debug mode
546 const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
547 const Bool_t isDebug = (Log().GetMinType()<=kDEBUG);
548
549 // init GD parameters and clear coeff vectors
550 InitGD();
551
552 // evaluate average response of rules/linear terms (with event weights)
553 EvaluateAveragePath();
554 EvaluateAveragePerf();
555
556 // initial estimate; all other a(i) are zero
557 Log() << kVERBOSE << "Creating GD path" << Endl;
558 Log() << kVERBOSE << " N(steps) = " << fGDNPathSteps << Endl;
559 Log() << kVERBOSE << " step = " << fGDPathStep << Endl;
560 Log() << kVERBOSE << " N(tau) = " << fGDNTau << Endl;
561 Log() << kVERBOSE << " N(tau steps) = " << fGDTauScan << Endl;
562 Log() << kVERBOSE << " tau range = [ " << fGDTauVec[0] << " , " << fGDTauVec[fGDNTau-1] << " ]" << Endl;
563
564 // init ntuple
565 if (isDebug) InitNtuple();
566
567 // DEBUG: risk scan
568 Int_t nbadrisk=0; // number of points where risk(i+1)>risk(i)
569 Double_t trisk=0; // time per risk evaluation
570 Double_t strisk=0; // total time
571 Double_t rprev=1e32; // previous risk
572
573 // parameters set at point with min error
574 Double_t errmin=1e32; // min error
575 // Double_t riskMin=0; // risk
576 Int_t indMin=-1; // index
577 std::vector<Double_t> coefsMin; // rule coefs
578 std::vector<Double_t> lincoefsMin; // linear coefs
579 Double_t offsetMin; // offset
580
581
582 // DEBUG: timing
583 clock_t t0=0;
584 Double_t tgradvec;
585 Double_t tupgrade;
586 Double_t tperf;
587 Double_t stgradvec=0;
588 Double_t stupgrade=0;
589 Double_t stperf=0;
590
591 // linear regression to estimate slope of error rate evolution
592 const UInt_t npreg=5;
593 std::vector<Double_t> valx;
594 std::vector<Double_t> valy;
595 std::vector<Double_t> valxy;
596
597 // loop related
598 Bool_t docheck; // true if an error rate check is to be done
599 Int_t iloop=0; // loop index
600 Bool_t found=kFALSE; // true if minimum is found
601 Bool_t riskFlat=kFALSE; // DEBUG: flag is true if risk evolution behaves badly
602 Bool_t done = kFALSE; // flag that the scan is done
603
604 // calculate how often to check error rate
605 int imod = fGDNPathSteps/100;
606 if (imod<100) imod = std::min(100,fGDNPathSteps);
607 if (imod>100) imod=100;
608
609 // reset coefficients
610 fAverageTruth = -CalcAverageTruth();
611 offsetMin = fAverageTruth;
612 fRuleEnsemble->SetOffset(offsetMin);
613 fRuleEnsemble->ClearCoefficients(0);
614 fRuleEnsemble->ClearLinCoefficients(0);
615 for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
616 fGDOfsTst[i] = offsetMin;
617 }
618 Log() << kVERBOSE << "Obtained initial offset = " << offsetMin << Endl;
619
620 // find the best tau - returns the number of steps performed in scan
621 Int_t nprescan = FindGDTau();
622
623 //
624 //
625 // calculate F*
626 //
627 // CalcFStar(fPerfIdx1, fPerfIdx2);
628 //
629
630 // set some ntuple values
631 fNTRisk = rprev;
632 fNTCoefRad = -1.0;
633 fNTErrorRate = 0;
634
635 // a local flag indicating for what reason the search was stopped
636 Int_t stopCondition=0;
637
638 Log() << kINFO << "Fitting model..." << Endl;
639 // start loop with timer
640 Timer timer( fGDNPathSteps, "RuleFit" );
641 Log() << kWARNING;
642 while (!done) {
643 // Make gradient vector (eq 44, ref 1)
644 if (isVerbose) t0 = clock();
645 MakeGradientVector();
646 if (isVerbose) {
647 tgradvec = Double_t(clock()-t0)/CLOCKS_PER_SEC;
648 stgradvec += tgradvec;
649 }
650
651 // Calculate the direction in parameter space (eq 25, ref 1) and update coeffs (eq 22, ref 1)
652 if (isVerbose) t0 = clock();
653 UpdateCoefficients();
654 if (isVerbose) {
655 tupgrade = Double_t(clock()-t0)/CLOCKS_PER_SEC;
656 stupgrade += tupgrade;
657 }
658
659 // don't check error rate every loop
660 docheck = ((iloop==0) ||((iloop+1)%imod==0));
661
662 if (docheck) {
663 // draw progressbar only if not debug
664 if (!isVerbose)
665 timer.DrawProgressBar(iloop);
666 fNTNuval = Double_t(iloop)*fGDPathStep;
667 fNTRisk = 0.0;
668
669 // check risk evolution
670
671 if (isDebug) FillCoefficients();
672 fNTCoefRad = fRuleEnsemble->CoefficientRadius();
673
674 // calculate risk
675 t0 = clock();
676 fNTRisk = RiskPath();
677 trisk = Double_t(clock()-t0)/CLOCKS_PER_SEC;
678 strisk += trisk;
679 //
680 // Check for an increase in risk.
681 // Such an increase would imply that the regularization is too small.
682 // Stop the iteration if this happens.
683 //
684 if (fNTRisk>=rprev) {
685 if (fNTRisk>rprev) {
686 nbadrisk++;
687 Log() << "Risk(i+1)>=Risk(i) in path" << Endl;
688 riskFlat=(nbadrisk>3);
689 if (riskFlat) {
690 Log() << kWARNING << "Chaotic behaviour of risk evolution" << Endl;
691 Log() << "--- STOPPING MINIMISATION ---" << Endl;
692 Log() << "This may be OK if minimum is already found" << Endl;
693 }
694 }
695 }
696 rprev = fNTRisk;
697 //
698 // Estimate the error rate using cross validation
699 // Well, not quite full cross validation since we only
700 // use ONE model.
701 //
702 if (isVerbose) t0 = clock();
703 fNTErrorRate = 0;
704
705 // Check error rate
706 Double_t errroc;//= ErrorRateRoc();
707 Double_t riskPerf = RiskPerf();
708 // Double_t optimism = Optimism();
709 //
710 errroc = riskPerf;
711 //
712 fNTErrorRate = errroc;
713 //
714 if (isVerbose) {
715 tperf = Double_t(clock()-t0)/CLOCKS_PER_SEC;
716 stperf +=tperf;
717 }
718 //
719 // Always take the last min.
720 // For each step the risk is reduced.
721 //
722 if (fNTErrorRate<=errmin) {
723 errmin = fNTErrorRate;
724 // riskMin = fNTRisk;
725 indMin = iloop;
726 fRuleEnsemble->GetCoefficients(coefsMin);
727 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
728 offsetMin = fRuleEnsemble->GetOffset();
729 }
730 if ( fNTErrorRate > fGDErrScale*errmin) found = kTRUE;
731 //
732 // check slope of last couple of points
733 //
734 if (valx.size()==npreg) {
735 valx.erase(valx.begin());
736 valy.erase(valy.begin());
737 valxy.erase(valxy.begin());
738 }
739 valx.push_back(fNTNuval);
740 valy.push_back(fNTErrorRate);
741 valxy.push_back(fNTErrorRate*fNTNuval);
742
744
745 //
746 if (isDebug) fGDNtuple->Fill();
747 if (isVerbose) {
748 Log() << kVERBOSE << "ParamsIRE : "
749 << std::setw(10)
750 << Form("%8d",iloop+1) << " "
751 << Form("%4.4f",fNTRisk) << " "
752 << Form("%4.4f",riskPerf) << " "
753 << Form("%4.4f",fNTRisk+riskPerf) << " "
754 // << Form("%4.4f",fsigave+fbkgave) << " "
755 // << Form("%4.4f",fsigave) << " "
756 // << Form("%4.4f",fsigrms) << " "
757 // << Form("%4.4f",fbkgave) << " "
758 // << Form("%4.4f",fbkgrms) << " "
759
760 // << Form("%4.4f",fRuleEnsemble->CoefficientRadius())
761 << Endl;
762 }
763 }
764 iloop++;
765 // Stop iteration under various conditions
766 // * The condition R(i+1)<R(i) is no longer true (when then implicit regularization is too weak)
767 // * If the current error estimate is > factor*errmin (factor = 1.1)
768 // * We have reach the last step...
769 Bool_t endOfLoop = (iloop==fGDNPathSteps);
770 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
771 if (riskFlat) {
772 stopCondition = 1;
773 }
774 else if (endOfLoop) {
775 stopCondition = 2;
776 }
777 if (indMin<0) {
778 Log() << kWARNING << "BUG TRAP: should not be here - still, this bug is harmless;)" << Endl;
779 errmin = fNTErrorRate;
780 // riskMin = fNTRisk;
781 indMin = iloop;
782 fRuleEnsemble->GetCoefficients(coefsMin);
783 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
784 offsetMin = fRuleEnsemble->GetOffset();
785 }
786 found = kTRUE;
787 }
788 done = (found);
789 }
790 Log() << Endl;
791 Log() << kINFO << "Minimisation elapsed time : " << timer.GetElapsedTime() << " " << Endl;
792 Log() << kINFO << "----------------------------------------------------------------" << Endl;
793 Log() << kINFO << "Found minimum at step " << indMin+1 << " with error = " << errmin << Endl;
794 Log() << kINFO << "Reason for ending loop: ";
795 switch (stopCondition) {
796 case 0:
797 Log() << kINFO << "clear minima found";
798 break;
799 case 1:
800 Log() << kINFO << "chaotic behaviour of risk";
801 break;
802 case 2:
803 Log() << kINFO << "end of loop reached";
804 break;
805 default:
806 Log() << kINFO << "unknown!";
807 break;
808 }
809 Log() << Endl;
810 Log() << kINFO << "----------------------------------------------------------------" << Endl;
811
812 // check if early minima - might be an indication of too large stepsize
813 if ( Double_t(indMin)/Double_t(nprescan+fGDNPathSteps) < 0.05 ) {
814 Log() << kWARNING << "Reached minimum early in the search" << Endl;
815 Log() << "Check results and maybe decrease GDStep size" << Endl;
816 }
817 //
818 // quick check of the sign of the slope for the last npreg points
819 //
820 Double_t sumx = std::accumulate( valx.begin(), valx.end(), Double_t() );
821 Double_t sumxy = std::accumulate( valxy.begin(), valxy.end(), Double_t() );
822 Double_t sumy = std::accumulate( valy.begin(), valy.end(), Double_t() );
823 Double_t slope = Double_t(valx.size())*sumxy - sumx*sumy;
824 if (slope<0) {
825 Log() << kINFO << "The error rate was still decreasing at the end of the path" << Endl;
826 Log() << kINFO << "Increase number of steps (GDNSteps)." << Endl;
827 }
828 //
829 // set coefficients
830 //
831 if (found) {
832 fRuleEnsemble->SetCoefficients( coefsMin );
833 fRuleEnsemble->SetLinCoefficients( lincoefsMin );
834 fRuleEnsemble->SetOffset( offsetMin );
835 }
836 else {
837 Log() << kFATAL << "BUG TRAP: minimum not found in MakeGDPath()" << Endl;
838 }
839
840 //
841 // print timing info (VERBOSE mode)
842 //
843 if (isVerbose) {
844 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
845 Log() << kVERBOSE << "Timing per loop (ms):" << Endl;
846 Log() << kVERBOSE << " gradvec = " << 1000*stgradvec/iloop << Endl;
847 Log() << kVERBOSE << " upgrade = " << 1000*stupgrade/iloop << Endl;
848 Log() << kVERBOSE << " risk = " << 1000*strisk/iloop << Endl;
849 Log() << kVERBOSE << " perf = " << 1000*stperf/iloop << Endl;
850 Log() << kVERBOSE << " loop = " << 1000*stloop/iloop << Endl;
851 //
852 Log() << kVERBOSE << " GDInit = " << 1000*gGDInit/iloop << Endl;
853 Log() << kVERBOSE << " GDPtr = " << 1000*gGDPtr/iloop << Endl;
854 Log() << kVERBOSE << " GDEval = " << 1000*gGDEval/iloop << Endl;
855 Log() << kVERBOSE << " GDEvalRule = " << 1000*gGDEvalRule/iloop << Endl;
856 Log() << kVERBOSE << " GDNorm = " << 1000*gGDNorm/iloop << Endl;
857 Log() << kVERBOSE << " GDRuleLoop = " << 1000*gGDRuleLoop/iloop << Endl;
858 Log() << kVERBOSE << " GDLinLoop = " << 1000*gGDLinLoop/iloop << Endl;
859 }
860 //
861 // write ntuple (DEBUG)
862 if (isDebug) fGDNtuple->Write();
863}
864
865////////////////////////////////////////////////////////////////////////////////
866/// helper function to store the rule coefficients in local arrays
867
869{
870 fNTOffset = fRuleEnsemble->GetOffset();
871 //
872 for (UInt_t i=0; i<fNRules; i++) {
873 fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
874 }
875 for (UInt_t i=0; i<fNLinear; i++) {
876 fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
877 }
878}
879
880////////////////////////////////////////////////////////////////////////////////
881/// Estimates F* (optimum scoring function) for all events for the given sets.
882/// The result is used in ErrorRateReg().
883/// --- NOT USED ---
884
886{
887 Log() << kWARNING << "<CalcFStar> Using unverified code! Check!" << Endl;
888 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
889 if (neve<1) {
890 Log() << kFATAL << "<CalcFStar> Invalid start/end indices!" << Endl;
891 return;
892 }
893 //
894 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
895 //
896 fFstar.clear();
897 std::vector<Double_t> fstarSorted;
898 Double_t fstarVal;
899 // loop over all events and estimate F* for each event
900 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
901 const Event& e = *(*events)[i];
902 fstarVal = fRuleEnsemble->FStar(e);
903 fFstar.push_back(fstarVal);
904 fstarSorted.push_back(fstarVal);
905 if (TMath::IsNaN(fstarVal)) Log() << kFATAL << "F* is NAN!" << Endl;
906 }
907 // sort F* and find median
908 std::sort( fstarSorted.begin(), fstarSorted.end() );
909 UInt_t ind = neve/2;
910 if (neve&1) { // odd number of events
911 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
912 }
913 else { // even
914 fFstarMedian = fstarSorted[ind];
915 }
916}
917
918////////////////////////////////////////////////////////////////////////////////
919/// implementation of eq. 7.17 in Hastie,Tibshirani & Friedman book
920/// this is the covariance between the estimated response yhat and the
921/// true value y.
922/// NOT REALLY SURE IF THIS IS CORRECT!
923/// --- THIS IS NOT USED ---
924
926{
927 Log() << kWARNING << "<Optimism> Using unverified code! Check!" << Endl;
928 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
929 if (neve<1) {
930 Log() << kFATAL << "<Optimism> Invalid start/end indices!" << Endl;
931 }
932 //
933 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
934 //
935 Double_t sumy=0;
936 Double_t sumyhat=0;
937 Double_t sumyhaty=0;
938 Double_t sumw2=0;
939 Double_t yhat;
940 Double_t y;
941 Double_t w;
942 //
943 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
944 const Event& e = *(*events)[i];
945 yhat = fRuleEnsemble->EvalEvent(i); // evaluated using the model
946 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0); // the truth
947 w = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf; // the weight, reweighted such that sum=1
948 sumy += w*y;
949 sumyhat += w*yhat;
950 sumyhaty += w*yhat*y;
951 sumw2 += w*w;
952 }
953 Double_t div = 1.0-sumw2;
954 Double_t cov = sumyhaty - sumyhat*sumy;
955 return 2.0*cov/div;
956}
957
958////////////////////////////////////////////////////////////////////////////////
959/// Estimates the error rate with the current set of parameters
960/// This code is pretty messy at the moment.
961/// Cleanup is needed.
962/// -- NOT USED ---
963
965{
966 Log() << kWARNING << "<ErrorRateReg> Using unverified code! Check!" << Endl;
967 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
968 if (neve<1) {
969 Log() << kFATAL << "<ErrorRateReg> Invalid start/end indices!" << Endl;
970 }
971 if (fFstar.size()!=neve) {
972 Log() << kFATAL << "--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
973 << " Fstar.size() = " << fFstar.size() << " , N(events) = " << neve << Endl;
974 }
975 //
976 Double_t sF;
977 //
978 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
979 //
980 Double_t sumdf = 0;
981 Double_t sumdfmed = 0;
982 //
983 // A bit messy here.
984 // I believe the binary error classification is appropriate here.
985 // The problem is stability.
986 //
987 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
988 const Event& e = *(*events)[i];
989 sF = fRuleEnsemble->EvalEvent( e );
990 // scaled abs error, eq 20 in RuleFit paper
991 sumdf += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
992 sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
993 }
994 // scaled abs error, eq 20
995 // This error (df) is large - need to think on how to compensate...
996 //
997 return sumdf/sumdfmed;
998}
999
1000////////////////////////////////////////////////////////////////////////////////
1001///
1002/// Estimates the error rate with the current set of parameters
1003/// It uses a binary estimate of (y-F*(x))
1004/// (y-F*(x)) = (Num of events where sign(F)!=sign(y))/Neve
1005/// y = {+1 if event is signal, -1 otherwise}
1006/// --- NOT USED ---
1007
1009{
1010 Log() << kWARNING << "<ErrorRateBin> Using unverified code! Check!" << Endl;
1011 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1012 if (neve<1) {
1013 Log() << kFATAL << "<ErrorRateBin> Invalid start/end indices!" << Endl;
1014 }
1015 //
1016 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1017 //
1018 Double_t sumdfbin = 0;
1019 Double_t dneve = Double_t(neve);
1020 Int_t signF, signy;
1021 Double_t sF;
1022 //
1023 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1024 const Event& e = *(*events)[i];
1025 sF = fRuleEnsemble->EvalEvent( e );
1026 // Double_t sFstar = fRuleEnsemble->FStar(e); // THIS CAN BE CALCULATED ONCE!
1027 signF = (sF>0 ? +1:-1);
1028 // signy = (sFStar>0 ? +1:-1);
1029 signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
1030 sumdfbin += TMath::Abs(Double_t(signF-signy))*0.5;
1031 }
1032 Double_t f = sumdfbin/dneve;
1033 // Double_t df = f*TMath::Sqrt((1.0/sumdfbin) + (1.0/dneve));
1034 return f;
1035}
1036
1037////////////////////////////////////////////////////////////////////////////////
1038/// Estimates the error rate with the current set of parameters.
1039/// It calculates the area under the bkg rejection vs signal efficiency curve.
1040/// The value returned is 1-area.
1041
1042Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
1043 std::vector<Double_t> & sFbkg )
1044
1045{
1046 std::sort(sFsig.begin(), sFsig.end());
1047 std::sort(sFbkg.begin(), sFbkg.end());
1048 const Double_t minsig = sFsig.front();
1049 const Double_t minbkg = sFbkg.front();
1050 const Double_t maxsig = sFsig.back();
1051 const Double_t maxbkg = sFbkg.back();
1052 const Double_t minf = std::min(minsig,minbkg);
1053 const Double_t maxf = std::max(maxsig,maxbkg);
1054 const Int_t nsig = Int_t(sFsig.size());
1055 const Int_t nbkg = Int_t(sFbkg.size());
1056 const Int_t np = std::min((nsig+nbkg)/4,50);
1057 const Double_t df = (maxf-minf)/(np-1);
1058 //
1059 // calculate area under rejection/efficiency curve
1060 //
1061 Double_t fcut;
1062 std::vector<Double_t>::const_iterator indit;
1063 Int_t nrbkg;
1064 Int_t nesig;
1065 Int_t pnesig=0;
1066 Double_t rejb=0;
1067 Double_t effs=1.0;
1068 Double_t prejb=0;
1069 Double_t peffs=1.0;
1070 // Double_t drejb;
1071 Double_t deffs;
1072 Double_t area=0;
1073 Int_t npok=0;
1074 //
1075 // loop over range of F [minf,maxf]
1076 //
1077 for (Int_t i=0; i<np; i++) {
1078 fcut = minf + df*Double_t(i);
1079 indit = std::find_if(sFsig.begin(), sFsig.end(),
1080 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1081 nesig = sFsig.end()-indit; // number of sig accepted with F>cut
1082 if (TMath::Abs(pnesig-nesig)>0) {
1083 npok++;
1084 indit = std::find_if(sFbkg.begin(), sFbkg.end(),
1085 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1086 nrbkg = indit-sFbkg.begin(); // number of bkg rejected with F>cut
1087 rejb = Double_t(nrbkg)/Double_t(nbkg);
1088 effs = Double_t(nesig)/Double_t(nsig);
1089 //
1090 // drejb = rejb-prejb;
1091 deffs = effs-peffs;
1092 area += 0.5*TMath::Abs(deffs)*(rejb+prejb); // trapezoid
1093 prejb = rejb;
1094 peffs = effs;
1095 }
1096 pnesig = nesig;
1097 }
1098 area += 0.5*(1+rejb)*effs; // extrapolate to the end point
1099
1100 return (1.0-area);
1101}
1102
1103////////////////////////////////////////////////////////////////////////////////
1104/// Estimates the error rate with the current set of parameters.
1105/// It calculates the area under the bkg rejection vs signal efficiency curve.
1106/// The value returned is 1-area.
1107/// This works but is less efficient than calculating the Risk using RiskPerf().
1108
1110{
1111 Log() << kWARNING << "<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
1112 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1113 if (neve<1) {
1114 Log() << kFATAL << "<ErrorRateRoc> Invalid start/end indices!" << Endl;
1115 }
1116 //
1117 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1118 //
1119 Double_t sF;
1120 //
1121 std::vector<Double_t> sFsig;
1122 std::vector<Double_t> sFbkg;
1123 Double_t sumfsig=0;
1124 Double_t sumfbkg=0;
1125 Double_t sumf2sig=0;
1126 Double_t sumf2bkg=0;
1127 //
1128 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1129 const Event& e = *(*events)[i];
1130 sF = fRuleEnsemble->EvalEvent(i);// * fRuleFit->GetTrainingEventWeight(i);
1131 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
1132 sFsig.push_back(sF);
1133 sumfsig +=sF;
1134 sumf2sig +=sF*sF;
1135 }
1136 else {
1137 sFbkg.push_back(sF);
1138 sumfbkg +=sF;
1139 sumf2bkg +=sF*sF;
1140 }
1141 }
1142 fsigave = sumfsig/sFsig.size();
1143 fbkgave = sumfbkg/sFbkg.size();
1144 fsigrms = TMath::Sqrt(gTools().ComputeVariance(sumf2sig,sumfsig,sFsig.size()));
1145 fbkgrms = TMath::Sqrt(gTools().ComputeVariance(sumf2bkg,sumfbkg,sFbkg.size()));
1146 //
1147 return ErrorRateRocRaw( sFsig, sFbkg );
1148}
1149
1150////////////////////////////////////////////////////////////////////////////////
1151/// Estimates the error rate with the current set of parameters.
1152/// It calculates the area under the bkg rejection vs signal efficiency curve.
1153/// The value returned is 1-area.
1154///
1155/// See comment under ErrorRateRoc().
1156
1158{
1159 Log() << kWARNING << "<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
1160 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1161 if (neve<1) {
1162 Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
1163 return;
1164 }
1165 //
1166 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1167 //
1168 // std::vector<Double_t> sF;
1169 Double_t sF;
1170 std::vector< std::vector<Double_t> > sFsig;
1171 std::vector< std::vector<Double_t> > sFbkg;
1172 //
1173 sFsig.resize( fGDNTau );
1174 sFbkg.resize( fGDNTau );
1175 // sF.resize( fGDNTau );
1176
1177 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1178 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1179 // if (itau==0) sF = fRuleEnsemble->EvalEvent( *(*events)[i], fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1180 // else sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1181 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1182 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
1183 sFsig[itau].push_back(sF);
1184 }
1185 else {
1186 sFbkg[itau].push_back(sF);
1187 }
1188 }
1189 }
1190 Double_t err;
1191
1192 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1193 err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
1194 fGDErrTst[itau] = err;
1195 }
1196}
1197
1198////////////////////////////////////////////////////////////////////////////////
1199/// Estimates the error rate with the current set of parameters.
1200/// using the `<Perf>` subsample.
1201/// Return the tau index giving the lowest error
1202
1204{
1205 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1206 if (neve<1) {
1207 Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
1208 return 0;
1209 }
1210 //
1211 Double_t sumx = 0;
1212 Double_t sumx2 = 0;
1213 Double_t maxx = -100.0;
1214 Double_t minx = 1e30;
1215 UInt_t itaumin = 0;
1216 UInt_t nok=0;
1217 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1218 if (fGDErrTstOK[itau]) {
1219 nok++;
1220 fGDErrTst[itau] = RiskPerf(itau);
1221 sumx += fGDErrTst[itau];
1222 sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
1223 if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
1224 if (fGDErrTst[itau]<minx) {
1225 minx=fGDErrTst[itau];
1226 itaumin = itau;
1227 }
1228 }
1229 }
1230 Double_t sigx = TMath::Sqrt(gTools().ComputeVariance( sumx2, sumx, nok ) );
1231 Double_t maxacc = minx+sigx;
1232 //
1233 if (nok>0) {
1234 nok = 0;
1235 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1236 if (fGDErrTstOK[itau]) {
1237 if (fGDErrTst[itau] > maxacc) {
1238 fGDErrTstOK[itau] = kFALSE;
1239 }
1240 else {
1241 nok++;
1242 }
1243 }
1244 }
1245 }
1246 fGDNTauTstOK = nok;
1247 Log() << kVERBOSE << "TAU: "
1248 << itaumin << " "
1249 << nok << " "
1250 << minx << " "
1251 << maxx << " "
1252 << sigx << Endl;
1253 //
1254 return itaumin;
1255}
1256
1257////////////////////////////////////////////////////////////////////////////////
1258/// make test gradient vector for all tau
1259/// same algorithm as MakeGradientVector()
1260
1262{
1263 UInt_t neve = fPathIdx1-fPathIdx2+1;
1264 if (neve<1) {
1265 Log() << kFATAL << "<MakeTstGradientVector> Invalid start/end indices!" << Endl;
1266 return;
1267 }
1268 //
1269 Double_t norm = 2.0/fNEveEffPath;
1270 //
1271 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1272
1273 // Clear gradient vectors
1274 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1275 if (fGDErrTstOK[itau]) {
1276 for (UInt_t ir=0; ir<fNRules; ir++) {
1277 fGradVecTst[itau][ir]=0;
1278 }
1279 for (UInt_t il=0; il<fNLinear; il++) {
1280 fGradVecLinTst[itau][il]=0;
1281 }
1282 }
1283 }
1284 //
1285 // Double_t val; // temp store
1286 Double_t sF; // score function value
1287 Double_t r; // eq 35, ref 1
1288 Double_t y; // true score (+1 or -1)
1289 const std::vector<UInt_t> *eventRuleMap=0;
1290 UInt_t rind;
1291 //
1292 // Loop over all events
1293 //
1294 UInt_t nsfok=0;
1295 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1296 const Event *e = (*events)[i];
1297 UInt_t nrules=0;
1298 if (fRuleEnsemble->DoRules()) {
1299 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1300 nrules = (*eventRuleMap).size();
1301 }
1302 for (UInt_t itau=0; itau<fGDNTau; itau++) { // loop over tau
1303 // if (itau==0) sF = fRuleEnsemble->EvalEvent( *e, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1304 // else sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1305 if (fGDErrTstOK[itau]) {
1306 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1307 if (TMath::Abs(sF)<1.0) {
1308 nsfok++;
1309 r = 0;
1310 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
1311 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
1312 // rule gradient vector
1313 for (UInt_t ir=0; ir<nrules; ir++) {
1314 rind = (*eventRuleMap)[ir];
1315 fGradVecTst[itau][rind] += r;
1316 }
1317 // linear terms
1318 for (UInt_t il=0; il<fNLinear; il++) {
1319 fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i, kTRUE );
1320 }
1321 } // if (TMath::Abs(F)<xxx)
1322 }
1323 }
1324 }
1325}
1326
1327////////////////////////////////////////////////////////////////////////////////
1328/// Establish maximum gradient for rules, linear terms and the offset
1329/// for all taus TODO: do not need index range!
1330
1332{
1333 Double_t maxr, maxl, cthresh, val;
1334 // loop over all taus
1335 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1336 if (fGDErrTstOK[itau]) {
1337 // find max gradient
1338 maxr = ( (fNRules>0 ?
1339 TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue()))):0) );
1340 maxl = ( (fNLinear>0 ?
1341 TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue()))):0) );
1342
1343 // Use the maximum as a threshold
1344 Double_t maxv = (maxr>maxl ? maxr:maxl);
1345 cthresh = maxv * fGDTauVec[itau];
1346
1347 // Add to offset, if gradient is large enough:
1348 // Loop over the gradient vector and move to next set of coefficients
1349 // size of GradVec (and GradVecLin) should be 0 if learner is disabled
1350 //
1351 // Step-size is divided by 10 when looking for the best path.
1352 //
1353 if (maxv>0) {
1354 const Double_t stepScale=1.0;
1355 for (UInt_t i=0; i<fNRules; i++) {
1356 val = fGradVecTst[itau][i];
1357
1358 if (TMath::Abs(val)>=cthresh) {
1359 fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
1360 }
1361 }
1362 // Loop over the gradient vector for the linear part and move to next set of coefficients
1363 for (UInt_t i=0; i<fNLinear; i++) {
1364 val = fGradVecLinTst[itau][i];
1365 if (TMath::Abs(val)>=cthresh) {
1366 fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
1367 }
1368 }
1369 }
1370 }
1371 }
1372 // set the offset - should be outside the itau loop!
1373 CalcTstAverageResponse();
1374}
1375
1376////////////////////////////////////////////////////////////////////////////////
1377/// make gradient vector
1378
1380{
1381 clock_t t0;
1382 // clock_t t10;
1383 t0 = clock();
1384 //
1385 UInt_t neve = fPathIdx2-fPathIdx1+1;
1386 if (neve<1) {
1387 Log() << kFATAL << "<MakeGradientVector> Invalid start/end indices!" << Endl;
1388 return;
1389 }
1390 //
1391 const Double_t norm = 2.0/fNEveEffPath;
1392 //
1393 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1394
1395 // Clear gradient vectors
1396 for (UInt_t ir=0; ir<fNRules; ir++) {
1397 fGradVec[ir]=0;
1398 }
1399 for (UInt_t il=0; il<fNLinear; il++) {
1400 fGradVecLin[il]=0;
1401 }
1402 //
1403 // Double_t val; // temp store
1404 Double_t sF; // score function value
1405 Double_t r; // eq 35, ref 1
1406 Double_t y; // true score (+1 or -1)
1407 const std::vector<UInt_t> *eventRuleMap=0;
1408 UInt_t rind;
1409 //
1410 gGDInit += Double_t(clock()-t0)/CLOCKS_PER_SEC;
1411
1412 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1413 const Event *e = (*events)[i];
1414
1415 // t0 = clock(); //DEB
1416 sF = fRuleEnsemble->EvalEvent( i ); // should not contain the weight
1417 // gGDEval += Double_t(clock()-t0)/CLOCKS_PER_SEC;
1418 if (TMath::Abs(sF)<1.0) {
1419 UInt_t nrules=0;
1420 if (fRuleEnsemble->DoRules()) {
1421 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1422 nrules = (*eventRuleMap).size();
1423 }
1424 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
1425 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
1426 // rule gradient vector
1427 for (UInt_t ir=0; ir<nrules; ir++) {
1428 rind = (*eventRuleMap)[ir];
1429 fGradVec[rind] += r;
1430 }
1431 // gGDRuleLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
1432 // linear terms
1433 // t0 = clock(); //DEB
1434 for (UInt_t il=0; il<fNLinear; il++) {
1435 fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
1436 }
1437 // gGDLinLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
1438 } // if (TMath::Abs(F)<xxx)
1439 }
1440}
1441
1442////////////////////////////////////////////////////////////////////////////////
1443/// Establish maximum gradient for rules, linear terms and the offset
1444
1446{
1447 Double_t maxr = ( (fRuleEnsemble->DoRules() ?
1448 TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue()))):0) );
1449 Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
1450 TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue()))):0) );
1451 // Use the maximum as a threshold
1452 Double_t maxv = (maxr>maxl ? maxr:maxl);
1453 Double_t cthresh = maxv * fGDTau;
1454
1455 Double_t useRThresh;
1456 Double_t useLThresh;
1457 //
1458 // Choose thresholds.
1459 //
1460 useRThresh = cthresh;
1461 useLThresh = cthresh;
1462
1463 Double_t gval, lval, coef, lcoef;
1464
1465 // Add to offset, if gradient is large enough:
1466 // Loop over the gradient vector and move to next set of coefficients
1467 // size of GradVec (and GradVecLin) should be 0 if learner is disabled
1468 if (maxv>0) {
1469 for (UInt_t i=0; i<fGradVec.size(); i++) {
1470 gval = fGradVec[i];
1471 if (TMath::Abs(gval)>=useRThresh) {
1472 coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
1473 fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
1474 }
1475 }
1476
1477 // Loop over the gradient vector for the linear part and move to next set of coefficients
1478 for (UInt_t i=0; i<fGradVecLin.size(); i++) {
1479 lval = fGradVecLin[i];
1480 if (TMath::Abs(lval)>=useLThresh) {
1481 lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
1482 fRuleEnsemble->SetLinCoefficient(i,lcoef);
1483 }
1484 }
1485 // Set the offset
1486 Double_t offset = CalcAverageResponse();
1487 fRuleEnsemble->SetOffset( offset );
1488 }
1489}
1490
1491////////////////////////////////////////////////////////////////////////////////
1492/// calc average response for all test paths - TODO: see comment under CalcAverageResponse()
1493/// note that 0 offset is used
1494
1496{
1497 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1498 if (fGDErrTstOK[itau]) {
1499 fGDOfsTst[itau] = 0;
1500 for (UInt_t s=0; s<fNLinear; s++) {
1501 fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
1502 }
1503 for (UInt_t r=0; r<fNRules; r++) {
1504 fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
1505 }
1506 }
1507 }
1508 //
1509}
1510
1511////////////////////////////////////////////////////////////////////////////////
1512/// calculate the average response - TODO : rewrite bad dependancy on EvaluateAverage() !
1513///
1514/// note that 0 offset is used
1515
1517{
1518 Double_t ofs = 0;
1519 for (UInt_t s=0; s<fNLinear; s++) {
1520 ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
1521 }
1522 for (UInt_t r=0; r<fNRules; r++) {
1523 ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
1524 }
1525 return ofs;
1526}
1527
1528////////////////////////////////////////////////////////////////////////////////
1529/// calculate the average truth
1530
1532{
1533 if (fPathIdx2<=fPathIdx1) {
1534 Log() << kFATAL << "<CalcAverageTruth> Invalid start/end indices!" << Endl;
1535 return 0;
1536 }
1537 Double_t sum=0;
1538 Double_t ensig=0;
1539 Double_t enbkg=0;
1540 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1541 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1542 Double_t ew = fRuleFit->GetTrainingEventWeight(i);
1543 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
1544 else enbkg += ew;
1545 sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
1546 }
1547 Log() << kVERBOSE << "Effective number of signal / background = " << ensig << " / " << enbkg << Endl;
1548
1549 return sum/fNEveEffPath;
1550}
1551
1552////////////////////////////////////////////////////////////////////////////////
1553
1555 return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
1556}
1557
1558////////////////////////////////////////////////////////////////////////////////
1559
1561 fLogger->SetMinType(t);
1562}
#define f(i)
Definition: RSha256.hxx:104
#define h(i)
Definition: RSha256.hxx:106
#define e(i)
Definition: RSha256.hxx:103
bool Bool_t
Definition: RtypesCore.h:63
int Int_t
Definition: RtypesCore.h:45
const Bool_t kFALSE
Definition: RtypesCore.h:101
unsigned int UInt_t
Definition: RtypesCore.h:46
double Double_t
Definition: RtypesCore.h:59
const Bool_t kTRUE
Definition: RtypesCore.h:100
Double_t gGDNorm
Double_t gGDLinLoop
Double_t gGDInit
Double_t gGDEval
Bool_t gFIRSTTST
Double_t gGDPtr
Double_t gGDEvalRule
Bool_t gFIRSTORG
Double_t gGDRuleLoop
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t sel
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t np
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t gval
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition: TString.cxx:2447
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:57
void CalcTstAverageResponse()
calc average response for all test paths - TODO: see comment under CalcAverageResponse() note that 0 ...
void MakeGDPath()
The following finds the gradient directed path in parameter space.
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
Double_t Optimism()
implementation of eq.
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
virtual ~RuleFitParams()
destructor
RuleFitParams()
constructor
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO : rewrite bad dependancy on EvaluateAverage() !
void SetMsgType(EMsgType t)
Double_t Penalty() const
This is the "lasso" penalty To be used for regression.
void FillCoefficients()
helper function to store the rule coefficients in local arrays
Double_t LossFunction(const Event &e) const
Implementation of squared-error ramp loss function (eq 39,40 in ref 1) This is used for binary Classi...
void InitGD()
Initialize GD path search.
Int_t Type(const Event *e) const
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
void MakeTstGradientVector()
make test gradient vector for all tau same algorithm as MakeGradientVector()
Double_t CalcAverageTruth()
calculate the average truth
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus TODO: do not need inde...
void MakeGradientVector()
make gradient vector
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
void InitNtuple()
initializes the ntuple
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters It uses a binary estimate of (y-F*(x)) (y...
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters This code is pretty messy at the moment.
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
Estimates the error rate with the current set of parameters.
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:146
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:202
EMsgType
Definition: Types.h:55
@ kDEBUG
Definition: Types.h:56
@ kVERBOSE
Definition: Types.h:57
@ kERROR
Definition: Types.h:60
@ kINFO
Definition: Types.h:58
@ kWARNING
Definition: Types.h:59
@ kFATAL
Definition: Types.h:61
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:798
A TTree represents a columnar dataset.
Definition: TTree.h:79
Double_t y[n]
Definition: legend1.C:17
static constexpr double s
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:148
Bool_t IsNaN(Double_t x)
Definition: TMath.h:889
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition: TMathBase.h:250
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Definition: TMath.h:753
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition: TMath.h:659
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition: TMathBase.h:198
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition: TMathBase.h:123
#define rprev(otri1, otri2)
Definition: triangle.c:1025