// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : RuleFitParams                                                         *
 * Web    : http://tmva.sourceforge.net                                          *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation                                                            *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA      *
 *      Helge Voss         <Helge.Voss@cern.ch>         - MPI-KP Heidelberg, Ger. *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         *
 *      Iowa State U.                                                             *
 *      MPI-K Heidelberg, Germany                                                 *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                         *
 **********************************************************************************/

/*! \class TMVA::RuleFitParams
\ingroup TMVA
A class doing the actual fitting of a linear model using rules as base functions.
*/
#include "TMVA/RuleFitParams.h"

#include "TMVA/DataSetInfo.h"
#include "TMVA/MethodRuleFit.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/RuleEnsemble.h"
#include "TMVA/RuleFit.h"
#include "TMVA/Timer.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"

#include "TTree.h"
#include "TMath.h"

#include <iostream>
#include <iomanip>
#include <numeric>
#include <algorithm>
#include <functional>

Bool_t gFIRSTTST = kTRUE;
Bool_t gFIRSTORG = kTRUE;

Double_t gGDNorm     = 0;
Double_t gGDInit     = 0;
Double_t gGDPtr      = 0;
Double_t gGDEval     = 0;
Double_t gGDEvalRule = 0;
Double_t gGDRuleLoop = 0;
Double_t gGDLinLoop  = 0;
////////////////////////////////////////////////////////////////////////////////
/// constructor

TMVA::RuleFitParams::RuleFitParams()
   : fRuleFit ( 0 )
   , fRuleEnsemble ( 0 )
   , fNRules ( 0 )
   , fNLinear ( 0 )
   , fPathIdx1 ( 0 )
   , fPathIdx2 ( 0 )
   , fPerfIdx1 ( 0 )
   , fPerfIdx2 ( 0 )
   , fGDNTauTstOK( 0 )
   , fGDNTau ( 51 )
   , fGDTauPrec ( 0.02 )
   , fGDTauScan ( 1000 )
   , fGDTauMin ( 0.0 )
   , fGDTauMax ( 1.0 )
   , fGDTau ( -1.0 )
   , fGDPathStep ( 0.01 )
   , fGDNPathSteps ( 1000 )
   , fGDErrScale ( 1.1 )
   , fAverageTruth( 0 )
   , fFstarMedian ( 0 )
   , fGDNtuple ( 0 )
   , fNTRisk ( 0 )
   , fNTErrorRate ( 0 )
   , fNTNuval ( 0 )
   , fNTCoefRad ( 0 )
   , fNTOffset ( 0 )
   , fNTCoeff ( 0 )
   , fNTLinCoeff ( 0 )
   , fsigave( 0 )
   , fsigrms( 0 )
   , fbkgave( 0 )
   , fbkgrms( 0 )
   , fLogger( new MsgLogger("RuleFit") )
{
   Init();
}
////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::RuleFitParams::~RuleFitParams()
{
   // fNTCoeff and fNTLinCoeff are allocated with new[] and must be freed with delete[]
   if (fNTCoeff)    { delete [] fNTCoeff;    fNTCoeff    = 0; }
   if (fNTLinCoeff) { delete [] fNTLinCoeff; fNTLinCoeff = 0; }
   delete fLogger;
}

////////////////////////////////////////////////////////////////////////////////
/// Initializes all parameters using the RuleEnsemble and the training tree

void TMVA::RuleFitParams::Init()
{
   if (fRuleFit==0) return;
   if (fRuleFit->GetMethodRuleFit()==0) {
      Log() << kFATAL << "RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
   }
   UInt_t neve = fRuleFit->GetTrainingEvents().size();
   //
   fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
   fNRules       = fRuleEnsemble->GetNRules();
   fNLinear      = fRuleEnsemble->GetNLinear();

   //
   // The fraction of events used for validation should be close to unity.
   // Events are always selected from the END.
   //
   UInt_t ofs;
   fPerfIdx1 = 0;
   if (neve>1) {
      fPerfIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
   }
   else {
      fPerfIdx2 = 0;
   }
   ofs = neve - fPerfIdx2 - 1;
   fPerfIdx1 += ofs;
   fPerfIdx2 += ofs;
   //
   // The fraction of events used for the path search can be smaller, say 0.5.
   // Events are always selected from the BEGINNING.
   // This means that the validation and search samples will not overlap if both fractions are <0.5.
   //
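   // A worked example with illustrative (not necessarily default) numbers:
   // for neve = 1000 and both GDValidEveFrac and GDPathEveFrac set to 0.5,
   // the two blocks yield fPerfIdx = [500,999] and fPathIdx = [0,499],
   // i.e. disjoint validation and search samples.
   //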
   fPathIdx1 = 0;
   if (neve>1) {
      fPathIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
   }
   else {
      fPathIdx2 = 0;
   }
   //
   // summarize weights
   //
   fNEveEffPath = 0;
   for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
      fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
   }

   fNEveEffPerf = 0;
   for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
      fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
   }
   //
   Log() << kVERBOSE << "Path constr. - event index range = [ " << fPathIdx1 << ", " << fPathIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPath << Endl;
   Log() << kVERBOSE << "Error estim. - event index range = [ " << fPerfIdx1 << ", " << fPerfIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPerf << Endl;
   //
   if (fRuleEnsemble->DoRules())
      Log() << kDEBUG << "Number of rules in ensemble: " << fNRules << Endl;
   else
      Log() << kDEBUG << "Rules are disabled " << Endl;

   if (fRuleEnsemble->DoLinear())
      Log() << kDEBUG << "Number of linear terms: " << fNLinear << Endl;
   else
      Log() << kDEBUG << "Linear terms are disabled " << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// initializes the ntuple

void TMVA::RuleFitParams::InitNtuple()
{
   fGDNtuple= new TTree("MonitorNtuple_RuleFitParams","RuleFit path search");
   fGDNtuple->Branch("risk",    &fNTRisk,      "risk/D");
   fGDNtuple->Branch("error",   &fNTErrorRate, "error/D");
   fGDNtuple->Branch("nuval",   &fNTNuval,     "nuval/D");
   fGDNtuple->Branch("coefrad", &fNTCoefRad,   "coefrad/D");
   fGDNtuple->Branch("offset",  &fNTOffset,    "offset/D");
   //
   fNTCoeff    = (fNRules >0 ? new Double_t[fNRules]  : 0);
   fNTLinCoeff = (fNLinear>0 ? new Double_t[fNLinear] : 0);

   for (UInt_t i=0; i<fNRules; i++) {
      fGDNtuple->Branch(Form("a%d",i+1),&fNTCoeff[i],Form("a%d/D",i+1));
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fGDNtuple->Branch(Form("b%d",i+1),&fNTLinCoeff[i],Form("b%d/D",i+1));
   }
}

////////////////////////////////////////////////////////////////////////////////
/// evaluate the average of each variable and f(x) in the given range

void TMVA::RuleFitParams::EvaluateAverage( UInt_t ind1, UInt_t ind2,
                                           std::vector<Double_t> &avsel,
                                           std::vector<Double_t> &avrul )
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
   }
   //
   avsel.clear();
   avrul.clear();
   //
   if (fNLinear>0) avsel.resize(fNLinear,0);
   if (fNRules>0)  avrul.resize(fNRules,0);
   const std::vector<UInt_t> *eventRuleMap=0;
   Double_t ew;
   Double_t sumew=0;
   //
   // Loop over events and calculate average of linear terms (normalised) and rule response.
   //
   if (fRuleEnsemble->IsRuleMapOK()) { // MakeRuleMap() has been called
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
         }
         // loop over rules
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         for (UInt_t r=0; r<nrules; r++) {
            avrul[(*eventRuleMap)[r]] += ew;
         }
      }
   }
   else { // MakeRuleMap() has not yet been called
      const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         // first cache rule/lin response
         /* Double_t val = */ fRuleEnsemble->EvalLinEvent(*((*events)[i]));
         /* val = */ fRuleEnsemble->EvalEvent(*((*events)[i]));
         // loop over linear terms
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
         }
         // loop over rules
         for (UInt_t r=0; r<fNRules; r++) {
            avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
         }
      }
   }
   // average variable
   for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
      avsel[sel] = avsel[sel] / sumew;
   }
   // average rule response, excluding the coefficient
   for (UInt_t r=0; r<fNRules; r++) {
      avrul[r] = avrul[r] / sumew;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).
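///
/// As a sketch of what the code below computes (our reading of eqs 39,40):
/// \f[
///    L(y,F) = \big(y - H(F)\big)^2 , \qquad H(F) = \max\big(-1,\min(1,F)\big),
/// \f]
/// with \f$ y \in \{-1,+1\} \f$ and the event weight applied multiplicatively.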

Double_t TMVA::RuleFitParams::LossFunction( const Event& e ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( e )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
   //
   return diff*diff*e.GetWeight();
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( evtidx )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx, UInt_t itau ) const
{
   Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,e) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment
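///
/// A sketch of the quantity computed below: with per-event losses \f$L_i\f$
/// (already weighted, see LossFunction) and the effective number of events
/// \f$N_{\rm eff}\f$ passed as `neff`,
/// \f[
///    R = \frac{1}{N_{\rm eff}} \sum_{i={\rm ind1}}^{\rm ind2} L_i .
/// \f]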

Double_t TMVA::RuleFitParams::Risk( UInt_t ind1, UInt_t ind2, Double_t neff ) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment for tau model <itau>

Double_t TMVA::RuleFitParams::Risk( UInt_t ind1, UInt_t ind2, Double_t neff, UInt_t itau ) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i,itau);
   }
   rval = rval/neff;

   return rval;
}

353/// This is the "lasso" penalty
354/// To be used for regression.
355/// --- NOT USED ---
356

Double_t TMVA::RuleFitParams::Penalty() const
{
   Log() << kWARNING << "<Penalty> Using unverified code! Check!" << Endl;
   Double_t rval=0;
   const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
   for (UInt_t i=0; i<fNRules; i++) {
      rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      rval += TMath::Abs((*lincoeff)[i]);
   }
   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// Initialize GD path search

void TMVA::RuleFitParams::InitGD()
{
   if (fGDNTau<2) {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   if (fGDTau<0.0) {
      // fGDNTau   = 50; already set in MethodRuleFit
      fGDTauScan = 1000;
      fGDTauMin  = 0.0;
      fGDTauMax  = 1.0;
   }
   else {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   // set all taus
   fGDTauVec.clear();
   fGDTauVec.resize( fGDNTau );
   if (fGDNTau==1) {
      fGDTauVec[0] = fGDTau;
   }
   else {
      // set tau vector - TODO: make a smarter choice of range in tau
      Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         fGDTauVec[itau] = static_cast<Double_t>(itau)*dtau + fGDTauMin;
         if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
      }
   }
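   // Example with the constructor defaults: fGDNTau = 51 and
   // [fGDTauMin,fGDTauMax] = [0,1] give dtau = 1/50 = 0.02,
   // i.e. the grid tau = 0.00, 0.02, ..., 1.00.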
   // initialize path search vectors

   fGradVec.clear();
   fGradVecLin.clear();
   fGradVecTst.clear();
   fGradVecLinTst.clear();
   fGDErrTst.clear();
   fGDErrTstOK.clear();
   fGDOfsTst.clear();
   fGDCoefTst.clear();
   fGDCoefLinTst.clear();
   //
   // rules
   //
   fGDCoefTst.resize(fGDNTau);
   fGradVec.resize(fNRules,0);
   fGradVecTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecTst[i].resize(fNRules,0);
      fGDCoefTst[i].resize(fNRules,0);
   }
   //
   // linear terms
   //
   fGDCoefLinTst.resize(fGDNTau);
   fGradVecLin.resize(fNLinear,0);
   fGradVecLinTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecLinTst[i].resize(fNLinear,0);
      fGDCoefLinTst[i].resize(fNLinear,0);
   }
   //
   // error, coefs etc
   //
   fGDErrTst.resize(fGDNTau,0);
   fGDErrTstOK.resize(fGDNTau,kTRUE);
   fGDOfsTst.resize(fGDNTau,0);
   fGDNTauTstOK = fGDNTau;
   //
   // calculate average selectors and rule responses for the path sample size
   //
}

////////////////////////////////////////////////////////////////////////////////
/// This finds the cutoff parameter tau by scanning several different paths

Int_t TMVA::RuleFitParams::FindGDTau()
{
   if (fGDNTau<2)     return 0;
   if (fGDTauScan==0) return 0;

   if (fGDOfsTst.size()<1)
      Log() << kFATAL << "BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;
   //
   Log() << kINFO << "Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;
   //
   // Find how many points to scan and how often to calculate the error
   UInt_t nscan = fGDTauScan; //std::min(static_cast<Int_t>(fGDTauScan),fGDNPathSteps);
   UInt_t netst = std::min(nscan,UInt_t(100));
   UInt_t nscanned=0;
   //
   //--------------------
   // loop over the paths
   //--------------------
   // The MAXIMUM number of loops is given by nscan.
   // At each loop, the paths that are far away from the minimum
   // are rejected. Hence at each check (every netst events), the number
   // of paths searched will be reduced.
   // The maximum 'distance' from the minimum error rate is
   // 1 sigma. See RiskPerfTst() for details.
   //
   Bool_t doloop=kTRUE;
   UInt_t ip=0;
   UInt_t itauMin=0;
   Timer timer( nscan, "RuleFit" );
   while (doloop) {
      // make gradvec
      MakeTstGradientVector();
      // update coefs
      UpdateTstCoefficients();
      // estimate error and do the sum
      // do this at index=0, netst-1, 2*netst-1 ...
      nscanned++;
      if ( (ip==0) || ((ip+1)%netst==0) ) {
         // ErrorRateRocTst( );
         itauMin = RiskPerfTst();
         Log() << kVERBOSE << Form("%4d",ip+1) << ". tau = " << Form("%4.4f",fGDTauVec[itauMin])
               << " => error rate = " << fGDErrTst[itauMin] << Endl;
      }
      ip++;
      doloop = ((ip<nscan) && (fGDNTauTstOK>3));
      if (Log().GetMinType()>kVERBOSE)
         timer.DrawProgressBar(ip);
   }
   //
   // Set tau and coefs
   // Downscale tau slightly in order to avoid numerical problems
   //
   if (nscanned==0) {
      Log() << kERROR << "<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;
   }
   fGDTau = fGDTauVec[itauMin];
   fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
   fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
   fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
   Log() << kINFO << "Best path found with tau = " << Form("%4.4f",fGDTau)
         << " after " << timer.GetElapsedTime() << " " << Endl;

   return nscan;
}

////////////////////////////////////////////////////////////////////////////////
/// The following finds the gradient-directed path in parameter space.
/// More work is needed... FT, 24/9/2006
///
/// The algorithm is currently as follows (if not otherwise stated, the sample
/// used below is [fPathIdx1,fPathIdx2]):
///
/// 1. Set offset to -average(y(true)) and all coefs=0 => average of F(x)==0
/// 2. FindGDTau() : start scanning using several paths defined by different tau;
///    choose the tau yielding the best path
/// 3. start scanning the chosen path
/// 4. check the error rate at a given frequency;
///    data used for the check: [fPerfIdx1,fPerfIdx2]
/// 5. stop when either of the following conditions is fulfilled:
///    1. loop index==fGDNPathSteps
///    2. error > fGDErrScale*errmin
///    3. only in DEBUG mode: risk is not monotonously decreasing
///
/// The algorithm will warn if:
/// 1. the error rate was still decreasing when the loop finished -> increase fGDNPathSteps!
/// 2. the minimum was found at an early stage -> decrease fGDPathStep
/// 3. DEBUG: risk > previous risk -> entered a chaotic region (regularization is too small)

void TMVA::RuleFitParams::MakeGDPath()
{
   Log() << kINFO << "GD path scan - the scan stops when the max num. of steps is reached or a min is found"
         << Endl;
   Log() << kVERBOSE << "Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
   Log() << kVERBOSE << "Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;

   // check if debug mode
   const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
   const Bool_t isDebug   = (Log().GetMinType()<=kDEBUG);

   // init GD parameters and clear coeff vectors
   InitGD();

   // evaluate average response of rules/linear terms (with event weights)
   EvaluateAveragePath();
   EvaluateAveragePerf();

   // initial estimate; all other a(i) are zero
   Log() << kVERBOSE << "Creating GD path"  << Endl;
   Log() << kVERBOSE << "  N(steps)     = " << fGDNPathSteps << Endl;
   Log() << kVERBOSE << "  step         = " << fGDPathStep   << Endl;
   Log() << kVERBOSE << "  N(tau)       = " << fGDNTau       << Endl;
   Log() << kVERBOSE << "  N(tau steps) = " << fGDTauScan    << Endl;
   Log() << kVERBOSE << "  tau range    = [ " << fGDTauVec[0] << " , " << fGDTauVec[fGDNTau-1] << " ]" << Endl;

   // init ntuple
   if (isDebug) InitNtuple();

   // DEBUG: risk scan
   Int_t nbadrisk=0;        // number of points where risk(i+1)>risk(i)
   Double_t trisk=0;        // time per risk evaluation
   Double_t strisk=0;       // total time
   Double_t rprev=1e32;     // previous risk

   // parameters set at point with min error
   Double_t errmin=1e32;    // min error
   // Double_t riskMin=0;   // risk
   Int_t indMin=-1;         // index
   std::vector<Double_t> coefsMin;    // rule coefs
   std::vector<Double_t> lincoefsMin; // linear coefs
   Double_t offsetMin;      // offset


   // DEBUG: timing
   clock_t t0=0;
   Double_t tgradvec;
   Double_t tupgrade;
   Double_t tperf;
   Double_t stgradvec=0;
   Double_t stupgrade=0;
   Double_t stperf=0;

   // linear regression to estimate slope of error rate evolution
   const UInt_t npreg=5;
   std::vector<Double_t> valx;
   std::vector<Double_t> valy;
   std::vector<Double_t> valxy;

   // loop related
   Bool_t docheck;          // true if an error rate check is to be done
   Int_t  iloop=0;          // loop index
   Bool_t found=kFALSE;     // true if minimum is found
   Bool_t riskFlat=kFALSE;  // DEBUG: flag is true if risk evolution behaves badly
   Bool_t done = kFALSE;    // flag that the scan is done

   // calculate how often to check error rate
   int imod = fGDNPathSteps/100;
   if (imod<100) imod = std::min(100,fGDNPathSteps);
   if (imod>100) imod=100;
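   // net effect of the three lines above: imod = std::min(100, fGDNPathSteps),
   // i.e. the error rate is checked roughly every 100 steps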

   // reset coefficients
   fAverageTruth = -CalcAverageTruth();
   offsetMin     = fAverageTruth;
   fRuleEnsemble->SetOffset(offsetMin);
   fRuleEnsemble->ClearCoefficients(0);
   fRuleEnsemble->ClearLinCoefficients(0);
   for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
      fGDOfsTst[i] = offsetMin;
   }
   Log() << kVERBOSE << "Obtained initial offset = " << offsetMin << Endl;

   // find the best tau - returns the number of steps performed in scan
   Int_t nprescan = FindGDTau();

   //
   //
   // calculate F*
   //
   // CalcFStar(fPerfIdx1, fPerfIdx2);
   //

   // set some ntuple values
   fNTRisk = rprev;
   fNTCoefRad = -1.0;
   fNTErrorRate = 0;

   // a local flag indicating for what reason the search was stopped
   Int_t stopCondition=0;

   Log() << kINFO << "Fitting model..." << Endl;
   // start loop with timer
   Timer timer( fGDNPathSteps, "RuleFit" );
   Log() << kWARNING;
   while (!done) {
      // Make gradient vector (eq 44, ref 1)
      if (isVerbose) t0 = clock();
      MakeGradientVector();
      if (isVerbose) {
         tgradvec = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stgradvec += tgradvec;
      }

      // Calculate the direction in parameter space (eq 25, ref 1) and update coeffs (eq 22, ref 1)
      if (isVerbose) t0 = clock();
      UpdateCoefficients();
      if (isVerbose) {
         tupgrade = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stupgrade += tupgrade;
      }

      // don't check error rate every loop
      docheck = ((iloop==0) ||((iloop+1)%imod==0));

      if (docheck) {
         // draw progressbar only if not debug
         if (!isVerbose)
            timer.DrawProgressBar(iloop);
         fNTNuval = Double_t(iloop)*fGDPathStep;
         fNTRisk = 0.0;

         // check risk evolution

         if (isDebug) FillCoefficients();
         fNTCoefRad = fRuleEnsemble->CoefficientRadius();

         // calculate risk
         t0 = clock();
         fNTRisk = RiskPath();
         trisk = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         strisk += trisk;
         //
         // Check for an increase in risk.
         // Such an increase would imply that the regularization is too small.
         // Stop the iteration if this happens.
         //
         if (fNTRisk>=rprev) {
            if (fNTRisk>rprev) {
               nbadrisk++;
               Log() << "Risk(i+1)>=Risk(i) in path" << Endl;
               riskFlat=(nbadrisk>3);
               if (riskFlat) {
                  Log() << kWARNING << "Chaotic behaviour of risk evolution" << Endl;
                  Log() << "--- STOPPING MINIMISATION ---" << Endl;
                  Log() << "This may be OK if minimum is already found" << Endl;
               }
            }
         }
         rprev = fNTRisk;
         //
         // Estimate the error rate using cross validation.
         // Well, not quite full cross validation since we only
         // use ONE model.
         //
         if (isVerbose) t0 = clock();
         fNTErrorRate = 0;

         // Check error rate
         Double_t errroc;//= ErrorRateRoc();
         Double_t riskPerf = RiskPerf();
         // Double_t optimism = Optimism();
         //
         errroc = riskPerf;
         //
         fNTErrorRate = errroc;
         //
         if (isVerbose) {
            tperf = Double_t(clock()-t0)/CLOCKS_PER_SEC;
            stperf +=tperf;
         }
         //
         // Always take the last min.
         // For each step the risk is reduced.
         //
         if (fNTErrorRate<=errmin) {
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         if ( fNTErrorRate > fGDErrScale*errmin) found = kTRUE;
         //
         // check slope of last couple of points
         //
         if (valx.size()==npreg) {
            valx.erase(valx.begin());
            valy.erase(valy.begin());
            valxy.erase(valxy.begin());
         }
         valx.push_back(fNTNuval);
         valy.push_back(fNTErrorRate);
         valxy.push_back(fNTErrorRate*fNTNuval);

         //
         if (isDebug) fGDNtuple->Fill();
         if (isVerbose) {
            Log() << kVERBOSE << "ParamsIRE : "
                  << std::setw(10)
                  << Form("%8d",iloop+1) << " "
                  << Form("%4.4f",fNTRisk) << " "
                  << Form("%4.4f",riskPerf)  << " "
                  << Form("%4.4f",fNTRisk+riskPerf) << " "
                  // << Form("%4.4f",fsigave+fbkgave) << " "
                  // << Form("%4.4f",fsigave) << " "
                  // << Form("%4.4f",fsigrms) << " "
                  // << Form("%4.4f",fbkgave) << " "
                  // << Form("%4.4f",fbkgrms) << " "

                  // << Form("%4.4f",fRuleEnsemble->CoefficientRadius())
                  << Endl;
         }
      }
      iloop++;
      // Stop iteration under various conditions
      //  * The condition R(i+1)<R(i) is no longer true (when the implicit regularization is too weak)
      //  * If the current error estimate is > factor*errmin (factor = 1.1)
      //  * We have reached the last step...
      Bool_t endOfLoop = (iloop==fGDNPathSteps);
      if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
         if (riskFlat) {
            stopCondition = 1;
         }
         else if (endOfLoop) {
            stopCondition = 2;
         }
         if (indMin<0) {
            Log() << kWARNING << "BUG TRAP: should not be here - still, this bug is harmless;)" << Endl;
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         found = kTRUE;
      }
      done = (found);
   }
   Log() << Endl;
   Log() << kINFO << "Minimisation elapsed time : " << timer.GetElapsedTime() << " " << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;
   Log() << kINFO << "Found minimum at step " << indMin+1 << " with error = " << errmin << Endl;
   Log() << kINFO << "Reason for ending loop: ";
   switch (stopCondition) {
   case 0:
      Log() << kINFO << "clear minimum found";
      break;
   case 1:
      Log() << kINFO << "chaotic behaviour of risk";
      break;
   case 2:
      Log() << kINFO << "end of loop reached";
      break;
   default:
      Log() << kINFO << "unknown!";
      break;
   }
   Log() << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;

   // check if early minimum - might be an indication of too large a step size
   if ( Double_t(indMin)/Double_t(nprescan+fGDNPathSteps) < 0.05 ) {
      Log() << kWARNING << "Reached minimum early in the search" << Endl;
      Log() << "Check results and maybe decrease GDStep size" << Endl;
   }
   //
   // quick check of the sign of the slope for the last npreg points
   //
   Double_t sumx  = std::accumulate( valx.begin(), valx.end(), Double_t() );
   Double_t sumxy = std::accumulate( valxy.begin(), valxy.end(), Double_t() );
   Double_t sumy  = std::accumulate( valy.begin(), valy.end(), Double_t() );
   Double_t slope = Double_t(valx.size())*sumxy - sumx*sumy;
   if (slope<0) {
      Log() << kINFO << "The error rate was still decreasing at the end of the path" << Endl;
      Log() << kINFO << "Increase the number of steps (GDNSteps)." << Endl;
   }
   //
   // set coefficients
   //
   if (found) {
      fRuleEnsemble->SetCoefficients( coefsMin );
      fRuleEnsemble->SetLinCoefficients( lincoefsMin );
      fRuleEnsemble->SetOffset( offsetMin );
   }
   else {
      Log() << kFATAL << "BUG TRAP: minimum not found in MakeGDPath()" << Endl;
   }

   //
   // print timing info (VERBOSE mode)
   //
   if (isVerbose) {
      Double_t stloop = strisk + stupgrade + stgradvec + stperf;
      Log() << kVERBOSE << "Timing per loop (ms):" << Endl;
      Log() << kVERBOSE << "   gradvec = " << 1000*stgradvec/iloop << Endl;
      Log() << kVERBOSE << "   upgrade = " << 1000*stupgrade/iloop << Endl;
      Log() << kVERBOSE << "   risk    = " << 1000*strisk/iloop    << Endl;
      Log() << kVERBOSE << "   perf    = " << 1000*stperf/iloop    << Endl;
      Log() << kVERBOSE << "   loop    = " << 1000*stloop/iloop    << Endl;
      //
      Log() << kVERBOSE << "   GDInit     = " << 1000*gGDInit/iloop     << Endl;
      Log() << kVERBOSE << "   GDPtr      = " << 1000*gGDPtr/iloop      << Endl;
      Log() << kVERBOSE << "   GDEval     = " << 1000*gGDEval/iloop     << Endl;
      Log() << kVERBOSE << "   GDEvalRule = " << 1000*gGDEvalRule/iloop << Endl;
      Log() << kVERBOSE << "   GDNorm     = " << 1000*gGDNorm/iloop     << Endl;
      Log() << kVERBOSE << "   GDRuleLoop = " << 1000*gGDRuleLoop/iloop << Endl;
      Log() << kVERBOSE << "   GDLinLoop  = " << 1000*gGDLinLoop/iloop  << Endl;
   }
   //
   // write ntuple (DEBUG)
   if (isDebug) fGDNtuple->Write();
}

////////////////////////////////////////////////////////////////////////////////
/// helper function to store the rule coefficients in local arrays

void TMVA::RuleFitParams::FillCoefficients()
{
   fNTOffset = fRuleEnsemble->GetOffset();
   //
   for (UInt_t i=0; i<fNRules; i++) {
      fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates F* (optimum scoring function) for all events for the given sets.
/// The result is used in ErrorRateReg().
/// --- NOT USED ---

void TMVA::RuleFitParams::CalcFStar()
{
   Log() << kWARNING << "<CalcFStar> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<CalcFStar> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   fFstar.clear();
   std::vector<Double_t> fstarSorted;
   Double_t fstarVal;
   // loop over all events and estimate F* for each event
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      fstarVal = fRuleEnsemble->FStar(e);
      fFstar.push_back(fstarVal);
      fstarSorted.push_back(fstarVal);
      if (TMath::IsNaN(fstarVal)) Log() << kFATAL << "F* is NAN!" << Endl;
   }
   // sort F* and find median
   std::sort( fstarSorted.begin(), fstarSorted.end() );
   UInt_t ind = neve/2;
   if (neve&1) { // odd number of events: the median is the middle element
      fFstarMedian = fstarSorted[ind];
   }
   else { // even: average the two middle elements
      fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// implementation of eq. 7.17 in the Hastie, Tibshirani & Friedman book:
/// this is the covariance between the estimated response yhat and the
/// true value y.
/// NOT REALLY SURE IF THIS IS CORRECT!
/// --- THIS IS NOT USED ---
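///
/// A sketch of the quantity returned below, with weights \f$w_i\f$ normalised
/// such that \f$\sum_i w_i = 1\f$:
/// \f[
///    \omega = \frac{2\left(\sum_i w_i \hat y_i y_i
///                   - \sum_i w_i \hat y_i \sum_i w_i y_i\right)}{1 - \sum_i w_i^2}
/// \f]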

Double_t TMVA::RuleFitParams::Optimism()
{
   Log() << kWARNING << "<Optimism> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<Optimism> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumy=0;
   Double_t sumyhat=0;
   Double_t sumyhaty=0;
   Double_t sumw2=0;
   Double_t sumw=0;
   Double_t yhat;
   Double_t y;
   Double_t w;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      yhat = fRuleEnsemble->EvalEvent(i);       // evaluated using the model
      y    = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0); // the truth
      w    = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf; // the weight, reweighted such that sum=1
      sumy     += w*y;
      sumyhat  += w*yhat;
      sumyhaty += w*yhat*y;
      sumw2    += w*w;
      sumw     += w;
   }
   Double_t div = 1.0-sumw2;
   Double_t cov = sumyhaty - sumyhat*sumy;
   return 2.0*cov/div;
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// This code is pretty messy at the moment.
/// Cleanup is needed.
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::ErrorRateReg()
{
   Log() << kWARNING << "<ErrorRateReg> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateReg> Invalid start/end indices!" << Endl;
   }
   if (fFstar.size()!=neve) {
      Log() << kFATAL << "--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
            << " Fstar.size() = " << fFstar.size() << " , N(events) = " << neve << Endl;
   }
   //
   Double_t sF;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdf = 0;
   Double_t sumdfmed = 0;
   //
   // A bit messy here.
   // I believe the binary error classification is appropriate here.
   // The problem is stability.
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // scaled abs error, eq 20 in RuleFit paper
      sumdf += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
      sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
   }
   // scaled abs error, eq 20
   // This error (df) is large - need to think on how to compensate...
   //
   return sumdf/sumdfmed;
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters.
/// It uses a binary estimate of (y-F*(x)):
/// (y-F*(x)) = (Num of events where sign(F)!=sign(y))/Neve
/// y = {+1 if event is signal, -1 otherwise}
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::ErrorRateBin()
{
   Log() << kWARNING << "<ErrorRateBin> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateBin> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdfbin = 0;
   Double_t dneve = Double_t(neve);
   Int_t signF, signy;
   Double_t sF;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // Double_t sFstar = fRuleEnsemble->FStar(e); // THIS CAN BE CALCULATED ONCE!
      signF = (sF>0 ? +1:-1);
      // signy = (sFStar>0 ? +1:-1);
      signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
      sumdfbin += TMath::Abs(Double_t(signF-signy))*0.5;
   }
   Double_t f = sumdfbin/dneve;
   // Double_t df = f*TMath::Sqrt((1.0/sumdfbin) + (1.0/dneve));
   return f;
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
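///
/// As a sketch of the loop below, the area is accumulated with the trapezoid rule,
/// \f[
///    A \approx \sum_k \tfrac{1}{2}\,\big|\Delta\epsilon_s^{(k)}\big|\,\big(r_b^{(k)}+r_b^{(k-1)}\big),
/// \f]
/// where \f$\epsilon_s\f$ is the signal efficiency and \f$r_b\f$ the background
/// rejection at a given cut on F; the function returns \f$1-A\f$.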

Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
                                               std::vector<Double_t> & sFbkg )
{
   std::sort(sFsig.begin(), sFsig.end());
   std::sort(sFbkg.begin(), sFbkg.end());
   const Double_t minsig = sFsig.front();
   const Double_t minbkg = sFbkg.front();
   const Double_t maxsig = sFsig.back();
   const Double_t maxbkg = sFbkg.back();
   const Double_t minf = std::min(minsig,minbkg);
   const Double_t maxf = std::max(maxsig,maxbkg);
   const Int_t    nsig = Int_t(sFsig.size());
   const Int_t    nbkg = Int_t(sFbkg.size());
   const Int_t    np   = std::min((nsig+nbkg)/4,50);
   const Double_t df   = (maxf-minf)/(np-1);
   //
   // calculate area under rejection/efficiency curve
   //
   Double_t fcut;
   std::vector<Double_t>::const_iterator indit;
   Int_t nrbkg;
   Int_t nesig;
   Int_t pnesig=0;
   Double_t rejb=0;
   Double_t effs=1.0;
   Double_t prejb=0;
   Double_t peffs=1.0;
   // Double_t drejb;
   Double_t deffs;
   Double_t area=0;
   Int_t npok=0;
   //
   // loop over range of F [minf,maxf]
   //
   for (Int_t i=0; i<np; i++) {
      fcut = minf + df*Double_t(i);
      indit = std::find_if(sFsig.begin(), sFsig.end(),
                           std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
      nesig = sFsig.end()-indit; // number of sig accepted with F>cut
      if (TMath::Abs(pnesig-nesig)>0) {
         npok++;
         indit = std::find_if(sFbkg.begin(), sFbkg.end(),
                              std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
         nrbkg = indit-sFbkg.begin(); // number of bkg rejected with F>cut
         rejb = Double_t(nrbkg)/Double_t(nbkg);
         effs = Double_t(nesig)/Double_t(nsig);
         //
         // drejb = rejb-prejb;
         deffs = effs-peffs;
         area += 0.5*TMath::Abs(deffs)*(rejb+prejb); // trapezoid
         prejb = rejb;
         peffs = effs;
      }
      pnesig = nesig;
   }
   area += 0.5*(1+rejb)*effs; // extrapolate to the end point

   return (1.0-area);
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
/// This works but is less efficient than calculating the Risk using RiskPerf().

Double_t TMVA::RuleFitParams::ErrorRateRoc()
{
   Log() << kWARNING << "<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRoc> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sF;
   //
   std::vector<Double_t> sFsig;
   std::vector<Double_t> sFbkg;
   Double_t sumfsig=0;
   Double_t sumfbkg=0;
   Double_t sumf2sig=0;
   Double_t sumf2bkg=0;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent(i);// * fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
         sFsig.push_back(sF);
         sumfsig  +=sF;
         sumf2sig +=sF*sF;
      }
      else {
         sFbkg.push_back(sF);
         sumfbkg  +=sF;
         sumf2bkg +=sF*sF;
      }
   }
   fsigave = sumfsig/sFsig.size();
   fbkgave = sumfbkg/sFbkg.size();
   fsigrms = TMath::Sqrt(gTools().ComputeVariance(sumf2sig,sumfsig,sFsig.size()));
   fbkgrms = TMath::Sqrt(gTools().ComputeVariance(sumf2bkg,sumfbkg,sFbkg.size()));
   //
   return ErrorRateRocRaw( sFsig, sFbkg );
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
///
/// See comment under ErrorRateRoc().

void TMVA::RuleFitParams::ErrorRateRocTst()
{
   Log() << kWARNING << "<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   // std::vector<Double_t> sF;
   Double_t sF;
   std::vector< std::vector<Double_t> > sFsig;
   std::vector< std::vector<Double_t> > sFbkg;
   //
   sFsig.resize( fGDNTau );
   sFbkg.resize( fGDNTau );
   // sF.resize( fGDNTau );

   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *(*events)[i], fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
            sFsig[itau].push_back(sF);
         }
         else {
            sFbkg[itau].push_back(sF);
         }
      }
   }
   Double_t err;

   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
      fGDErrTst[itau] = err;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters,
/// using the <Perf> subsample.
/// Returns the tau index giving the lowest error.
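///
/// Note on the rejection applied below: once the errors are evaluated, all
/// tau paths whose error exceeds \f$ \min_\tau \epsilon(\tau) + \sigma_\epsilon \f$
/// (one standard deviation above the best path) are flagged as not OK and are
/// dropped from further scanning.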

UInt_t TMVA::RuleFitParams::RiskPerfTst()
{
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<RiskPerfTst> Invalid start/end indices!" << Endl;
      return 0;
   }
   //
   Double_t sumx  = 0;
   Double_t sumx2 = 0;
   Double_t maxx  = -100.0;
   Double_t minx  = 1e30;
   UInt_t itaumin = 0;
   UInt_t nok=0;
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         nok++;
         fGDErrTst[itau] = RiskPerf(itau);
         sumx  += fGDErrTst[itau];
         sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
         if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
         if (fGDErrTst[itau]<minx) {
            minx=fGDErrTst[itau];
            itaumin = itau;
         }
      }
   }
   Double_t sigx = TMath::Sqrt(gTools().ComputeVariance( sumx2, sumx, nok ) );
   Double_t maxacc = minx+sigx;
   //
   if (nok>0) {
      nok = 0;
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         if (fGDErrTstOK[itau]) {
            if (fGDErrTst[itau] > maxacc) {
               fGDErrTstOK[itau] = kFALSE;
            }
            else {
               nok++;
            }
         }
      }
   }
   fGDNTauTstOK = nok;
   Log() << kVERBOSE << "TAU: "
         << itaumin << "   "
         << nok     << "   "
         << minx    << "   "
         << maxx    << "   "
         << sigx    << Endl;
   //
   return itaumin;
}

////////////////////////////////////////////////////////////////////////////////
/// make test gradient vector for all tau;
/// same algorithm as MakeGradientVector()

void TMVA::RuleFitParams::MakeTstGradientVector()
{
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeTstGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         for (UInt_t ir=0; ir<fNRules; ir++) {
            fGradVecTst[itau][ir]=0;
         }
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLinTst[itau][il]=0;
         }
      }
   }
   //
   // Double_t val; // temp store
   Double_t sF;   // score function value
   Double_t r;    // eq 35, ref 1
   Double_t y;    // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   // Loop over all events
   //
   UInt_t nsfok=0;
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];
      UInt_t nrules=0;
      if (fRuleEnsemble->DoRules()) {
         eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
         nrules = (*eventRuleMap).size();
      }
      for (UInt_t itau=0; itau<fGDNTau; itau++) { // loop over tau
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *e, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fGDErrTstOK[itau]) {
            sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
            if (TMath::Abs(sF)<1.0) {
               nsfok++;
               r = 0;
               y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
               r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
               // rule gradient vector
               for (UInt_t ir=0; ir<nrules; ir++) {
                  rind = (*eventRuleMap)[ir];
                  fGradVecTst[itau][rind] += r;
               }
               // linear terms
               for (UInt_t il=0; il<fNLinear; il++) {
                  fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i, kTRUE );
               }
            } // if (TMath::Abs(F)<xxx)
         }
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset
/// for all taus. TODO: do not need index range!

void TMVA::RuleFitParams::UpdateTstCoefficients()
{
   Double_t maxr, maxl, cthresh, val;
   // loop over all taus
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         // find max gradient
         maxr = ( (fNRules>0 ?
                   TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue()))):0) );
         maxl = ( (fNLinear>0 ?
                   TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue()))):0) );

         // Use the maximum as a threshold
         Double_t maxv = (maxr>maxl ? maxr:maxl);
         cthresh = maxv * fGDTauVec[itau];

         // Add to offset, if gradient is large enough:
         // Loop over the gradient vector and move to next set of coefficients
         // size of GradVec (and GradVecLin) should be 0 if learner is disabled
         //
         // The step size can be scaled down when looking for the best path
         // (stepScale is currently 1.0).
         //
         if (maxv>0) {
            const Double_t stepScale=1.0;
            for (UInt_t i=0; i<fNRules; i++) {
               val = fGradVecTst[itau][i];

               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
               }
            }
            // Loop over the gradient vector for the linear part and move to next set of coefficients
            for (UInt_t i=0; i<fNLinear; i++) {
               val = fGradVecLinTst[itau][i];
               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
               }
            }
         }
      }
   }
   // set the offset - should be outside the itau loop!
   CalcTstAverageResponse();
}

////////////////////////////////////////////////////////////////////////////////
/// make gradient vector
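///
/// A sketch of the accumulation below (eq 35/44 in ref 1): for each event with
/// \f$ |F(x_i)| < 1 \f$,
/// \f[
///    g_k \;\mathrel{+}=\; \frac{2}{N_{\rm eff}}\, w_i\, \big(y_i - F(x_i)\big)\, r_k(x_i),
/// \f]
/// where \f$r_k(x_i)\in\{0,1\}\f$ is the rule response (via the rule map) and,
/// for the linear terms, \f$r_k\f$ is replaced by the normalised variable value.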

void TMVA::RuleFitParams::MakeGradientVector()
{
   clock_t t0;
   // clock_t t10;
   t0 = clock();
   //
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t ir=0; ir<fNRules; ir++) {
      fGradVec[ir]=0;
   }
   for (UInt_t il=0; il<fNLinear; il++) {
      fGradVecLin[il]=0;
   }
   //
   // Double_t val; // temp store
   Double_t sF;   // score function value
   Double_t r;    // eq 35, ref 1
   Double_t y;    // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   gGDInit += Double_t(clock()-t0)/CLOCKS_PER_SEC;

   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];

      // t0 = clock(); //DEB
      sF = fRuleEnsemble->EvalEvent( i ); // should not contain the weight
      // gGDEval += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      if (TMath::Abs(sF)<1.0) {
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
         r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
         // rule gradient vector
         for (UInt_t ir=0; ir<nrules; ir++) {
            rind = (*eventRuleMap)[ir];
            fGradVec[rind] += r;
         }
         // gGDRuleLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
         // linear terms
         // t0 = clock(); //DEB
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
         }
         // gGDLinLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      } // if (TMath::Abs(F)<xxx)
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset
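///
/// The update rule sketched: only coefficients whose gradient passes the
/// tau threshold are moved,
/// \f[
///    |g_k| \ge \tau \max_j |g_j| \;\Longrightarrow\; a_k \mathrel{+}= \rho\, g_k ,
/// \f]
/// with \f$\rho\f$ = fGDPathStep (linear terms are additionally scaled by 1/LinNorm).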

void TMVA::RuleFitParams::UpdateCoefficients()
{
   Double_t maxr = ( (fRuleEnsemble->DoRules() ?
                      TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue()))):0) );
   Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
                      TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue()))):0) );
   // Use the maximum as a threshold
   Double_t maxv = (maxr>maxl ? maxr:maxl);
   Double_t cthresh = maxv * fGDTau;

   Double_t useRThresh;
   Double_t useLThresh;
   //
   // Choose thresholds.
   //
   useRThresh = cthresh;
   useLThresh = cthresh;

   Double_t gval, lval, coef, lcoef;

   // Add to offset, if gradient is large enough:
   // Loop over the gradient vector and move to next set of coefficients
   // size of GradVec (and GradVecLin) should be 0 if learner is disabled
   if (maxv>0) {
      for (UInt_t i=0; i<fGradVec.size(); i++) {
         gval = fGradVec[i];
         if (TMath::Abs(gval)>=useRThresh) {
            coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
            fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
         }
      }

      // Loop over the gradient vector for the linear part and move to next set of coefficients
      for (UInt_t i=0; i<fGradVecLin.size(); i++) {
         lval = fGradVecLin[i];
         if (TMath::Abs(lval)>=useLThresh) {
            lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
            fRuleEnsemble->SetLinCoefficient(i,lcoef);
         }
      }
      // Set the offset
      Double_t offset = CalcAverageResponse();
      fRuleEnsemble->SetOffset( offset );
   }
}

////////////////////////////////////////////////////////////////////////////////
/// calc average response for all test paths - TODO: see comment under CalcAverageResponse();
/// note that 0 offset is used

void TMVA::RuleFitParams::CalcTstAverageResponse()
{
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         fGDOfsTst[itau] = 0;
         for (UInt_t s=0; s<fNLinear; s++) {
            fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
         }
         for (UInt_t r=0; r<fNRules; r++) {
            fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
         }
      }
   }
   //
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average response - TODO: rewrite bad dependency on EvaluateAverage()!
///
/// note that 0 offset is used
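///
/// In formula form (a sketch of the sums below):
/// \f[
///    {\rm ofs} = -\sum_{l} b_l\,\bar z_l \;-\; \sum_{r} a_r\,\bar r_r ,
/// \f]
/// with the averages \f$\bar z_l,\bar r_r\f$ taken over the path sample
/// (fAverageSelectorPath, fAverageRulePath).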

Double_t TMVA::RuleFitParams::CalcAverageResponse()
{
   Double_t ofs = 0;
   for (UInt_t s=0; s<fNLinear; s++) {
      ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
   }
   for (UInt_t r=0; r<fNRules; r++) {
      ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
   }
   return ofs;
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average truth

Double_t TMVA::RuleFitParams::CalcAverageTruth()
{
   if (fPathIdx2<=fPathIdx1) {
      Log() << kFATAL << "<CalcAverageTruth> Invalid start/end indices!" << Endl;
      return 0;
   }
   Double_t sum=0;
   Double_t ensig=0;
   Double_t enbkg=0;
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      Double_t ew = fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
      else                                                                 enbkg += ew;
      sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
   }
   Log() << kVERBOSE << "Effective number of signal / background = " << ensig << " / " << enbkg << Endl;

   return sum/fNEveEffPath;
}

////////////////////////////////////////////////////////////////////////////////
/// returns the true class of the event: +1 for signal, -1 for background

Int_t TMVA::RuleFitParams::Type( const Event *e ) const
{
   return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
}

////////////////////////////////////////////////////////////////////////////////
/// set the minimum message type of the logger

void TMVA::RuleFitParams::SetMsgType( EMsgType t )
{
   fLogger->SetMinType(t);
}