ROOT  6.06/09
Reference Guide
RuleFitParams.cxx
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : RuleFitParams                                                         *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation                                                            *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA      *
 *      Helge Voss         <Helge.Voss@cern.ch>         - MPI-KP Heidelberg, Ger. *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         *
 *      Iowa State U.                                                             *
 *      MPI-K Heidelberg, Germany                                                 *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#include <iostream>
#include <iomanip>
#include <numeric>
#include <algorithm>
#include <functional>

#include "TTree.h"
#include "TMath.h"

#include "TMVA/Timer.h"
#include "TMVA/RuleFitParams.h"
#include "TMVA/RuleFit.h"
#include "TMVA/RuleEnsemble.h"
#include "TMVA/MethodRuleFit.h"
#include "TMVA/Tools.h"

Bool_t gFIRSTTST = kTRUE;
Bool_t gFIRSTORG = kTRUE;

Double_t gGDInit     = 0;
Double_t gGDPtr      = 0;
Double_t gGDNorm     = 0;
Double_t gGDEval     = 0;
Double_t gGDEvalRule = 0;
Double_t gGDRuleLoop = 0;
Double_t gGDLinLoop  = 0;
////////////////////////////////////////////////////////////////////////////////
/// constructor

TMVA::RuleFitParams::RuleFitParams()
   : fRuleFit ( 0 )
   , fRuleEnsemble ( 0 )
   , fNRules ( 0 )
   , fNLinear ( 0 )
   , fPathIdx1 ( 0 )
   , fPathIdx2 ( 0 )
   , fPerfIdx1 ( 0 )
   , fPerfIdx2 ( 0 )
   , fGDNTauTstOK( 0 )
   , fGDNTau ( 51 )
   , fGDTauPrec ( 0.02 )
   , fGDTauScan ( 1000 )
   , fGDTauMin ( 0.0 )
   , fGDTauMax ( 1.0 )
   , fGDTau ( -1.0 )
   , fGDPathStep ( 0.01 )
   , fGDNPathSteps ( 1000 )
   , fGDErrScale ( 1.1 )
   , fAverageTruth( 0 )
   , fFstarMedian ( 0 )
   , fGDNtuple ( 0 )
   , fNTRisk ( 0 )
   , fNTErrorRate ( 0 )
   , fNTNuval ( 0 )
   , fNTCoefRad ( 0 )
   , fNTOffset ( 0 )
   , fNTCoeff ( 0 )
   , fNTLinCoeff ( 0 )
   , fsigave( 0 )
   , fsigrms( 0 )
   , fbkgave( 0 )
   , fbkgrms( 0 )
   , fLogger( new MsgLogger("RuleFit") )
{
   Init();
}
////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::RuleFitParams::~RuleFitParams()
{
   if (fNTCoeff)    { delete [] fNTCoeff;    fNTCoeff = 0;    }
   if (fNTLinCoeff) { delete [] fNTLinCoeff; fNTLinCoeff = 0; }
   delete fLogger;
}

////////////////////////////////////////////////////////////////////////////////
/// Initializes all parameters using the RuleEnsemble and the training tree

void TMVA::RuleFitParams::Init()
{
   if (fRuleFit==0) return;
   if (fRuleFit->GetMethodRuleFit()==0) {
      Log() << kFATAL << "RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
   }
   UInt_t neve = fRuleFit->GetTrainingEvents().size();
   //
   fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
   fNRules  = fRuleEnsemble->GetNRules();
   fNLinear = fRuleEnsemble->GetNLinear();

   //
   // The fraction of events used for validation should be close to unity.
   // Always select events from the END.
   //
   UInt_t ofs;
   fPerfIdx1 = 0;
   if (neve>1) {
      fPerfIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
   }
   else {
      fPerfIdx2 = 0;
   }
   ofs = neve - fPerfIdx2 - 1;
   fPerfIdx1 += ofs;
   fPerfIdx2 += ofs;
   //
   // The fraction of events used for the path search may be smaller, say 0.5.
   // Always select events from the BEGINNING.
   // This means that the validation and search samples will not overlap if both fractions are <0.5.
   //
   fPathIdx1 = 0;
   if (neve>1) {
      fPathIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
   }
   else {
      fPathIdx2 = 0;
   }
   //
   // summarize weights
   //
   fNEveEffPath = 0;
   for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
      fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
   }

   fNEveEffPerf = 0;
   for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
      fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
   }
   //
   Log() << kVERBOSE << "Path constr. - event index range = [ " << fPathIdx1 << ", " << fPathIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPath << Endl;
   Log() << kVERBOSE << "Error estim. - event index range = [ " << fPerfIdx1 << ", " << fPerfIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPerf << Endl;
   //
   if (fRuleEnsemble->DoRules())
      Log() << kDEBUG << "Number of rules in ensemble: " << fNRules << Endl;
   else
      Log() << kDEBUG << "Rules are disabled " << Endl;

   if (fRuleEnsemble->DoLinear())
      Log() << kDEBUG << "Number of linear terms: " << fNLinear << Endl;
   else
      Log() << kDEBUG << "Linear terms are disabled " << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// initializes the ntuple

void TMVA::RuleFitParams::InitNtuple()
{
   fGDNtuple = new TTree("MonitorNtuple_RuleFitParams","RuleFit path search");
   fGDNtuple->Branch("risk",    &fNTRisk,      "risk/D");
   fGDNtuple->Branch("error",   &fNTErrorRate, "error/D");
   fGDNtuple->Branch("nuval",   &fNTNuval,     "nuval/D");
   fGDNtuple->Branch("coefrad", &fNTCoefRad,   "coefrad/D");
   fGDNtuple->Branch("offset",  &fNTOffset,    "offset/D");
   //
   fNTCoeff    = (fNRules >0 ? new Double_t[fNRules]  : 0);
   fNTLinCoeff = (fNLinear>0 ? new Double_t[fNLinear] : 0);

   for (UInt_t i=0; i<fNRules; i++) {
      fGDNtuple->Branch(Form("a%d",i+1),&fNTCoeff[i],Form("a%d/D",i+1));
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fGDNtuple->Branch(Form("b%d",i+1),&fNTLinCoeff[i],Form("b%d/D",i+1));
   }
}

////////////////////////////////////////////////////////////////////////////////
/// evaluate the average of each variable and f(x) in the given range

void TMVA::RuleFitParams::EvaluateAverage( UInt_t ind1, UInt_t ind2,
                                           std::vector<Double_t> &avsel,
                                           std::vector<Double_t> &avrul )
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
   }
   //
   avsel.clear();
   avrul.clear();
   //
   if (fNLinear>0) avsel.resize(fNLinear,0);
   if (fNRules>0)  avrul.resize(fNRules,0);
   const std::vector<UInt_t> *eventRuleMap=0;
   Double_t ew;
   Double_t sumew=0;
   //
   // Loop over events and calculate average of linear terms (normalised) and rule response.
   //
   if (fRuleEnsemble->IsRuleMapOK()) { // MakeRuleMap() has been called
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
         }
         // loop over rules
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         for (UInt_t r=0; r<nrules; r++) {
            avrul[(*eventRuleMap)[r]] += ew;
         }
      }
   }
   else { // MakeRuleMap() has not yet been called
      const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         // first cache rule/lin response
         /* Double_t val = */ fRuleEnsemble->EvalLinEvent(*((*events)[i]));
         /* val = */ fRuleEnsemble->EvalEvent(*((*events)[i]));
         // loop over linear terms
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
         }
         // loop over rules
         for (UInt_t r=0; r<fNRules; r++) {
            avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
         }
      }
   }
   // average variable
   for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
      avsel[sel] = avsel[sel] / sumew;
   }
   // average rule response, excl. coefficient
   for (UInt_t r=0; r<fNRules; r++) {
      avrul[r] = avrul[r] / sumew;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( const Event& e ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( e )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
   //
   return diff*diff*e.GetWeight();
}
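////////////////////////////////////////////////////////////////////////////////
/// For illustration only (a sketch, not part of the TMVA API): the ramp loss
/// used by all three LossFunction() overloads, as a standalone function.
/// Here y is the truth (+1 for signal, -1 for background), F the ensemble
/// response and w the event weight; clamping F to [-1,1] bounds the loss and
/// makes confidently correct events (F beyond the ramp on the right side)
/// contribute zero.
/// \code
/// #include <algorithm>
///
/// double RampLoss(double y, double F, double w)
/// {
///    const double h    = std::max(-1.0, std::min(1.0, F)); // ramp: clamp response to [-1,1]
///    const double diff = y - h;                             // residual w.r.t. clamped response
///    return diff*diff*w;                                    // weighted squared error
/// }
/// \endcode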

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( evtidx )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx, UInt_t itau ) const
{
   Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,e) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment

Double_t TMVA::RuleFitParams::Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment for tau model <itau>

Double_t TMVA::RuleFitParams::Risk(UInt_t ind1, UInt_t ind2, Double_t neff, UInt_t itau) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i,itau);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// This is the "lasso" penalty.
/// To be used for regression.
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::Penalty() const
{
   Log() << kWARNING << "<Penalty> Using unverified code! Check!" << Endl;
   Double_t rval=0;
   const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
   for (UInt_t i=0; i<fNRules; i++) {
      rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      rval += TMath::Abs((*lincoeff)[i]);
   }
   return rval;
}
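////////////////////////////////////////////////////////////////////////////////
/// Restated as a formula (equivalent to the loops in Penalty() above): with
/// rule coefficients \f$ a_k \f$ and linear-term coefficients \f$ b_j \f$,
/// \f[
///    P = \sum_k |a_k| + \sum_j |b_j| ,
/// \f]
/// i.e. the L1 ("lasso") norm of the full coefficient vector.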

////////////////////////////////////////////////////////////////////////////////
/// Initialize GD path search

void TMVA::RuleFitParams::InitGD()
{
   if (fGDNTau<2) {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   if (fGDTau<0.0) {
      // fGDNTau = 50; already set in MethodRuleFit
      fGDTauScan = 1000;
      fGDTauMin  = 0.0;
      fGDTauMax  = 1.0;
   }
   else {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   // set all taus
   fGDTauVec.clear();
   fGDTauVec.resize( fGDNTau );
   if (fGDNTau==1) {
      fGDTauVec[0] = fGDTau;
   }
   else {
      // set tau vector - TODO: make a smarter choice of range in tau
      Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         fGDTauVec[itau] = static_cast<Double_t>(itau)*dtau + fGDTauMin;
         if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
      }
   }
   // initialize path search vectors

   fGradVec.clear();
   fGradVecLin.clear();
   fGradVecTst.clear();
   fGradVecLinTst.clear();
   fGDErrTst.clear();
   fGDErrTstOK.clear();
   fGDOfsTst.clear();
   fGDCoefTst.clear();
   fGDCoefLinTst.clear();
   //
   // rules
   //
   fGDCoefTst.resize(fGDNTau);
   fGradVec.resize(fNRules,0);
   fGradVecTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecTst[i].resize(fNRules,0);
      fGDCoefTst[i].resize(fNRules,0);
   }
   //
   // linear terms
   //
   fGDCoefLinTst.resize(fGDNTau);
   fGradVecLin.resize(fNLinear,0);
   fGradVecLinTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecLinTst[i].resize(fNLinear,0);
      fGDCoefLinTst[i].resize(fNLinear,0);
   }
   //
   // error, coefs etc
   //
   fGDErrTst.resize(fGDNTau,0);
   fGDErrTstOK.resize(fGDNTau,kTRUE);
   fGDOfsTst.resize(fGDNTau,0);
   fGDNTauTstOK = fGDNTau;
   //
   // calculate average selectors and rule responses for the path sample size
   //
}
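////////////////////////////////////////////////////////////////////////////////
/// For illustration only (a sketch, not part of the TMVA API): the tau grid
/// built above is a uniform scan of the regularization parameter over
/// [fGDTauMin,fGDTauMax], with each value clamped to a maximum of 1:
/// \code
/// #include <algorithm>
/// #include <vector>
///
/// std::vector<double> MakeTauGrid(unsigned int ntau, double tmin, double tmax)
/// {
///    // assumes ntau >= 2; the single-tau case uses the configured fGDTau directly
///    std::vector<double> taus(ntau);
///    const double dtau = (tmax - tmin)/static_cast<double>(ntau-1);
///    for (unsigned int i=0; i<ntau; i++) {
///       taus[i] = std::min(1.0, tmin + dtau*static_cast<double>(i));
///    }
///    return taus;
/// }
/// \endcode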

////////////////////////////////////////////////////////////////////////////////
/// This finds the cutoff parameter tau by scanning several different paths

Int_t TMVA::RuleFitParams::FindGDTau()
{
   if (fGDNTau<2) return 0;
   if (fGDTauScan==0) return 0;

   if (fGDOfsTst.size()<1)
      Log() << kFATAL << "BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;
   //
   Log() << kINFO << "Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;
   //
   // Find how many points to scan and how often to calculate the error
   UInt_t nscan = fGDTauScan; //std::min(static_cast<Int_t>(fGDTauScan),fGDNPathSteps);
   UInt_t netst = std::min(nscan,UInt_t(100));
   UInt_t nscanned=0;
   //
   //--------------------
   // loop over the paths
   //--------------------
   // The MAXIMUM number of loops is given by nscan.
   // At each loop, the paths far away from the minimum
   // are rejected. Hence at each check (every netst events), the number
   // of paths searched is reduced.
   // The maximum 'distance' from the minimum error rate is
   // 1 sigma. See RiskPerfTst() for details.
   //
   Bool_t doloop=kTRUE;
   UInt_t ip=0;
   UInt_t itauMin=0;
   Timer timer( nscan, "RuleFit" );
   while (doloop) {
      // make gradvec
      MakeTstGradientVector();
      // update coefs
      UpdateTstCoefficients();
      // estimate error and do the sum
      // do this at index=0, netst-1, 2*netst-1 ...
      nscanned++;
      if ( (ip==0) || ((ip+1)%netst==0) ) {
         // ErrorRateRocTst( );
         itauMin = RiskPerfTst();
         Log() << kVERBOSE << Form("%4d",ip+1) << ". tau = " << Form("%4.4f",fGDTauVec[itauMin])
               << " => error rate = " << fGDErrTst[itauMin] << Endl;
      }
      ip++;
      doloop = ((ip<nscan) && (fGDNTauTstOK>3));
      gFIRSTTST = kFALSE;
      if (Log().GetMinType()>kVERBOSE)
         timer.DrawProgressBar(ip);
   }
   //
   // Set tau and coefs
   // Downscale tau slightly in order to avoid numerical problems
   //
   if (nscanned==0) {
      Log() << kERROR << "<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;
   }
   fGDTau = fGDTauVec[itauMin];
   fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
   fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
   fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
   Log() << kINFO << "Best path found with tau = " << Form("%4.4f",fGDTau)
         << " after " << timer.GetElapsedTime() << " " << Endl;

   return nscan;
}

////////////////////////////////////////////////////////////////////////////////
/// The following finds the gradient-directed path in parameter space.
/// More work is needed... FT, 24/9/2006
/// The algorithm is currently as follows (if not otherwise stated, the sample
/// used below is [fPathIdx1,fPathIdx2]):
///
/// 1. Set offset to -average(y(true)) and all coefs=0 => average of F(x)==0
/// 2. FindGDTau() : start scanning using several paths defined by different tau;
///    choose the tau yielding the best path
/// 3. start scanning the chosen path
/// 4. check the error rate at a given frequency;
///    data used for the check: [fPerfIdx1,fPerfIdx2]
/// 5. stop when either of the following conditions is fulfilled:
///    a. loop index==fGDNPathSteps
///    b. error > fGDErrScale*errmin
///    c. only in DEBUG mode: risk is not monotonically decreasing
///
/// The algorithm will warn if:
///    I.   the error rate was still decreasing when the loop finished -> increase fGDNPathSteps!
///    II.  the minimum was found at an early stage -> decrease fGDPathStep
///    III. DEBUG: risk > previous risk -> entered chaotic region (regularization is too small)

void TMVA::RuleFitParams::MakeGDPath()
{
   Log() << kINFO << "GD path scan - the scan stops when the max num. of steps is reached or a min is found"
         << Endl;
   Log() << kVERBOSE << "Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
   Log() << kVERBOSE << "Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;

   // check if debug mode
   const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
   const Bool_t isDebug   = (Log().GetMinType()<=kDEBUG);

   // init GD parameters and clear coeff vectors
   InitGD();

   // evaluate average response of rules/linear terms (with event weights)
   EvaluateAveragePath();
   EvaluateAveragePerf();

   // initial estimate; all other a(i) are zero
   Log() << kVERBOSE << "Creating GD path"  << Endl;
   Log() << kVERBOSE << "  N(steps)     = " << fGDNPathSteps << Endl;
   Log() << kVERBOSE << "  step         = " << fGDPathStep << Endl;
   Log() << kVERBOSE << "  N(tau)       = " << fGDNTau << Endl;
   Log() << kVERBOSE << "  N(tau steps) = " << fGDTauScan << Endl;
   Log() << kVERBOSE << "  tau range    = [ " << fGDTauVec[0] << " , " << fGDTauVec[fGDNTau-1] << " ]" << Endl;

   // init ntuple
   if (isDebug) InitNtuple();

   // DEBUG: risk scan
   Int_t nbadrisk=0;        // number of points where risk(i+1)>risk(i)
   Double_t trisk=0;        // time per risk evaluation
   Double_t strisk=0;       // total time
   Double_t rprev=1e32;     // previous risk

   // parameters set at point with min error
   Double_t errmin=1e32;    // min error
   // Double_t riskMin=0;   // risk
   Int_t indMin=-1;         // index
   std::vector<Double_t> coefsMin;    // rule coefs
   std::vector<Double_t> lincoefsMin; // linear coefs
   Double_t offsetMin;                // offset


   // DEBUG: timing
   clock_t t0=0;
   Double_t tgradvec;
   Double_t tupgrade;
   Double_t tperf;
   Double_t stgradvec=0;
   Double_t stupgrade=0;
   Double_t stperf=0;

   // linear regression to estimate slope of error rate evolution
   const UInt_t npreg=5;
   std::vector<Double_t> valx;
   std::vector<Double_t> valy;
   std::vector<Double_t> valxy;

   // loop related
   Bool_t docheck;          // true if an error rate check is to be done
   Int_t iloop=0;           // loop index
   Bool_t found=kFALSE;     // true if minimum is found
   Bool_t riskFlat=kFALSE;  // DEBUG: flag is true if risk evolution behaves badly
   Bool_t done = kFALSE;    // flag that the scan is done

   // calculate how often to check error rate
   int imod = fGDNPathSteps/100;
   if (imod<100) imod = std::min(100,fGDNPathSteps);
   if (imod>100) imod=100;

   // reset coefficients
   fAverageTruth = -CalcAverageTruth();
   offsetMin     = fAverageTruth;
   fRuleEnsemble->SetOffset(offsetMin);
   fRuleEnsemble->ClearCoefficients(0);
   fRuleEnsemble->ClearLinCoefficients(0);
   for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
      fGDOfsTst[i] = offsetMin;
   }
   Log() << kVERBOSE << "Obtained initial offset = " << offsetMin << Endl;

   // find the best tau - returns the number of steps performed in scan
   Int_t nprescan = FindGDTau();

   //
   //
   // calculate F*
   //
   // CalcFStar(fPerfIdx1, fPerfIdx2);
   //

   // set some ntuple values
   fNTRisk = rprev;
   fNTCoefRad = -1.0;
   fNTErrorRate = 0;

   // a local flag indicating for what reason the search was stopped
   Int_t stopCondition=0;

   Log() << kINFO << "Fitting model..." << Endl;
   // start loop with timer
   Timer timer( fGDNPathSteps, "RuleFit" );
   while (!done) {
      // Make gradient vector (eq 44, ref 1)
      if (isVerbose) t0 = clock();
      MakeGradientVector();
      if (isVerbose) {
         tgradvec = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stgradvec += tgradvec;
      }

      // Calculate the direction in parameter space (eq 25, ref 1) and update coeffs (eq 22, ref 1)
      if (isVerbose) t0 = clock();
      UpdateCoefficients();
      if (isVerbose) {
         tupgrade = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stupgrade += tupgrade;
      }

      // don't check error rate every loop
      docheck = ((iloop==0) ||((iloop+1)%imod==0));

      if (docheck) {
         // draw progressbar only if not debug
         if (!isVerbose)
            timer.DrawProgressBar(iloop);
         fNTNuval = Double_t(iloop)*fGDPathStep;
         fNTRisk = 0.0;

         // check risk evolution

         if (isDebug) FillCoefficients();
         fNTCoefRad = fRuleEnsemble->CoefficientRadius();

         // calculate risk
         t0 = clock();
         fNTRisk = RiskPath();
         trisk = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         strisk += trisk;
         //
         // Check for an increase in risk.
         // Such an increase would imply that the regularization is too small.
         // Stop the iteration if this happens.
         //
         if (fNTRisk>=rprev) {
            if (fNTRisk>rprev) {
               nbadrisk++;
               Log() << kWARNING << "Risk(i+1)>=Risk(i) in path" << Endl;
               riskFlat=(nbadrisk>3);
               if (riskFlat) {
                  Log() << kWARNING << "Chaotic behaviour of risk evolution" << Endl;
                  Log() << kWARNING << "--- STOPPING MINIMISATION ---" << Endl;
                  Log() << kWARNING << "This may be OK if the minimum is already found" << Endl;
               }
            }
         }
         rprev = fNTRisk;
         //
         // Estimate the error rate using cross validation.
         // Well, not quite full cross validation since we only
         // use ONE model.
         //
         if (isVerbose) t0 = clock();
         fNTErrorRate = 0;

         // Check error rate
         Double_t errroc;//= ErrorRateRoc();
         Double_t riskPerf = RiskPerf();
         // Double_t optimism = Optimism();
         //
         errroc = riskPerf;
         //
         fNTErrorRate = errroc;
         //
         if (isVerbose) {
            tperf = Double_t(clock()-t0)/CLOCKS_PER_SEC;
            stperf +=tperf;
         }
         //
         // Always take the last min.
         // For each step the risk is reduced.
         //
         if (fNTErrorRate<=errmin) {
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         if ( fNTErrorRate > fGDErrScale*errmin) found = kTRUE;
         //
         // check slope of last couple of points
         //
         if (valx.size()==npreg) {
            valx.erase(valx.begin());
            valy.erase(valy.begin());
            valxy.erase(valxy.begin());
         }
         valx.push_back(fNTNuval);
         valy.push_back(fNTErrorRate);
         valxy.push_back(fNTErrorRate*fNTNuval);

         gFIRSTORG = kFALSE;

         //
         if (isDebug) fGDNtuple->Fill();
         if (isVerbose) {
            Log() << kVERBOSE << "ParamsIRE : "
                  << std::setw(10)
                  << Form("%8d",iloop+1) << " "
                  << Form("%4.4f",fNTRisk) << " "
                  << Form("%4.4f",riskPerf) << " "
                  << Form("%4.4f",fNTRisk+riskPerf) << " "
                  // << Form("%4.4f",fsigave+fbkgave) << " "
                  // << Form("%4.4f",fsigave) << " "
                  // << Form("%4.4f",fsigrms) << " "
                  // << Form("%4.4f",fbkgave) << " "
                  // << Form("%4.4f",fbkgrms) << " "
                  // << Form("%4.4f",fRuleEnsemble->CoefficientRadius())
                  << Endl;
         }
      }
      iloop++;
      // Stop iteration under various conditions
      // * The condition R(i+1)<R(i) is no longer true (the implicit regularization is too weak)
      // * The current error estimate is > factor*errmin (factor = 1.1)
      // * We have reached the last step...
      Bool_t endOfLoop = (iloop==fGDNPathSteps);
      if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
         if (riskFlat) {
            stopCondition = 1;
         }
         else if (endOfLoop) {
            stopCondition = 2;
         }
         if (indMin<0) {
            Log() << kWARNING << "BUG TRAP: should not be here - still, this bug is harmless ;)" << Endl;
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         found = kTRUE;
      }
      done = (found);
   }
   Log() << kINFO << "Minimisation elapsed time : " << timer.GetElapsedTime() << " " << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;
   Log() << kINFO << "Found minimum at step " << indMin+1 << " with error = " << errmin << Endl;
   Log() << kINFO << "Reason for ending loop: ";
   switch (stopCondition) {
   case 0:
      Log() << kINFO << "clear minimum found";
      break;
   case 1:
      Log() << kINFO << "chaotic behaviour of risk";
      break;
   case 2:
      Log() << kINFO << "end of loop reached";
      break;
   default:
      Log() << kINFO << "unknown!";
      break;
   }
   Log() << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;

   // check if the minimum was reached early - might indicate a too large step size
   if ( Double_t(indMin)/Double_t(nprescan+fGDNPathSteps) < 0.05 ) {
      Log() << kWARNING << "Reached minimum early in the search" << Endl;
      Log() << kWARNING << "Check results and maybe decrease GDStep size" << Endl;
   }
   //
   // quick check of the sign of the slope for the last npreg points
   //
   Double_t sumx  = std::accumulate( valx.begin(), valx.end(), Double_t() );
   Double_t sumxy = std::accumulate( valxy.begin(), valxy.end(), Double_t() );
   Double_t sumy  = std::accumulate( valy.begin(), valy.end(), Double_t() );
   Double_t slope = Double_t(valx.size())*sumxy - sumx*sumy;
   if (slope<0) {
      Log() << kINFO << "The error rate was still decreasing at the end of the path" << Endl;
      Log() << kINFO << "Increase number of steps (GDNSteps)." << Endl;
   }
   //
   // set coefficients
   //
   if (found) {
      fRuleEnsemble->SetCoefficients( coefsMin );
      fRuleEnsemble->SetLinCoefficients( lincoefsMin );
      fRuleEnsemble->SetOffset( offsetMin );
   }
   else {
      Log() << kFATAL << "BUG TRAP: minimum not found in MakeGDPath()" << Endl;
   }

   //
   // print timing info (VERBOSE mode)
   //
   if (isVerbose) {
      Double_t stloop = strisk + stupgrade + stgradvec + stperf;
      Log() << kVERBOSE << "Timing per loop (ms):" << Endl;
      Log() << kVERBOSE << "  gradvec = " << 1000*stgradvec/iloop << Endl;
      Log() << kVERBOSE << "  upgrade = " << 1000*stupgrade/iloop << Endl;
      Log() << kVERBOSE << "  risk    = " << 1000*strisk/iloop << Endl;
      Log() << kVERBOSE << "  perf    = " << 1000*stperf/iloop << Endl;
      Log() << kVERBOSE << "  loop    = " << 1000*stloop/iloop << Endl;
      //
      Log() << kVERBOSE << "  GDInit     = " << 1000*gGDInit/iloop << Endl;
      Log() << kVERBOSE << "  GDPtr      = " << 1000*gGDPtr/iloop << Endl;
      Log() << kVERBOSE << "  GDEval     = " << 1000*gGDEval/iloop << Endl;
      Log() << kVERBOSE << "  GDEvalRule = " << 1000*gGDEvalRule/iloop << Endl;
      Log() << kVERBOSE << "  GDNorm     = " << 1000*gGDNorm/iloop << Endl;
      Log() << kVERBOSE << "  GDRuleLoop = " << 1000*gGDRuleLoop/iloop << Endl;
      Log() << kVERBOSE << "  GDLinLoop  = " << 1000*gGDLinLoop/iloop << Endl;
   }
   //
   // write ntuple (DEBUG)
   if (isDebug) fGDNtuple->Write();
}
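////////////////////////////////////////////////////////////////////////////////
/// For illustration only (a sketch, not part of the TMVA API): the
/// end-of-path check above only needs the sign of the least-squares slope of
/// the last npreg (nu, error) points, so the unnormalised numerator
/// n*sum(xy) - sum(x)*sum(y) suffices:
/// \code
/// #include <cstddef>
/// #include <numeric>
/// #include <vector>
///
/// // negative => the error rate was still decreasing at the end of the path
/// double SlopeNumerator(const std::vector<double>& x, const std::vector<double>& y)
/// {
///    const double sumx = std::accumulate(x.begin(), x.end(), 0.0);
///    const double sumy = std::accumulate(y.begin(), y.end(), 0.0);
///    double sumxy = 0;
///    for (std::size_t i=0; i<x.size(); i++) sumxy += x[i]*y[i];
///    return static_cast<double>(x.size())*sumxy - sumx*sumy;
/// }
/// \endcode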

////////////////////////////////////////////////////////////////////////////////
/// helper function to store the rule coefficients in local arrays

void TMVA::RuleFitParams::FillCoefficients()
{
   fNTOffset = fRuleEnsemble->GetOffset();
   //
   for (UInt_t i=0; i<fNRules; i++) {
      fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates F* (optimum scoring function) for all events for the given sets.
/// The result is used in ErrorRateReg().
/// --- NOT USED ---

void TMVA::RuleFitParams::CalcFStar()
{
   Log() << kWARNING << "<CalcFStar> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<CalcFStar> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   fFstar.clear();
   std::vector<Double_t> fstarSorted;
   Double_t fstarVal;
   // loop over all events and estimate F* for each event
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      fstarVal = fRuleEnsemble->FStar(e);
      fFstar.push_back(fstarVal);
      fstarSorted.push_back(fstarVal);
      if (TMath::IsNaN(fstarVal)) Log() << kFATAL << "F* is NAN!" << Endl;
   }
   // sort F* and find median
   std::sort( fstarSorted.begin(), fstarSorted.end() );
   UInt_t ind = neve/2;
   if (neve&1) { // odd number of events - take the middle element
      fFstarMedian = fstarSorted[ind];
   }
   else { // even - average the two middle elements
      fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// implementation of eq. 7.17 in the Hastie, Tibshirani & Friedman book;
/// this is the covariance between the estimated response yhat and the
/// true value y.
/// NOT REALLY SURE IF THIS IS CORRECT!
/// --- THIS IS NOT USED ---

Double_t TMVA::RuleFitParams::Optimism()
{
   Log() << kWARNING << "<Optimism> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<Optimism> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumy=0;
   Double_t sumyhat=0;
   Double_t sumyhaty=0;
   Double_t sumw2=0;
   Double_t sumw=0;
   Double_t yhat;
   Double_t y;
   Double_t w;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      yhat = fRuleEnsemble->EvalEvent(i); // evaluated using the model
      y    = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0); // the truth
      w    = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf; // the weight, rescaled such that the sum is 1
      sumy     += w*y;
      sumyhat  += w*yhat;
      sumyhaty += w*yhat*y;
      sumw2    += w*w;
      sumw     += w;
   }
   Double_t div = 1.0-sumw2;
   Double_t cov = sumyhaty - sumyhat*sumy;
   return 2.0*cov/div;
}
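////////////////////////////////////////////////////////////////////////////////
/// Restated as a formula (what the code above computes): with weights
/// normalised to unit sum, \f$ \sum_i w_i = 1 \f$,
/// \f[
///    \omega = \frac{2\left(\sum_i w_i \hat{y}_i y_i - \sum_i w_i \hat{y}_i \sum_i w_i y_i\right)}{1-\sum_i w_i^2} ,
/// \f]
/// i.e. twice the weighted covariance between the model response
/// \f$ \hat{y} \f$ and the truth \f$ y \f$, with the standard
/// unbiased-weighted-variance denominator.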

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// This code is pretty messy at the moment.
/// Cleanup is needed.
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::ErrorRateReg()
{
   Log() << kWARNING << "<ErrorRateReg> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateReg> Invalid start/end indices!" << Endl;
   }
   if (fFstar.size()!=neve) {
      Log() << kFATAL << "--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
            << " Fstar.size() = " << fFstar.size() << " , N(events) = " << neve << Endl;
   }
   //
   Double_t sF;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdf = 0;
   Double_t sumdfmed = 0;
   //
   // A bit messy here.
   // I believe the binary error classification is appropriate here.
   // The problem is stability.
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // scaled abs error, eq 20 in RuleFit paper
      sumdf += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
      sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
   }
   // scaled abs error, eq 20
   // This error (df) is large - need to think about how to compensate...
   //
   return sumdf/sumdfmed;
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It uses a binary estimate of (y-F*(x)):
///   (y-F*(x)) = (Num of events where sign(F)!=sign(y))/Neve
///   y = {+1 if event is signal, -1 otherwise}
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::ErrorRateBin()
{
   Log() << kWARNING << "<ErrorRateBin> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateBin> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdfbin = 0;
   Double_t dneve = Double_t(neve);
   Int_t signF, signy;
   Double_t sF;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // Double_t sFstar = fRuleEnsemble->FStar(e); // THIS CAN BE CALCULATED ONCE!
      signF = (sF>0 ? +1:-1);
      // signy = (sFStar>0 ? +1:-1);
      signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
      sumdfbin += TMath::Abs(Double_t(signF-signy))*0.5;
   }
   Double_t f = sumdfbin/dneve;
   // Double_t df = f*TMath::Sqrt((1.0/sumdfbin) + (1.0/dneve));
   return f;
}

////////////////////////////////////////////////////////////////////////////////

Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
                                               std::vector<Double_t> & sFbkg )
{
   //
   // Estimates the error rate with the current set of parameters.
   // It calculates the area under the bkg rejection vs signal efficiency curve.
   // The value returned is 1-area.
   //
   std::sort(sFsig.begin(), sFsig.end());
   std::sort(sFbkg.begin(), sFbkg.end());
   const Double_t minsig = sFsig.front();
   const Double_t minbkg = sFbkg.front();
   const Double_t maxsig = sFsig.back();
   const Double_t maxbkg = sFbkg.back();
   const Double_t minf = std::min(minsig,minbkg);
   const Double_t maxf = std::max(maxsig,maxbkg);
   const Int_t    nsig = Int_t(sFsig.size());
   const Int_t    nbkg = Int_t(sFbkg.size());
   const Int_t    np   = std::min((nsig+nbkg)/4,50);
   const Double_t df   = (maxf-minf)/(np-1);
   //
   // calculate area under rejection/efficiency curve
   //
   Double_t fcut;
   std::vector<Double_t>::const_iterator indit;
   Int_t nrbkg;
   Int_t nesig;
   Int_t pnesig=0;
   Double_t rejb=0;
   Double_t effs=1.0;
   Double_t prejb=0;
   Double_t peffs=1.0;
   // Double_t drejb;
   Double_t deffs;
   Double_t area=0;
   Int_t npok=0;
   //
   // loop over range of F [minf,maxf]
   //
   for (Int_t i=0; i<np; i++) {
      fcut = minf + df*Double_t(i);
      indit = std::find_if( sFsig.begin(), sFsig.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
      nesig = sFsig.end()-indit; // number of sig accepted with F>cut
      if (TMath::Abs(pnesig-nesig)>0) {
         npok++;
         indit = std::find_if( sFbkg.begin(), sFbkg.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
         nrbkg = indit-sFbkg.begin(); // number of bkg rejected with F>cut
         rejb = Double_t(nrbkg)/Double_t(nbkg);
         effs = Double_t(nesig)/Double_t(nsig);
         //
         // drejb = rejb-prejb;
         deffs = effs-peffs;
         area += 0.5*TMath::Abs(deffs)*(rejb+prejb); // trapezoid
         prejb = rejb;
         peffs = effs;
      }
      pnesig = nesig;
   }
   area += 0.5*(1+rejb)*effs; // extrapolate to the end point

   return (1.0-area);
}
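////////////////////////////////////////////////////////////////////////////////
/// For illustration only (a hypothetical helper, not part of the TMVA API):
/// the quantity returned above is one minus the trapezoidal-rule area under a
/// background-rejection vs. signal-efficiency curve, plus an end-point
/// extrapolation:
/// \code
/// #include <cmath>
/// #include <cstddef>
/// #include <utility>
/// #include <vector>
///
/// // points: (signal efficiency, background rejection) pairs, scanned from
/// // loose to tight cuts. Returns 1 - area under the curve.
/// double OneMinusRocArea(const std::vector< std::pair<double,double> >& points)
/// {
///    double area = 0, peff = 1.0, prej = 0.0;
///    for (std::size_t i=0; i<points.size(); i++) {
///       const double eff = points[i].first;
///       const double rej = points[i].second;
///       area += 0.5*std::fabs(eff - peff)*(rej + prej); // trapezoid
///       peff = eff;
///       prej = rej;
///    }
///    area += 0.5*(1.0 + prej)*peff; // extrapolate to the end point
///    return 1.0 - area;
/// }
/// \endcode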

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
/// This works but is less efficient than calculating the Risk using RiskPerf().

Double_t TMVA::RuleFitParams::ErrorRateRoc()
{
   Log() << kWARNING << "<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRoc> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sF;
   //
   std::vector<Double_t> sFsig;
   std::vector<Double_t> sFbkg;
   Double_t sumfsig=0;
   Double_t sumfbkg=0;
   Double_t sumf2sig=0;
   Double_t sumf2bkg=0;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent(i); // * fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
         sFsig.push_back(sF);
         sumfsig  += sF;
         sumf2sig += sF*sF;
      }
      else {
         sFbkg.push_back(sF);
         sumfbkg  += sF;
         sumf2bkg += sF*sF;
      }
   }
   fsigave = sumfsig/sFsig.size();
   fbkgave = sumfbkg/sFbkg.size();
   fsigrms = TMath::Sqrt(gTools().ComputeVariance(sumf2sig,sumfsig,sFsig.size()));
   fbkgrms = TMath::Sqrt(gTools().ComputeVariance(sumf2bkg,sumfbkg,sFbkg.size()));
   //
   return ErrorRateRocRaw( sFsig, sFbkg );
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
///
/// See comment under ErrorRateRoc().

void TMVA::RuleFitParams::ErrorRateRocTst()
{
   Log() << kWARNING << "<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   // std::vector<Double_t> sF;
   Double_t sF;
   std::vector< std::vector<Double_t> > sFsig;
   std::vector< std::vector<Double_t> > sFbkg;
   //
   sFsig.resize( fGDNTau );
   sFbkg.resize( fGDNTau );
   // sF.resize( fGDNTau );

   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *(*events)[i], fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
            sFsig[itau].push_back(sF);
         }
         else {
            sFbkg[itau].push_back(sF);
         }
      }
   }
   Double_t err;

   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
      fGDErrTst[itau] = err;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters
/// using the <Perf> subsample.
/// Returns the tau index giving the lowest error.

UInt_t TMVA::RuleFitParams::RiskPerfTst()
{
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<RiskPerfTst> Invalid start/end indices!" << Endl;
      return 0;
   }
   //
   Double_t sumx  = 0;
   Double_t sumx2 = 0;
   Double_t maxx  = -100.0;
   Double_t minx  = 1e30;
   UInt_t itaumin = 0;
   UInt_t nok=0;
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         nok++;
         fGDErrTst[itau] = RiskPerf(itau);
         sumx  += fGDErrTst[itau];
         sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
         if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
         if (fGDErrTst[itau]<minx) {
            minx=fGDErrTst[itau];
            itaumin = itau;
         }
      }
   }
   Double_t sigx = TMath::Sqrt(gTools().ComputeVariance( sumx2, sumx, nok ) );
   Double_t maxacc = minx+sigx;
   //
   if (nok>0) {
      nok = 0;
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         if (fGDErrTstOK[itau]) {
            if (fGDErrTst[itau] > maxacc) {
               fGDErrTstOK[itau] = kFALSE;
            }
            else {
               nok++;
            }
         }
      }
   }
   fGDNTauTstOK = nok;
   Log() << kVERBOSE << "TAU: "
         << itaumin << " "
         << nok << " "
         << minx << " "
         << maxx << " "
         << sigx << Endl;
   //
   return itaumin;
}

////////////////////////////////////////////////////////////////////////////////
/// make test gradient vector for all tau;
/// same algorithm as MakeGradientVector()

void TMVA::RuleFitParams::MakeTstGradientVector()
{
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeTstGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         for (UInt_t ir=0; ir<fNRules; ir++) {
            fGradVecTst[itau][ir]=0;
         }
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLinTst[itau][il]=0;
         }
      }
   }
   //
   // Double_t val; // temp store
   Double_t sF;  // score function value
   Double_t r;   // eq 35, ref 1
   Double_t y;   // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   // Loop over all events
   //
   UInt_t nsfok=0;
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];
      UInt_t nrules=0;
      if (fRuleEnsemble->DoRules()) {
         eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
         nrules = (*eventRuleMap).size();
      }
      for (UInt_t itau=0; itau<fGDNTau; itau++) { // loop over tau
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *e, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fGDErrTstOK[itau]) {
            sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
            if (TMath::Abs(sF)<1.0) {
               nsfok++;
               r = 0;
               y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
               r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
               // rule gradient vector
               for (UInt_t ir=0; ir<nrules; ir++) {
                  rind = (*eventRuleMap)[ir];
                  fGradVecTst[itau][rind] += r;
               }
               // linear terms
               for (UInt_t il=0; il<fNLinear; il++) {
                  fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i, kTRUE );
               }
            } // if (TMath::Abs(F)<xxx)
         }
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset
/// for all taus. TODO: do not need index range!

void TMVA::RuleFitParams::UpdateTstCoefficients()
{
   Double_t maxr, maxl, cthresh, val;
   // loop over all taus
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         // find max gradient
         maxr = ( (fNRules>0 ?
                   TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue()))):0) );
         maxl = ( (fNLinear>0 ?
                   TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue()))):0) );

         // Use the maximum as a threshold
         Double_t maxv = (maxr>maxl ? maxr:maxl);
         cthresh = maxv * fGDTauVec[itau];

         // Add to offset, if gradient is large enough:
         // Loop over the gradient vector and move to next set of coefficients
         // size of GradVec (and GradVecLin) should be 0 if learner is disabled
         //
         // Step-size is divided by 10 when looking for the best path.
         //
         if (maxv>0) {
            const Double_t stepScale=1.0;
            for (UInt_t i=0; i<fNRules; i++) {
               val = fGradVecTst[itau][i];

               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
               }
            }
            // Loop over the gradient vector for the linear part and move to next set of coefficients
            for (UInt_t i=0; i<fNLinear; i++) {
               val = fGradVecLinTst[itau][i];
               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
               }
            }
         }
      }
   }
   // set the offset - should be outside the itau loop!
   CalcTstAverageResponse();
}

////////////////////////////////////////////////////////////////////////////////
/// make gradient vector

void TMVA::RuleFitParams::MakeGradientVector()
{
   clock_t t0;
   // clock_t t10;
   t0 = clock();
   //
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t ir=0; ir<fNRules; ir++) {
      fGradVec[ir]=0;
   }
   for (UInt_t il=0; il<fNLinear; il++) {
      fGradVecLin[il]=0;
   }
   //
   // Double_t val; // temp store
   Double_t sF;  // score function value
   Double_t r;   // eq 35, ref 1
   Double_t y;   // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   gGDInit += Double_t(clock()-t0)/CLOCKS_PER_SEC;

   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];

      // t0 = clock(); //DEB
      sF = fRuleEnsemble->EvalEvent( i ); // should not contain the weight
      // gGDEval += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      if (TMath::Abs(sF)<1.0) {
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
         r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
         // rule gradient vector
         for (UInt_t ir=0; ir<nrules; ir++) {
            rind = (*eventRuleMap)[ir];
            fGradVec[rind] += r;
         }
         // gGDRuleLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
         // linear terms
         // t0 = clock(); //DEB
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
         }
         // gGDLinLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      } // if (TMath::Abs(F)<xxx)
   }
}
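////////////////////////////////////////////////////////////////////////////////
/// Restated as a formula (what the loop above computes, eq 35 in ref 1): only
/// events inside the ramp, \f$ |F(x_i)| < 1 \f$, contribute. For rule k,
/// \f[
///    g_k = \frac{2}{N_{\rm eff}} \sum_{i:\,|F(x_i)|<1} w_i \left(y_i - F(x_i)\right) r_k(x_i) ,
/// \f]
/// where \f$ r_k(x_i) \in \{0,1\} \f$ is the rule response (hence the simple
/// "+= r" via the rule map); the linear-term gradients are analogous with
/// \f$ r_k \f$ replaced by the normalised selector value.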

////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset

void TMVA::RuleFitParams::UpdateCoefficients()
{
   Double_t maxr = ( (fRuleEnsemble->DoRules() ?
                      TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue()))):0) );
   Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
                      TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue()))):0) );
   // Use the maximum as a threshold
   Double_t maxv = (maxr>maxl ? maxr:maxl);
   Double_t cthresh = maxv * fGDTau;

   Double_t useRThresh;
   Double_t useLThresh;
   //
   // Choose thresholds.
   //
   useRThresh = cthresh;
   useLThresh = cthresh;

   Double_t gval, lval, coef, lcoef;

   // Add to offset, if gradient is large enough:
   // Loop over the gradient vector and move to next set of coefficients
   // size of GradVec (and GradVecLin) should be 0 if learner is disabled
   if (maxv>0) {
      for (UInt_t i=0; i<fGradVec.size(); i++) {
         gval = fGradVec[i];
         if (TMath::Abs(gval)>=useRThresh) {
            coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
            fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
         }
      }

      // Loop over the gradient vector for the linear part and move to next set of coefficients
      for (UInt_t i=0; i<fGradVecLin.size(); i++) {
         lval = fGradVecLin[i];
         if (TMath::Abs(lval)>=useLThresh) {
            lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
            fRuleEnsemble->SetLinCoefficient(i,lcoef);
         }
      }
      // Set the offset
      Double_t offset = CalcAverageResponse();
      fRuleEnsemble->SetOffset( offset );
   }
}
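////////////////////////////////////////////////////////////////////////////////
/// For illustration only (a sketch, not part of the TMVA API): the update
/// above moves only the coefficients whose gradient magnitude is at least a
/// fraction tau of the largest gradient - this is what makes the path sparse
/// for large tau:
/// \code
/// #include <algorithm>
/// #include <cmath>
/// #include <cstddef>
/// #include <vector>
///
/// // coef and grad are assumed to have the same size
/// void GradientStep(std::vector<double>& coef, const std::vector<double>& grad,
///                   double tau, double step)
/// {
///    double maxg = 0;
///    for (std::size_t i=0; i<grad.size(); i++) maxg = std::max(maxg, std::fabs(grad[i]));
///    if (maxg<=0) return; // nothing to update
///    const double thresh = tau*maxg;
///    for (std::size_t i=0; i<grad.size(); i++) {
///       if (std::fabs(grad[i]) >= thresh) coef[i] += step*grad[i];
///    }
/// }
/// \endcode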

////////////////////////////////////////////////////////////////////////////////
/// calc average response for all test paths - TODO: see comment under CalcAverageResponse();
/// note that 0 offset is used

void TMVA::RuleFitParams::CalcTstAverageResponse()
{
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         fGDOfsTst[itau] = 0;
         for (UInt_t s=0; s<fNLinear; s++) {
            fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
         }
         for (UInt_t r=0; r<fNRules; r++) {
            fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
         }
      }
   }
   //
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average response - TODO: rewrite bad dependency on EvaluateAverage()!
///
/// note that 0 offset is used

Double_t TMVA::RuleFitParams::CalcAverageResponse()
{
   Double_t ofs = 0;
   for (UInt_t s=0; s<fNLinear; s++) {
      ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
   }
   for (UInt_t r=0; r<fNRules; r++) {
      ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
   }
   return ofs;
}
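////////////////////////////////////////////////////////////////////////////////
/// Restated as a formula (what the code above computes): with linear
/// coefficients \f$ b_s \f$, rule coefficients \f$ a_r \f$ and the path-sample
/// averages \f$ \bar{x}_s \f$ (selectors) and \f$ \bar{r}_r \f$ (rule
/// responses),
/// \f[
///    {\rm offset} = -\sum_s b_s \bar{x}_s - \sum_r a_r \bar{r}_r ,
/// \f]
/// which centres the average ensemble response at zero over the path sample.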

////////////////////////////////////////////////////////////////////////////////
/// calculate the average truth

Double_t TMVA::RuleFitParams::CalcAverageTruth()
{
   if (fPathIdx2<=fPathIdx1) {
      Log() << kFATAL << "<CalcAverageTruth> Invalid start/end indices!" << Endl;
      return 0;
   }
   Double_t sum=0;
   Double_t ensig=0;
   Double_t enbkg=0;
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      Double_t ew = fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
      else                                                                enbkg += ew;
      sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
   }
   Log() << kVERBOSE << "Effective number of signal / background = " << ensig << " / " << enbkg << Endl;

   return sum/fNEveEffPath;
}

//_______________________________________________________________________

Int_t TMVA::RuleFitParams::Type( const Event * e ) const {
   return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::RuleFitParams::SetMsgType( EMsgType t ) {
   fLogger->SetMinType(t);
}