RuleFitParams.cxx
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis
 * Package: TMVA
 * Class  : RuleFitParams
 * Web    : http://tmva.sourceforge.net
 *
 * Description:
 *      Implementation
 *
 * Authors (alphabetical):
 *      Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA
 *      Helge Voss         <Helge.Voss@cern.ch>         - MPI-KP Heidelberg, Ger.
 *
 * Copyright (c) 2005:
 *      CERN, Switzerland
 *      Iowa State U.
 *      MPI-K Heidelberg, Germany
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted according to the terms listed in LICENSE
 * (http://tmva.sourceforge.net/LICENSE)
 **********************************************************************************/

#include "TMVA/RuleFitParams.h"

#include "TMVA/DataSetInfo.h"
#include "TMVA/MethodRuleFit.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/RuleEnsemble.h"
#include "TMVA/RuleFit.h"
#include "TMVA/Timer.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"

#include "TTree.h"
#include "TMath.h"

#include <iostream>
#include <iomanip>
#include <numeric>
#include <algorithm>
#include <functional>

Bool_t gFIRSTTST = kTRUE;
Bool_t gFIRSTORG = kTRUE;

Double_t gGDInit     = 0;
Double_t gGDPtr      = 0;
Double_t gGDNorm     = 0;
Double_t gGDEval     = 0;
Double_t gGDEvalRule = 0;
Double_t gGDRuleLoop = 0;
Double_t gGDLinLoop  = 0;

////////////////////////////////////////////////////////////////////////////////
/// constructor

TMVA::RuleFitParams::RuleFitParams()
   : fRuleFit ( 0 )
   , fRuleEnsemble ( 0 )
   , fNRules ( 0 )
   , fNLinear ( 0 )
   , fPathIdx1 ( 0 )
   , fPathIdx2 ( 0 )
   , fPerfIdx1 ( 0 )
   , fPerfIdx2 ( 0 )
   , fGDNTauTstOK( 0 )
   , fGDNTau ( 51 )
   , fGDTauPrec ( 0.02 )
   , fGDTauScan ( 1000 )
   , fGDTauMin ( 0.0 )
   , fGDTauMax ( 1.0 )
   , fGDTau ( -1.0 )
   , fGDPathStep ( 0.01 )
   , fGDNPathSteps ( 1000 )
   , fGDErrScale ( 1.1 )
   , fAverageTruth( 0 )
   , fFstarMedian ( 0 )
   , fGDNtuple ( 0 )
   , fNTRisk ( 0 )
   , fNTErrorRate ( 0 )
   , fNTNuval ( 0 )
   , fNTCoefRad ( 0 )
   , fNTOffset ( 0 )
   , fNTCoeff ( 0 )
   , fNTLinCoeff ( 0 )
   , fsigave( 0 )
   , fsigrms( 0 )
   , fbkgave( 0 )
   , fbkgrms( 0 )
   , fLogger( new MsgLogger("RuleFit") )
{
   Init();
}

////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::RuleFitParams::~RuleFitParams()
{
   if (fNTCoeff)    { delete [] fNTCoeff;    fNTCoeff = 0;    }
   if (fNTLinCoeff) { delete [] fNTLinCoeff; fNTLinCoeff = 0; }
   delete fLogger;
}

////////////////////////////////////////////////////////////////////////////////
/// Initializes all parameters using the RuleEnsemble and the training tree

void TMVA::RuleFitParams::Init()
{
   if (fRuleFit==0) return;
   if (fRuleFit->GetMethodRuleFit()==0) {
      Log() << kFATAL << "RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
   }
   UInt_t neve = fRuleFit->GetTrainingEvents().size();
   //
   fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
   fNRules       = fRuleEnsemble->GetNRules();
   fNLinear      = fRuleEnsemble->GetNLinear();

   //
   // The fraction of events used for validation should be close to unity.
   // Always select the validation sample from the END of the event list.
   //
   UInt_t ofs;
   fPerfIdx1 = 0;
   if (neve>1) {
      fPerfIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
   }
   else {
      fPerfIdx2 = 0;
   }
   ofs = neve - fPerfIdx2 - 1;
   fPerfIdx1 += ofs;
   fPerfIdx2 += ofs;
   //
   // The fraction of events used for the path search may be smaller, say 0.5.
   // Always select these events from the BEGINNING of the event list.
   // This means that the validation and search samples do not overlap if both fractions are <0.5.
   //
   fPathIdx1 = 0;
   if (neve>1) {
      fPathIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
   }
   else {
      fPathIdx2 = 0;
   }
   //
   // summarize weights
   //
   fNEveEffPath = 0;
   for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
      fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
   }

   fNEveEffPerf = 0;
   for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
      fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
   }
   //
   Log() << kVERBOSE << "Path constr. - event index range = [ " << fPathIdx1 << ", " << fPathIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPath << Endl;
   Log() << kVERBOSE << "Error estim. - event index range = [ " << fPerfIdx1 << ", " << fPerfIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPerf << Endl;
   //
   if (fRuleEnsemble->DoRules())
      Log() << kDEBUG << "Number of rules in ensemble: " << fNRules << Endl;
   else
      Log() << kDEBUG << "Rules are disabled " << Endl;

   if (fRuleEnsemble->DoLinear())
      Log() << kDEBUG << "Number of linear terms: " << fNLinear << Endl;
   else
      Log() << kDEBUG << "Linear terms are disabled " << Endl;
}
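
// Example with hypothetical numbers: for neve = 1000 and both event fractions
// set to 0.5, the code above gives fPerfIdx2 = UInt_t(999*0.5) = 499 and
// ofs = 1000-499-1 = 500, i.e. an error-estimation range of [500,999] taken
// from the end of the list, and a path-search range of [0,499] taken from the
// beginning, so the two samples do not overlap.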

////////////////////////////////////////////////////////////////////////////////
/// initializes the ntuple

void TMVA::RuleFitParams::InitNtuple()
{
   fGDNtuple = new TTree("MonitorNtuple_RuleFitParams","RuleFit path search");
   fGDNtuple->Branch("risk",    &fNTRisk,      "risk/D");
   fGDNtuple->Branch("error",   &fNTErrorRate, "error/D");
   fGDNtuple->Branch("nuval",   &fNTNuval,     "nuval/D");
   fGDNtuple->Branch("coefrad", &fNTCoefRad,   "coefrad/D");
   fGDNtuple->Branch("offset",  &fNTOffset,    "offset/D");
   //
   fNTCoeff    = (fNRules >0 ? new Double_t[fNRules]  : 0);
   fNTLinCoeff = (fNLinear>0 ? new Double_t[fNLinear] : 0);

   for (UInt_t i=0; i<fNRules; i++) {
      fGDNtuple->Branch(Form("a%d",i+1),&fNTCoeff[i],Form("a%d/D",i+1));
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fGDNtuple->Branch(Form("b%d",i+1),&fNTLinCoeff[i],Form("b%d/D",i+1));
   }
}

////////////////////////////////////////////////////////////////////////////////
/// evaluate the average of each variable and f(x) in the given range

void TMVA::RuleFitParams::EvaluateAverage( UInt_t ind1, UInt_t ind2,
                                           std::vector<Double_t> &avsel,
                                           std::vector<Double_t> &avrul )
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
   }
   //
   avsel.clear();
   avrul.clear();
   //
   if (fNLinear>0) avsel.resize(fNLinear,0);
   if (fNRules>0)  avrul.resize(fNRules,0);
   const std::vector<UInt_t> *eventRuleMap=0;
   Double_t ew;
   Double_t sumew=0;
   //
   // Loop over events and calculate average of linear terms (normalised) and rule response.
   //
   if (fRuleEnsemble->IsRuleMapOK()) { // MakeRuleMap() has been called
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
         }
         // loop over rules
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         for (UInt_t r=0; r<nrules; r++) {
            avrul[(*eventRuleMap)[r]] += ew;
         }
      }
   }
   else { // MakeRuleMap() has not yet been called
      const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         // first cache rule/lin response
         /* Double_t val = */ fRuleEnsemble->EvalLinEvent(*((*events)[i]));
         /* val = */ fRuleEnsemble->EvalEvent(*((*events)[i]));
         // loop over linear terms
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
         }
         // loop over rules
         for (UInt_t r=0; r<fNRules; r++) {
            avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
         }
      }
   }
   // average variable
   for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
      avsel[sel] = avsel[sel] / sumew;
   }
   // average rule response, excl coeff
   for (UInt_t r=0; r<fNRules; r++) {
      avrul[r] = avrul[r] / sumew;
   }
}
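
// The loops above implement plain weighted means over the selected range,
//    avsel[s] = sum_i( w_i * x_is )     / sum_i( w_i )
//    avrul[r] = sum_i( w_i * r_r(x_i) ) / sum_i( w_i )
// where w_i is the training event weight; in the rule-map branch the rule
// response is 1 for every rule listed in the event's rule map (and 0
// otherwise), so only the event weights need to be summed there.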

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1)
/// This is used for binary classification where y = {+1,-1} for (sig,bkg)

Double_t TMVA::RuleFitParams::LossFunction( const Event& e ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( e )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
   //
   return diff*diff*e.GetWeight();
}
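
// Written out, the loss evaluated above (the two index-based overloads below
// compute the same quantity) is
//    h      = max(-1, min(+1, F(x)))
//    L(y,F) = w * (y - h)^2 ,   y = +1 (signal), -1 (background)
// so a correctly classified event with |F| >= 1 contributes zero loss.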

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1)
/// This is used for binary classification where y = {+1,-1} for (sig,bkg)

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( evtidx )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1)
/// This is used for binary classification where y = {+1,-1} for (sig,bkg)

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx, UInt_t itau ) const
{
   Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,e) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment

Double_t TMVA::RuleFitParams::Risk( UInt_t ind1, UInt_t ind2, Double_t neff ) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment for tau model <itau>

Double_t TMVA::RuleFitParams::Risk( UInt_t ind1, UInt_t ind2, Double_t neff, UInt_t itau ) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i,itau);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// This is the "lasso" penalty.
/// To be used for regression.
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::Penalty() const
{
   Log() << kWARNING << "<Penalty> Using unverified code! Check!" << Endl;
   Double_t rval=0;
   const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
   for (UInt_t i=0; i<fNRules; i++) {
      rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      rval += TMath::Abs((*lincoeff)[i]);
   }
   return rval;
}
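
// The penalty accumulated above is the lasso (L1) term
//    P(a,b) = sum_k |a_k| + sum_j |b_j|
// over the rule coefficients a_k and the (normalised) linear coefficients b_j.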

////////////////////////////////////////////////////////////////////////////////
/// Initialize GD path search

void TMVA::RuleFitParams::InitGD()
{
   if (fGDNTau<2) {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   if (fGDTau<0.0) {
      // fGDNTau = 50; already set in MethodRuleFit
      fGDTauScan = 1000;
      fGDTauMin  = 0.0;
      fGDTauMax  = 1.0;
   }
   else {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   // set all taus
   fGDTauVec.clear();
   fGDTauVec.resize( fGDNTau );
   if (fGDNTau==1) {
      fGDTauVec[0] = fGDTau;
   }
   else {
      // set tau vector - TODO: make a smarter choice of range in tau
      Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         fGDTauVec[itau] = static_cast<Double_t>(itau)*dtau + fGDTauMin;
         if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
      }
   }
   // initialize path search vectors

   fGradVec.clear();
   fGradVecLin.clear();
   fGradVecTst.clear();
   fGradVecLinTst.clear();
   fGDErrTst.clear();
   fGDErrTstOK.clear();
   fGDOfsTst.clear();
   fGDCoefTst.clear();
   fGDCoefLinTst.clear();
   //
   // rules
   //
   fGDCoefTst.resize(fGDNTau);
   fGradVec.resize(fNRules,0);
   fGradVecTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecTst[i].resize(fNRules,0);
      fGDCoefTst[i].resize(fNRules,0);
   }
   //
   // linear terms
   //
   fGDCoefLinTst.resize(fGDNTau);
   fGradVecLin.resize(fNLinear,0);
   fGradVecLinTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecLinTst[i].resize(fNLinear,0);
      fGDCoefLinTst[i].resize(fNLinear,0);
   }
   //
   // error, coefs etc
   //
   fGDErrTst.resize(fGDNTau,0);
   fGDErrTstOK.resize(fGDNTau,kTRUE);
   fGDOfsTst.resize(fGDNTau,0);
   fGDNTauTstOK = fGDNTau;
   //
   // calculate average selectors and rule responses for the path sample size
   //
}
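
// Illustration with the constructor defaults (fGDTau = -1, fGDNTau = 51,
// fGDTauMin = 0, fGDTauMax = 1): dtau = 1/50 = 0.02, so the grid built above
// is tau = 0.00, 0.02, 0.04, ..., 1.00, and one candidate path (offset, rule
// and linear coefficients, gradient vectors) is maintained per grid point.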

////////////////////////////////////////////////////////////////////////////////
/// This finds the cutoff parameter tau by scanning several different paths

Int_t TMVA::RuleFitParams::FindGDTau()
{
   if (fGDNTau<2)     return 0;
   if (fGDTauScan==0) return 0;

   if (fGDOfsTst.size()<1)
      Log() << kFATAL << "BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;
   //
   Log() << kINFO << "Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;
   //
   // Find how many points to scan and how often to calculate the error
   UInt_t nscan = fGDTauScan; //std::min(static_cast<Int_t>(fGDTauScan),fGDNPathSteps);
   UInt_t netst = std::min(nscan,UInt_t(100));
   UInt_t nscanned=0;
   //
   //--------------------
   // loop over the paths
   //--------------------
   // The MAXIMUM number of loops is given by nscan.
   // At each loop, the paths far away from the minimum
   // are rejected, so at each check (every netst events) the number
   // of paths searched is reduced.
   // The maximum 'distance' from the minimum error rate is
   // 1 sigma. See RiskPerfTst() for details.
   //
   Bool_t doloop=kTRUE;
   UInt_t ip=0;
   UInt_t itauMin=0;
   Timer timer( nscan, "RuleFit" );
   while (doloop) {
      // make gradvec
      MakeTstGradientVector();
      // update coefs
      UpdateTstCoefficients();
      // estimate error and do the sum
      // do this at index=0, netst-1, 2*netst-1 ...
      nscanned++;
      if ( (ip==0) || ((ip+1)%netst==0) ) {
         // ErrorRateRocTst( );
         itauMin = RiskPerfTst();
         Log() << kVERBOSE << Form("%4d",ip+1) << ". tau = " << Form("%4.4f",fGDTauVec[itauMin])
               << " => error rate = " << fGDErrTst[itauMin] << Endl;
      }
      ip++;
      doloop = ((ip<nscan) && (fGDNTauTstOK>3));
      gFIRSTTST = kFALSE;
      if (Log().GetMinType()>kVERBOSE)
         timer.DrawProgressBar(ip);
   }
   //
   // Set tau and coefs
   // Downscale tau slightly in order to avoid numerical problems
   //
   if (nscanned==0) {
      Log() << kERROR << "<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;
   }
   fGDTau = fGDTauVec[itauMin];
   fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
   fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
   fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
   Log() << kINFO << "Best path found with tau = " << Form("%4.4f",fGDTau)
         << " after " << timer.GetElapsedTime() << " " << Endl;

   return nscan;
}

////////////////////////////////////////////////////////////////////////////////
/// The following finds the gradient directed path in parameter space.
/// More work is needed... FT, 24/9/2006
/// The algorithm is currently as follows:
/// <***if not otherwise stated, the sample used below is [fPathIdx1,fPathIdx2]***>
///  1. Set offset to -average(y(true)) and all coefs=0 => average of F(x)==0
///  2. FindGDTau() : start scanning using several paths defined by different tau;
///     choose the tau yielding the best path
///  3. start scanning along the chosen path
///  4. check the error rate at a given frequency;
///     data used for the check: [fPerfIdx1,fPerfIdx2]
///  5. stop when either of the following conditions is fulfilled:
///     a. loop index == fGDNPathSteps
///     b. error > fGDErrScale*errmin
///     c. only in DEBUG mode: risk is not monotonically decreasing
///
/// The algorithm will warn if:
///  I.   the error rate was still decreasing when the loop finished -> increase fGDNPathSteps!
///  II.  the minimum was found at an early stage -> decrease fGDPathStep
///  III. DEBUG: risk > previous risk -> entered chaotic region (regularization is too small)
///

void TMVA::RuleFitParams::MakeGDPath()
{
   Log() << kINFO << "GD path scan - the scan stops when the max num. of steps is reached or a min is found"
         << Endl;
   Log() << kVERBOSE << "Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
   Log() << kVERBOSE << "Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;

   // check if debug mode
   const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
   const Bool_t isDebug   = (Log().GetMinType()<=kDEBUG);

   // init GD parameters and clear coeff vectors
   InitGD();

   // evaluate average response of rules/linear terms (with event weights)
   EvaluateAveragePath();
   EvaluateAveragePerf();

   // initial estimate; all other a(i) are zero
   Log() << kVERBOSE << "Creating GD path"  << Endl;
   Log() << kVERBOSE << "  N(steps)     = " << fGDNPathSteps << Endl;
   Log() << kVERBOSE << "  step         = " << fGDPathStep   << Endl;
   Log() << kVERBOSE << "  N(tau)       = " << fGDNTau       << Endl;
   Log() << kVERBOSE << "  N(tau steps) = " << fGDTauScan    << Endl;
   Log() << kVERBOSE << "  tau range    = [ " << fGDTauVec[0] << " , " << fGDTauVec[fGDNTau-1] << " ]" << Endl;

   // init ntuple
   if (isDebug) InitNtuple();

   // DEBUG: risk scan
   Int_t nbadrisk=0;              // number of points where risk(i+1)>risk(i)
   Double_t trisk=0;              // time per risk evaluation
   Double_t strisk=0;             // total time
   Double_t rprev=1e32;           // previous risk

   // parameters set at point with min error
   Double_t errmin=1e32;          // min error
   // Double_t riskMin=0;         // risk
   Int_t indMin=-1;               // index
   std::vector<Double_t> coefsMin;    // rule coefs
   std::vector<Double_t> lincoefsMin; // linear coefs
   Double_t offsetMin;            // offset

   // DEBUG: timing
   clock_t t0=0;
   Double_t tgradvec;
   Double_t tupgrade;
   Double_t tperf;
   Double_t stgradvec=0;
   Double_t stupgrade=0;
   Double_t stperf=0;

   // linear regression to estimate slope of error rate evolution
   const UInt_t npreg=5;
   std::vector<Double_t> valx;
   std::vector<Double_t> valy;
   std::vector<Double_t> valxy;

   // loop related
   Bool_t docheck;          // true if an error rate check is to be done
   Int_t  iloop=0;          // loop index
   Bool_t found=kFALSE;     // true if minimum is found
   Bool_t riskFlat=kFALSE;  // DEBUG: flag is true if risk evolution behaves badly
   Bool_t done = kFALSE;    // flag that the scan is done

   // calculate how often to check the error rate
   int imod = fGDNPathSteps/100;
   if (imod<100) imod = std::min(100,fGDNPathSteps);
   if (imod>100) imod = 100;

   // reset coefficients
   fAverageTruth = -CalcAverageTruth();
   offsetMin     = fAverageTruth;
   fRuleEnsemble->SetOffset(offsetMin);
   fRuleEnsemble->ClearCoefficients(0);
   fRuleEnsemble->ClearLinCoefficients(0);
   for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
      fGDOfsTst[i] = offsetMin;
   }
   Log() << kVERBOSE << "Obtained initial offset = " << offsetMin << Endl;

   // find the best tau - returns the number of steps performed in the scan
   Int_t nprescan = FindGDTau();

   //
   //
   // calculate F*
   //
   // CalcFStar(fPerfIdx1, fPerfIdx2);
   //

   // set some ntuple values
   fNTRisk = rprev;
   fNTCoefRad = -1.0;
   fNTErrorRate = 0;

   // a local flag indicating for what reason the search was stopped
   Int_t stopCondition=0;

   Log() << kINFO << "Fitting model..." << Endl;
   // start loop with timer
   Timer timer( fGDNPathSteps, "RuleFit" );
   while (!done) {
      // Make gradient vector (eq 44, ref 1)
      if (isVerbose) t0 = clock();
      MakeGradientVector();
      if (isVerbose) {
         tgradvec = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stgradvec += tgradvec;
      }

      // Calculate the direction in parameter space (eq 25, ref 1) and update coeffs (eq 22, ref 1)
      if (isVerbose) t0 = clock();
      UpdateCoefficients();
      if (isVerbose) {
         tupgrade = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stupgrade += tupgrade;
      }

      // don't check the error rate every loop
      docheck = ((iloop==0) || ((iloop+1)%imod==0));

      if (docheck) {
         // draw progress bar only if not debug
         if (!isVerbose)
            timer.DrawProgressBar(iloop);
         fNTNuval = Double_t(iloop)*fGDPathStep;
         fNTRisk = 0.0;

         // check risk evolution

         if (isDebug) FillCoefficients();
         fNTCoefRad = fRuleEnsemble->CoefficientRadius();

         // calculate risk
         t0 = clock();
         fNTRisk = RiskPath();
         trisk = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         strisk += trisk;
         //
         // Check for an increase in risk.
         // Such an increase would imply that the regularization is too small.
         // Stop the iteration if this happens.
         //
         if (fNTRisk>=rprev) {
            if (fNTRisk>rprev) {
               nbadrisk++;
               Log() << kWARNING << "Risk(i+1)>=Risk(i) in path" << Endl;
               riskFlat=(nbadrisk>3);
               if (riskFlat) {
                  Log() << kWARNING << "Chaotic behaviour of risk evolution" << Endl;
                  Log() << kWARNING << "--- STOPPING MINIMISATION ---" << Endl;
                  Log() << kWARNING << "This may be OK if minimum is already found" << Endl;
               }
            }
         }
         rprev = fNTRisk;
         //
         // Estimate the error rate using cross validation.
         // Well, not quite full cross validation since we only
         // use ONE model.
         //
         if (isVerbose) t0 = clock();
         fNTErrorRate = 0;

         // Check error rate
         Double_t errroc;   //= ErrorRateRoc();
         Double_t riskPerf = RiskPerf();
         // Double_t optimism = Optimism();
         //
         errroc = riskPerf;
         //
         fNTErrorRate = errroc;
         //
         if (isVerbose) {
            tperf = Double_t(clock()-t0)/CLOCKS_PER_SEC;
            stperf += tperf;
         }
         //
         // Always take the last min.
         // For each step the risk is reduced.
         //
         if (fNTErrorRate<=errmin) {
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         if ( fNTErrorRate > fGDErrScale*errmin) found = kTRUE;
         //
         // check slope of last couple of points
         //
         if (valx.size()==npreg) {
            valx.erase(valx.begin());
            valy.erase(valy.begin());
            valxy.erase(valxy.begin());
         }
         valx.push_back(fNTNuval);
         valy.push_back(fNTErrorRate);
         valxy.push_back(fNTErrorRate*fNTNuval);

         gFIRSTORG = kFALSE;

         //
         if (isDebug) fGDNtuple->Fill();
         if (isVerbose) {
            Log() << kVERBOSE << "ParamsIRE : "
                  << std::setw(10)
                  << Form("%8d",iloop+1) << " "
                  << Form("%4.4f",fNTRisk) << " "
                  << Form("%4.4f",riskPerf) << " "
                  << Form("%4.4f",fNTRisk+riskPerf) << " "
                  // << Form("%4.4f",fsigave+fbkgave) << " "
                  // << Form("%4.4f",fsigave) << " "
                  // << Form("%4.4f",fsigrms) << " "
                  // << Form("%4.4f",fbkgave) << " "
                  // << Form("%4.4f",fbkgrms) << " "
                  // << Form("%4.4f",fRuleEnsemble->CoefficientRadius())
                  << Endl;
         }
      }
      iloop++;
      // Stop iteration under various conditions
      //  * The condition R(i+1)<R(i) is no longer true (when the implicit regularization is too weak)
      //  * If the current error estimate is > factor*errmin (factor = 1.1)
      //  * We have reached the last step...
      Bool_t endOfLoop = (iloop==fGDNPathSteps);
      if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
         if (riskFlat) {
            stopCondition = 1;
         }
         else if (endOfLoop) {
            stopCondition = 2;
         }
         if (indMin<0) {
            Log() << kWARNING << "BUG TRAP: should not be here - still, this bug is harmless ;)" << Endl;
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         found = kTRUE;
      }
      done = (found);
   }
   Log() << kINFO << "Minimisation elapsed time : " << timer.GetElapsedTime() << " " << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;
   Log() << kINFO << "Found minimum at step " << indMin+1 << " with error = " << errmin << Endl;
   Log() << kINFO << "Reason for ending loop: ";
   switch (stopCondition) {
   case 0:
      Log() << kINFO << "clear minimum found";
      break;
   case 1:
      Log() << kINFO << "chaotic behaviour of risk";
      break;
   case 2:
      Log() << kINFO << "end of loop reached";
      break;
   default:
      Log() << kINFO << "unknown!";
      break;
   }
   Log() << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;

   // check if early minimum - might be an indication of a too large step size
   if ( Double_t(indMin)/Double_t(nprescan+fGDNPathSteps) < 0.05 ) {
      Log() << kWARNING << "Reached minimum early in the search" << Endl;
      Log() << kWARNING << "Check results and maybe decrease GDStep size" << Endl;
   }
   //
   // quick check of the sign of the slope for the last npreg points
   //
   Double_t sumx  = std::accumulate( valx.begin(),  valx.end(),  Double_t() );
   Double_t sumxy = std::accumulate( valxy.begin(), valxy.end(), Double_t() );
   Double_t sumy  = std::accumulate( valy.begin(),  valy.end(),  Double_t() );
   Double_t slope = Double_t(valx.size())*sumxy - sumx*sumy;
   if (slope<0) {
      Log() << kINFO << "The error rate was still decreasing at the end of the path" << Endl;
      Log() << kINFO << "Increase the number of steps (GDNSteps)." << Endl;
   }
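
   // In other words: 'slope' above is the numerator N*sum(xy) - sum(x)*sum(y)
   // of the least-squares slope fitted to the last npreg (nu, error) points;
   // the denominator N*sum(x^2) - sum(x)^2 is always positive and is dropped,
   // since only the sign of the slope is tested.
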
   //
   // set coefficients
   //
   if (found) {
      fRuleEnsemble->SetCoefficients( coefsMin );
      fRuleEnsemble->SetLinCoefficients( lincoefsMin );
      fRuleEnsemble->SetOffset( offsetMin );
   }
   else {
      Log() << kFATAL << "BUG TRAP: minimum not found in MakeGDPath()" << Endl;
   }

   //
   // print timing info (VERBOSE mode)
   //
   if (isVerbose) {
      Double_t stloop = strisk + stupgrade + stgradvec + stperf;
      Log() << kVERBOSE << "Timing per loop (ms):" << Endl;
      Log() << kVERBOSE << "   gradvec = " << 1000*stgradvec/iloop << Endl;
      Log() << kVERBOSE << "   upgrade = " << 1000*stupgrade/iloop << Endl;
      Log() << kVERBOSE << "   risk    = " << 1000*strisk/iloop    << Endl;
      Log() << kVERBOSE << "   perf    = " << 1000*stperf/iloop    << Endl;
      Log() << kVERBOSE << "   loop    = " << 1000*stloop/iloop    << Endl;
      //
      Log() << kVERBOSE << "   GDInit     = " << 1000*gGDInit/iloop     << Endl;
      Log() << kVERBOSE << "   GDPtr      = " << 1000*gGDPtr/iloop      << Endl;
      Log() << kVERBOSE << "   GDEval     = " << 1000*gGDEval/iloop     << Endl;
      Log() << kVERBOSE << "   GDEvalRule = " << 1000*gGDEvalRule/iloop << Endl;
      Log() << kVERBOSE << "   GDNorm     = " << 1000*gGDNorm/iloop     << Endl;
      Log() << kVERBOSE << "   GDRuleLoop = " << 1000*gGDRuleLoop/iloop << Endl;
      Log() << kVERBOSE << "   GDLinLoop  = " << 1000*gGDLinLoop/iloop  << Endl;
   }
   //
   // write ntuple (DEBUG)
   if (isDebug) fGDNtuple->Write();
}

////////////////////////////////////////////////////////////////////////////////
/// helper function to store the rule coefficients in local arrays

void TMVA::RuleFitParams::FillCoefficients()
{
   fNTOffset = fRuleEnsemble->GetOffset();
   //
   for (UInt_t i=0; i<fNRules; i++) {
      fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates F* (optimum scoring function) for all events for the given sets.
/// The result is used in ErrorRateReg().
/// --- NOT USED ---
///

void TMVA::RuleFitParams::CalcFStar()
{
   Log() << kWARNING << "<CalcFStar> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<CalcFStar> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   fFstar.clear();
   std::vector<Double_t> fstarSorted;
   Double_t fstarVal;
   // loop over all events and estimate F* for each event
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      fstarVal = fRuleEnsemble->FStar(e);
      fFstar.push_back(fstarVal);
      fstarSorted.push_back(fstarVal);
      if (TMath::IsNaN(fstarVal)) Log() << kFATAL << "F* is NAN!" << Endl;
   }
   // sort F* and find median
   std::sort( fstarSorted.begin(), fstarSorted.end() );
   UInt_t ind = neve/2;
   if (neve&1) { // odd number of events - median is the middle element
      fFstarMedian = fstarSorted[ind];
   }
   else { // even - median is the average of the two middle elements
      fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// implementation of eq. 7.17 in Hastie, Tibshirani & Friedman book;
/// this is the covariance between the estimated response yhat and the
/// true value y.
/// NOT REALLY SURE IF THIS IS CORRECT!
/// --- THIS IS NOT USED ---
///

Double_t TMVA::RuleFitParams::Optimism()
{
   Log() << kWARNING << "<Optimism> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<Optimism> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumy=0;
   Double_t sumyhat=0;
   Double_t sumyhaty=0;
   Double_t sumw2=0;
   Double_t sumw=0;
   Double_t yhat;
   Double_t y;
   Double_t w;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      yhat = fRuleEnsemble->EvalEvent(i);                                        // evaluated using the model
      y    = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0); // the truth
      w    = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf;                   // the weight, normalised such that sum=1
      sumy     += w*y;
      sumyhat  += w*yhat;
      sumyhaty += w*yhat*y;
      sumw2    += w*w;
      sumw     += w;
   }
   Double_t div = 1.0-sumw2;
   Double_t cov = sumyhaty - sumyhat*sumy;
   return 2.0*cov/div;
}
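
// Sketch of the estimate above: with the weights normalised so that
// sum_i(w_i) = 1, the weighted covariance between model response and truth is
//    cov(yhat,y) = sum_i(w_i*yhat_i*y_i) - sum_i(w_i*yhat_i)*sum_i(w_i*y_i)
// and the returned optimism is 2*cov/(1 - sum_i(w_i^2)), cf. eq. 7.17.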

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// This code is pretty messy at the moment.
/// Cleanup is needed.
/// --- NOT USED ---
///

Double_t TMVA::RuleFitParams::ErrorRateReg()
{
   Log() << kWARNING << "<ErrorRateReg> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateReg> Invalid start/end indices!" << Endl;
   }
   if (fFstar.size()!=neve) {
      Log() << kFATAL << "--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
            << " Fstar.size() = " << fFstar.size() << " , N(events) = " << neve << Endl;
   }
   //
   Double_t sF;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdf = 0;
   Double_t sumdfmed = 0;
   //
   // A bit messy here.
   // I believe the binary error classification is appropriate here.
   // The problem is stability.
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // scaled abs error, eq 20 in RuleFit paper
      sumdf    += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
      sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
   }
   // scaled abs error, eq 20
   // This error (df) is large - need to think on how to compensate...
   //
   return sumdf/sumdfmed;
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters.
/// It uses a binary estimate of (y-F*(x)):
///    (y-F*(x)) = (Num of events where sign(F)!=sign(y))/Neve
///    y = {+1 if event is signal, -1 otherwise}
/// --- NOT USED ---
///

Double_t TMVA::RuleFitParams::ErrorRateBin()
{
   Log() << kWARNING << "<ErrorRateBin> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateBin> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdfbin = 0;
   Double_t dneve = Double_t(neve);
   Int_t signF, signy;
   Double_t sF;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // Double_t sFstar = fRuleEnsemble->FStar(e); // THIS CAN BE CALCULATED ONCE!
      signF = (sF>0 ? +1:-1);
      // signy = (sFStar>0 ? +1:-1);
      signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
      sumdfbin += TMath::Abs(Double_t(signF-signy))*0.5;
   }
   Double_t f = sumdfbin/dneve;
   // Double_t df = f*TMath::Sqrt((1.0/sumdfbin) + (1.0/dneve));
   return f;
}

////////////////////////////////////////////////////////////////////////////////

Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
                                               std::vector<Double_t> & sFbkg )
{
   //
   // Estimates the error rate with the current set of parameters.
   // It calculates the area under the bkg rejection vs signal efficiency curve.
   // The value returned is 1-area.
   //
   std::sort(sFsig.begin(), sFsig.end());
   std::sort(sFbkg.begin(), sFbkg.end());
   const Double_t minsig = sFsig.front();
   const Double_t minbkg = sFbkg.front();
   const Double_t maxsig = sFsig.back();
   const Double_t maxbkg = sFbkg.back();
   const Double_t minf = std::min(minsig,minbkg);
   const Double_t maxf = std::max(maxsig,maxbkg);
   const Int_t    nsig = Int_t(sFsig.size());
   const Int_t    nbkg = Int_t(sFbkg.size());
   const Int_t    np   = std::min((nsig+nbkg)/4,50);
   const Double_t df   = (maxf-minf)/(np-1);
   //
   // calculate area under rejection/efficiency curve
   //
   Double_t fcut;
   std::vector<Double_t>::const_iterator indit;
   Int_t nrbkg;
   Int_t nesig;
   Int_t pnesig=0;
   Double_t rejb=0;
   Double_t effs=1.0;
   Double_t prejb=0;
   Double_t peffs=1.0;
   // Double_t drejb;
   Double_t deffs;
   Double_t area=0;
   Int_t npok=0;
   //
   // loop over range of F [minf,maxf]
   //
   for (Int_t i=0; i<np; i++) {
      fcut = minf + df*Double_t(i);
      indit = std::find_if( sFsig.begin(), sFsig.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
      nesig = sFsig.end()-indit; // number of sig accepted with F>cut
      if (TMath::Abs(pnesig-nesig)>0) {
         npok++;
         indit = std::find_if( sFbkg.begin(), sFbkg.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
         nrbkg = indit-sFbkg.begin(); // number of bkg rejected with F>cut
         rejb = Double_t(nrbkg)/Double_t(nbkg);
         effs = Double_t(nesig)/Double_t(nsig);
         //
         // drejb = rejb-prejb;
         deffs = effs-peffs;
         area += 0.5*TMath::Abs(deffs)*(rejb+prejb); // trapezoid
         prejb = rejb;
         peffs = effs;
      }
      pnesig = nesig;
   }
   area += 0.5*(1+rejb)*effs; // extrapolate to the end point

   return (1.0-area);
}
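
// The integration above is a simple trapezoid rule on the (signal efficiency,
// background rejection) curve: each cut that changes the accepted-signal count
// adds 0.5*|effs - peffs|*(rejb + prejb), and the final term 0.5*(1+rejb)*effs
// closes the curve from the last scanned point down to (eff = 0, rej = 1), so
// the full curve is covered before returning 1 - area.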

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
/// This works but is less efficient than calculating the Risk using RiskPerf().
///

Double_t TMVA::RuleFitParams::ErrorRateRoc()
{
   Log() << kWARNING << "<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRoc> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sF;
   //
   std::vector<Double_t> sFsig;
   std::vector<Double_t> sFbkg;
   Double_t sumfsig=0;
   Double_t sumfbkg=0;
   Double_t sumf2sig=0;
   Double_t sumf2bkg=0;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent(i); // * fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
         sFsig.push_back(sF);
         sumfsig  += sF;
         sumf2sig += sF*sF;
      }
      else {
         sFbkg.push_back(sF);
         sumfbkg  += sF;
         sumf2bkg += sF*sF;
      }
   }
   fsigave = sumfsig/sFsig.size();
   fbkgave = sumfbkg/sFbkg.size();
   fsigrms = TMath::Sqrt(gTools().ComputeVariance(sumf2sig,sumfsig,sFsig.size()));
   fbkgrms = TMath::Sqrt(gTools().ComputeVariance(sumf2bkg,sumfbkg,sFbkg.size()));
   //
   return ErrorRateRocRaw( sFsig, sFbkg );
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
///
/// See comment under ErrorRateRoc().
///

void TMVA::RuleFitParams::ErrorRateRocTst()
{
   Log() << kWARNING << "<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   // std::vector<Double_t> sF;
   Double_t sF;
   std::vector< std::vector<Double_t> > sFsig;
   std::vector< std::vector<Double_t> > sFbkg;
   //
   sFsig.resize( fGDNTau );
   sFbkg.resize( fGDNTau );
   // sF.resize( fGDNTau );

   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *(*events)[i], fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent(                fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
            sFsig[itau].push_back(sF);
         }
         else {
            sFbkg[itau].push_back(sF);
         }
      }
   }
   Double_t err;

   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
      fGDErrTst[itau] = err;
   }
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters
/// using the <Perf> subsample.
/// Returns the tau index giving the lowest error.
///

UInt_t TMVA::RuleFitParams::RiskPerfTst()
{
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<RiskPerfTst> Invalid start/end indices!" << Endl;
      return 0;
   }
   //
   Double_t sumx  = 0;
   Double_t sumx2 = 0;
   Double_t maxx  = -100.0;
   Double_t minx  = 1e30;
   UInt_t itaumin = 0;
   UInt_t nok=0;
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         nok++;
         fGDErrTst[itau] = RiskPerf(itau);
         sumx  += fGDErrTst[itau];
         sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
         if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
         if (fGDErrTst[itau]<minx) {
            minx=fGDErrTst[itau];
            itaumin = itau;
         }
      }
   }
   Double_t sigx = TMath::Sqrt(gTools().ComputeVariance( sumx2, sumx, nok ) );
   Double_t maxacc = minx+sigx;
   //
   if (nok>0) {
      nok = 0;
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         if (fGDErrTstOK[itau]) {
            if (fGDErrTst[itau] > maxacc) {
               fGDErrTstOK[itau] = kFALSE;
            }
            else {
               nok++;
            }
         }
      }
   }
   fGDNTauTstOK = nok;
   Log() << kVERBOSE << "TAU: "
         << itaumin << "   "
         << nok     << "   "
         << minx    << "   "
         << maxx    << "   "
         << sigx    << Endl;
   //
   return itaumin;
}
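
// Selection rule used above: after evaluating the risk for every still-active
// tau, only paths with
//    err(tau) <= min(err) + sigma(err)
// stay in the scan; paths more than one standard deviation above the current
// best error are flagged fGDErrTstOK = kFALSE and never re-enter the scan.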

////////////////////////////////////////////////////////////////////////////////
/// make test gradient vector for all tau;
/// same algorithm as MakeGradientVector()

void TMVA::RuleFitParams::MakeTstGradientVector()
{
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeTstGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         for (UInt_t ir=0; ir<fNRules; ir++) {
            fGradVecTst[itau][ir]=0;
         }
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLinTst[itau][il]=0;
         }
      }
   }
   //
   // Double_t val; // temp store
   Double_t sF;   // score function value
   Double_t r;    // eq 35, ref 1
   Double_t y;    // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   // Loop over all events
   //
   UInt_t nsfok=0;
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];
      UInt_t nrules=0;
      if (fRuleEnsemble->DoRules()) {
         eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
         nrules = (*eventRuleMap).size();
      }
      for (UInt_t itau=0; itau<fGDNTau; itau++) { // loop over tau
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *e, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent(     fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fGDErrTstOK[itau]) {
            sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
            if (TMath::Abs(sF)<1.0) {
               nsfok++;
               r = 0;
               y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
               r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
               // rule gradient vector
               for (UInt_t ir=0; ir<nrules; ir++) {
                  rind = (*eventRuleMap)[ir];
                  fGradVecTst[itau][rind] += r;
               }
               // linear terms
               for (UInt_t il=0; il<fNLinear; il++) {
                  fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
               }
            } // if (TMath::Abs(F)<xxx)
         }
      }
   }
}
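
// The increment r accumulated above is the per-event term of the negative
// risk gradient, eq 35 in ref 1:
//    -dR/da_k = (2/N_eff) * sum_{|F|<1}( w_i * (y_i - F(x_i)) * r_k(x_i) )
// Only events inside the ramp (|F|<1) contribute, and since a rule response
// is 1 exactly for the rules in the event's rule map, looping over that map
// adds r to just those components.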

////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset
/// for all taus. TODO: do not need index range!
///

void TMVA::RuleFitParams::UpdateTstCoefficients()
{
   Double_t maxr, maxl, cthresh, val;
   // loop over all taus
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         // find max gradient
         maxr = ( (fNRules>0 ?
                   TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue()))):0) );
         maxl = ( (fNLinear>0 ?
                   TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue()))):0) );

         // Use the maximum as a threshold
         Double_t maxv = (maxr>maxl ? maxr:maxl);
         cthresh = maxv * fGDTauVec[itau];

         // Add to offset, if gradient is large enough:
         // Loop over the gradient vector and move to next set of coefficients
         // size of GradVec (and GradVecLin) should be 0 if learner is disabled
         //
         // Step-size is divided by 10 when looking for the best path.
         //
         if (maxv>0) {
            const Double_t stepScale=1.0;
            for (UInt_t i=0; i<fNRules; i++) {
               val = fGradVecTst[itau][i];

               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
               }
            }
            // Loop over the gradient vector for the linear part and move to next set of coefficients
            for (UInt_t i=0; i<fNLinear; i++) {
               val = fGradVecLinTst[itau][i];
               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
               }
            }
         }
      }
   }
   // set the offset - should be outside the itau loop!
   CalcTstAverageResponse();
}

////////////////////////////////////////////////////////////////////////////////
/// make gradient vector
///

void TMVA::RuleFitParams::MakeGradientVector()
{
   clock_t t0;
   // clock_t t10;
   t0 = clock();
   //
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t ir=0; ir<fNRules; ir++) {
      fGradVec[ir]=0;
   }
   for (UInt_t il=0; il<fNLinear; il++) {
      fGradVecLin[il]=0;
   }
   //
   // Double_t val; // temp store
   Double_t sF;   // score function value
   Double_t r;    // eq 35, ref 1
   Double_t y;    // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   gGDInit += Double_t(clock()-t0)/CLOCKS_PER_SEC;

   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];

      // t0 = clock(); //DEB
      sF = fRuleEnsemble->EvalEvent( i ); // should not contain the weight
      // gGDEval += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      if (TMath::Abs(sF)<1.0) {
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
         r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
         // rule gradient vector
         for (UInt_t ir=0; ir<nrules; ir++) {
            rind = (*eventRuleMap)[ir];
            fGradVec[rind] += r;
         }
         // gGDRuleLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
         // linear terms
         // t0 = clock(); //DEB
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
         }
         // gGDLinLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      } // if (TMath::Abs(F)<xxx)
   }
}


////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset
///

void TMVA::RuleFitParams::UpdateCoefficients()
{
   Double_t maxr = ( (fRuleEnsemble->DoRules() ?
                      TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue()))):0) );
   Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
                      TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue()))):0) );
   // Use the maximum as a threshold
   Double_t maxv = (maxr>maxl ? maxr:maxl);
   Double_t cthresh = maxv * fGDTau;

   Double_t useRThresh;
   Double_t useLThresh;
   //
   // Choose thresholds.
   //
   useRThresh = cthresh;
   useLThresh = cthresh;

   Double_t gval, lval, coef, lcoef;

   // Add to offset, if gradient is large enough:
   // Loop over the gradient vector and move to next set of coefficients
   // size of GradVec (and GradVecLin) should be 0 if learner is disabled
   if (maxv>0) {
      for (UInt_t i=0; i<fGradVec.size(); i++) {
         gval = fGradVec[i];
         if (TMath::Abs(gval)>=useRThresh) {
            coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
            fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
         }
      }

      // Loop over the gradient vector for the linear part and move to next set of coefficients
      for (UInt_t i=0; i<fGradVecLin.size(); i++) {
         lval = fGradVecLin[i];
         if (TMath::Abs(lval)>=useLThresh) {
            lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
            fRuleEnsemble->SetLinCoefficient(i,lcoef);
         }
      }
      // Set the offset
      Double_t offset = CalcAverageResponse();
      fRuleEnsemble->SetOffset( offset );
   }
}
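
// Compactly, the update above moves only the coefficients whose gradient is
// within a fraction tau of the steepest one:
//    |g_k| >= tau * max_j|g_j|   =>   a_k += fGDPathStep * g_k
// With tau = 0 every coefficient moves at each step, while tau = 1 moves only
// the single steepest coefficient, giving the most lasso-like (sparse) path.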

////////////////////////////////////////////////////////////////////////////////
/// calc average response for all test paths - TODO: see comment under CalcAverageResponse();
/// note that 0 offset is used

void TMVA::RuleFitParams::CalcTstAverageResponse()
{
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         fGDOfsTst[itau] = 0;
         for (UInt_t s=0; s<fNLinear; s++) {
            fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
         }
         for (UInt_t r=0; r<fNRules; r++) {
            fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
         }
      }
   }
   //
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average response - TODO: rewrite bad dependency on EvaluateAverage()!
///
/// note that 0 offset is used

Double_t TMVA::RuleFitParams::CalcAverageResponse()
{
   Double_t ofs = 0;
   for (UInt_t s=0; s<fNLinear; s++) {
      ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
   }
   for (UInt_t r=0; r<fNRules; r++) {
      ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
   }
   return ofs;
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average truth

Double_t TMVA::RuleFitParams::CalcAverageTruth()
{
   if (fPathIdx2<=fPathIdx1) {
      Log() << kFATAL << "<CalcAverageTruth> Invalid start/end indices!" << Endl;
      return 0;
   }
   Double_t sum=0;
   Double_t ensig=0;
   Double_t enbkg=0;
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      Double_t ew = fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
      else                                                                 enbkg += ew;
      sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
   }
   Log() << kVERBOSE << "Effective number of signal / background = " << ensig << " / " << enbkg << Endl;

   return sum/fNEveEffPath;
}

//_______________________________________________________________________

Int_t TMVA::RuleFitParams::Type( const Event * e ) const {
   return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
}


////////////////////////////////////////////////////////////////////////////////

void TMVA::RuleFitParams::SetMsgType( EMsgType t ) {
   fLogger->SetMinType(t);
}