ROOT 6.12/07 Reference Guide
RuleFitParams.cxx
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : RuleFitParams                                                         *
 * Web    : http://tmva.sourceforge.net                                          *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation                                                            *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA      *
 *      Helge Voss         <Helge.Voss@cern.ch>         - MPI-KP Heidelberg, Ger. *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         *
 *      Iowa State U.                                                             *
 *      MPI-K Heidelberg, Germany                                                 *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                         *
 **********************************************************************************/

/*! \class TMVA::RuleFitParams
\ingroup TMVA
A class doing the actual fitting of a linear model using rules as base functions.
*/
#include "TMVA/RuleFitParams.h"

#include "TMVA/DataSetInfo.h"
#include "TMVA/MethodRuleFit.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/RuleEnsemble.h"
#include "TMVA/RuleFit.h"
#include "TMVA/Timer.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"

#include "TTree.h"
#include "TMath.h"

#include <iostream>
#include <iomanip>
#include <numeric>
#include <algorithm>
#include <functional>

Bool_t gFIRSTTST = kTRUE;
Bool_t gFIRSTORG = kTRUE;

// global timing bookkeeping for the verbose per-loop report in MakeGDPath()
Double_t gGDInit     = 0;
Double_t gGDPtr      = 0;
Double_t gGDNorm     = 0;
Double_t gGDEval     = 0;
Double_t gGDEvalRule = 0;
Double_t gGDRuleLoop = 0;
Double_t gGDLinLoop  = 0;

////////////////////////////////////////////////////////////////////////////////
/// constructor

TMVA::RuleFitParams::RuleFitParams()
   : fRuleFit ( 0 )
   , fRuleEnsemble ( 0 )
   , fNRules ( 0 )
   , fNLinear ( 0 )
   , fPathIdx1 ( 0 )
   , fPathIdx2 ( 0 )
   , fPerfIdx1 ( 0 )
   , fPerfIdx2 ( 0 )
   , fGDNTauTstOK( 0 )
   , fGDNTau ( 51 )
   , fGDTauPrec ( 0.02 )
   , fGDTauScan ( 1000 )
   , fGDTauMin ( 0.0 )
   , fGDTauMax ( 1.0 )
   , fGDTau ( -1.0 )
   , fGDPathStep ( 0.01 )
   , fGDNPathSteps ( 1000 )
   , fGDErrScale ( 1.1 )
   , fAverageTruth( 0 )
   , fFstarMedian ( 0 )
   , fGDNtuple ( 0 )
   , fNTRisk ( 0 )
   , fNTErrorRate ( 0 )
   , fNTNuval ( 0 )
   , fNTCoefRad ( 0 )
   , fNTOffset ( 0 )
   , fNTCoeff ( 0 )
   , fNTLinCoeff ( 0 )
   , fsigave( 0 )
   , fsigrms( 0 )
   , fbkgave( 0 )
   , fbkgrms( 0 )
   , fLogger( new MsgLogger("RuleFit") )
{
   Init();
}
////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::RuleFitParams::~RuleFitParams()
{
   // fNTCoeff and fNTLinCoeff are allocated with new[], so delete[] is required
   if (fNTCoeff)    { delete [] fNTCoeff;    fNTCoeff = 0;    }
   if (fNTLinCoeff) { delete [] fNTLinCoeff; fNTLinCoeff = 0; }
   delete fLogger;
}

////////////////////////////////////////////////////////////////////////////////
/// Initializes all parameters using the RuleEnsemble and the training tree

void TMVA::RuleFitParams::Init()
{
   if (fRuleFit==0) return;
   if (fRuleFit->GetMethodRuleFit()==0) {
      Log() << kFATAL << "RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
   }
   UInt_t neve = fRuleFit->GetTrainingEvents().size();
   //
   fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
   fNRules       = fRuleEnsemble->GetNRules();
   fNLinear      = fRuleEnsemble->GetNLinear();

   //
   // Fraction of events used for validation should be close to unity.
   // Always select events from the END.
   //
   UInt_t ofs;
   fPerfIdx1 = 0;
   if (neve>1) {
      fPerfIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
   }
   else {
      fPerfIdx2 = 0;
   }
   ofs = neve - fPerfIdx2 - 1;
   fPerfIdx1 += ofs;
   fPerfIdx2 += ofs;
   //
   // Fraction of events used for the path search can be allowed to be a smaller value, say 0.5.
   // Always select events from the BEGINNING.
   // This means that the validation and search samples will not overlap if both fractions are <0.5.
   //
   fPathIdx1 = 0;
   if (neve>1) {
      fPathIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
   }
   else {
      fPathIdx2 = 0;
   }
   //
   // summarize weights
   //
   fNEveEffPath = 0;
   for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
      fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
   }

   fNEveEffPerf = 0;
   for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
      fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
   }
   //
   Log() << kVERBOSE << "Path constr. - event index range = [ " << fPathIdx1 << ", " << fPathIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPath << Endl;
   Log() << kVERBOSE << "Error estim. - event index range = [ " << fPerfIdx1 << ", " << fPerfIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPerf << Endl;
   //
   if (fRuleEnsemble->DoRules())
      Log() << kDEBUG << "Number of rules in ensemble: " << fNRules << Endl;
   else
      Log() << kDEBUG << "Rules are disabled " << Endl;

   if (fRuleEnsemble->DoLinear())
      Log() << kDEBUG << "Number of linear terms: " << fNLinear << Endl;
   else
      Log() << kDEBUG << "Linear terms are disabled " << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// initializes the ntuple

void TMVA::RuleFitParams::InitNtuple()
{
   fGDNtuple = new TTree("MonitorNtuple_RuleFitParams","RuleFit path search");
   fGDNtuple->Branch("risk",    &fNTRisk,      "risk/D");
   fGDNtuple->Branch("error",   &fNTErrorRate, "error/D");
   fGDNtuple->Branch("nuval",   &fNTNuval,     "nuval/D");
   fGDNtuple->Branch("coefrad", &fNTCoefRad,   "coefrad/D");
   fGDNtuple->Branch("offset",  &fNTOffset,    "offset/D");
   //
   fNTCoeff    = (fNRules >0 ? new Double_t[fNRules]  : 0);
   fNTLinCoeff = (fNLinear>0 ? new Double_t[fNLinear] : 0);

   for (UInt_t i=0; i<fNRules; i++) {
      fGDNtuple->Branch(Form("a%d",i+1),&fNTCoeff[i],Form("a%d/D",i+1));
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fGDNtuple->Branch(Form("b%d",i+1),&fNTLinCoeff[i],Form("b%d/D",i+1));
   }
}

////////////////////////////////////////////////////////////////////////////////
/// evaluate the average of each variable and f(x) in the given range

void TMVA::RuleFitParams::EvaluateAverage( UInt_t ind1, UInt_t ind2,
                                           std::vector<Double_t> &avsel,
                                           std::vector<Double_t> &avrul )
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
   }
   //
   avsel.clear();
   avrul.clear();
   //
   if (fNLinear>0) avsel.resize(fNLinear,0);
   if (fNRules>0)  avrul.resize(fNRules,0);
   const std::vector<UInt_t> *eventRuleMap=0;
   Double_t ew;
   Double_t sumew=0;
   //
   // Loop over events and calculate average of linear terms (normalised) and rule response.
   //
   if (fRuleEnsemble->IsRuleMapOK()) { // MakeRuleMap() has been called
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
         }
         // loop over rules
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         for (UInt_t r=0; r<nrules; r++) {
            avrul[(*eventRuleMap)[r]] += ew;
         }
      }
   }
   else { // MakeRuleMap() has not yet been called
      const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         // first cache rule/lin response
         /* Double_t val = */ fRuleEnsemble->EvalLinEvent(*((*events)[i]));
         /* val = */ fRuleEnsemble->EvalEvent(*((*events)[i]));
         // loop over linear terms
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
         }
         // loop over rules
         for (UInt_t r=0; r<fNRules; r++) {
            avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
         }
      }
   }
   // average variable
   for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
      avsel[sel] = avsel[sel] / sumew;
   }
   // average rule response, excl. coeff
   for (UInt_t r=0; r<fNRules; r++) {
      avrul[r] = avrul[r] / sumew;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( const Event& e ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( e )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
   //
   return diff*diff*e.GetWeight();
}
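
// The loss evaluated above is the ramp version of the squared error,
//    L(y,F) = ( y - H(F(x)) )^2   with   H(F) = max(-1, min(1, F)),
// where y = +1 for signal and -1 for background. Because of the clamp,
// once |F| exceeds 1 the loss is locally constant, so such events drop
// out of the gradient (cf. eqs 39-40 in ref 1).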

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( evtidx )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx, UInt_t itau ) const
{
   Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,e) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment

Double_t TMVA::RuleFitParams::Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment for tau model <itau>

Double_t TMVA::RuleFitParams::Risk(UInt_t ind1, UInt_t ind2, Double_t neff, UInt_t itau) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i,itau);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// This is the "lasso" penalty.
/// To be used for regression.
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::Penalty() const
{
   Log() << kWARNING << "<Penalty> Using unverified code! Check!" << Endl;
   Double_t rval=0;
   const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
   for (UInt_t i=0; i<fNRules; i++) {
      rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      rval += TMath::Abs((*lincoeff)[i]);
   }
   return rval;
}
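
// The quantity computed above is the L1 ("lasso") norm of all fitted
// coefficients,
//    P(a,b) = sum_k |a_k| + sum_j |b_j| ,
// i.e. rule coefficients and linear-term coefficients are penalised alike.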

////////////////////////////////////////////////////////////////////////////////
/// Initialize GD path search

void TMVA::RuleFitParams::InitGD()
{
   if (fGDNTau<2) {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   if (fGDTau<0.0) {
      // fGDNTau = 50; already set in MethodRuleFit
      fGDTauScan = 1000;
      fGDTauMin  = 0.0;
      fGDTauMax  = 1.0;
   }
   else {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   // set all taus
   fGDTauVec.clear();
   fGDTauVec.resize( fGDNTau );
   if (fGDNTau==1) {
      fGDTauVec[0] = fGDTau;
   }
   else {
      // set tau vector - TODO: make a smarter choice of range in tau
      Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         fGDTauVec[itau] = static_cast<Double_t>(itau)*dtau + fGDTauMin;
         if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
      }
   }
   // initialize path search vectors

   fGradVec.clear();
   fGradVecLin.clear();
   fGradVecTst.clear();
   fGradVecLinTst.clear();
   fGDErrTst.clear();
   fGDErrTstOK.clear();
   fGDOfsTst.clear();
   fGDCoefTst.clear();
   fGDCoefLinTst.clear();
   //
   // rules
   //
   fGDCoefTst.resize(fGDNTau);
   fGradVec.resize(fNRules,0);
   fGradVecTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecTst[i].resize(fNRules,0);
      fGDCoefTst[i].resize(fNRules,0);
   }
   //
   // linear terms
   //
   fGDCoefLinTst.resize(fGDNTau);
   fGradVecLin.resize(fNLinear,0);
   fGradVecLinTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecLinTst[i].resize(fNLinear,0);
      fGDCoefLinTst[i].resize(fNLinear,0);
   }
   //
   // error, coefs etc
   //
   fGDErrTst.resize(fGDNTau,0);
   fGDErrTstOK.resize(fGDNTau,kTRUE);
   fGDOfsTst.resize(fGDNTau,0);
   fGDNTauTstOK = fGDNTau;
   //
   // calculate average selectors and rule responses for the path sample size
   //
}

////////////////////////////////////////////////////////////////////////////////
/// This finds the cutoff parameter tau by scanning several different paths

Int_t TMVA::RuleFitParams::FindGDTau()
{
   if (fGDNTau<2) return 0;
   if (fGDTauScan==0) return 0;

   if (fGDOfsTst.size()<1)
      Log() << kFATAL << "BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;
   //
   Log() << kINFO << "Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;
   //
   // Find how many points to scan and how often to calculate the error
   UInt_t nscan = fGDTauScan; //std::min(static_cast<Int_t>(fGDTauScan),fGDNPathSteps);
   UInt_t netst = std::min(nscan,UInt_t(100));
   UInt_t nscanned=0;
   //
   //--------------------
   // loop over the paths
   //--------------------
   // The MAXIMUM number of loops is given by nscan.
   // At each loop, the paths that are far away from the minimum
   // are rejected. Hence at each check (every netst events), the number
   // of paths searched will be reduced.
   // The maximum 'distance' from the minimum error rate is
   // 1 sigma. See RiskPerfTst() for details.
   //
   Bool_t doloop=kTRUE;
   UInt_t ip=0;
   UInt_t itauMin=0;
   Timer timer( nscan, "RuleFit" );
   while (doloop) {
      // make gradvec
      MakeTstGradientVector();
      // update coefs
      UpdateTstCoefficients();
      // estimate error and do the sum
      // do this at index=0, netst-1, 2*netst-1 ...
      nscanned++;
      if ( (ip==0) || ((ip+1)%netst==0) ) {
         // ErrorRateRocTst( );
         itauMin = RiskPerfTst();
         Log() << kVERBOSE << Form("%4d",ip+1) << ". tau = " << Form("%4.4f",fGDTauVec[itauMin])
               << " => error rate = " << fGDErrTst[itauMin] << Endl;
      }
      ip++;
      doloop = ((ip<nscan) && (fGDNTauTstOK>3));
      gFIRSTTST = kFALSE;
      if (Log().GetMinType()>kVERBOSE)
         timer.DrawProgressBar(ip);
   }
   //
   // Set tau and coefs
   // Downscale tau slightly in order to avoid numerical problems
   //
   if (nscanned==0) {
      Log() << kERROR << "<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;
   }
   fGDTau = fGDTauVec[itauMin];
   fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
   fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
   fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
   Log() << kINFO << "Best path found with tau = " << Form("%4.4f",fGDTau)
         << " after " << timer.GetElapsedTime() << " " << Endl;

   return nscan;
}

////////////////////////////////////////////////////////////////////////////////
/// The following finds the gradient directed path in parameter space.
/// More work is needed... FT, 24/9/2006
///
/// The algorithm is currently as follows (if not otherwise stated, the sample
/// used below is [fPathIdx1,fPathIdx2]):
///
/// 1. Set offset to -average(y(true)) and all coefs=0 => average of F(x)==0
/// 2. FindGDTau() : start scanning using several paths defined by different tau;
///    choose the tau yielding the best path
/// 3. start scanning along the chosen path
/// 4. check the error rate at a given frequency;
///    data used for the check: [fPerfIdx1,fPerfIdx2]
/// 5. stop when either of the following conditions is fulfilled:
///    1. loop index == fGDNPathSteps
///    2. error > fGDErrScale*errmin
///    3. only in DEBUG mode: risk is not monotonically decreasing
///
/// The algorithm will warn if:
/// 1. the error rate was still decreasing when the loop finished -> increase fGDNPathSteps!
/// 2. the minimum was found at an early stage -> decrease fGDPathStep
/// 3. DEBUG: risk > previous risk -> entered a chaotic region (regularization is too small)

void TMVA::RuleFitParams::MakeGDPath()
{
   Log() << kINFO << "GD path scan - the scan stops when the max num. of steps is reached or a min is found"
         << Endl;
   Log() << kVERBOSE << "Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
   Log() << kVERBOSE << "Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;

   // check if debug mode
   const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
   const Bool_t isDebug   = (Log().GetMinType()<=kDEBUG);

   // init GD parameters and clear coeff vectors
   InitGD();

   // evaluate average response of rules/linear terms (with event weights)
   EvaluateAveragePath();
   EvaluateAveragePerf();

   // initial estimate; all other a(i) are zero
   Log() << kVERBOSE << "Creating GD path"  << Endl;
   Log() << kVERBOSE << " N(steps)     = " << fGDNPathSteps << Endl;
   Log() << kVERBOSE << " step         = " << fGDPathStep << Endl;
   Log() << kVERBOSE << " N(tau)       = " << fGDNTau << Endl;
   Log() << kVERBOSE << " N(tau steps) = " << fGDTauScan << Endl;
   Log() << kVERBOSE << " tau range    = [ " << fGDTauVec[0] << " , " << fGDTauVec[fGDNTau-1] << " ]" << Endl;

   // init ntuple
   if (isDebug) InitNtuple();

   // DEBUG: risk scan
   Int_t nbadrisk=0;                  // number of points where risk(i+1)>risk(i)
   Double_t trisk=0;                  // time per risk evaluation
   Double_t strisk=0;                 // total time
   Double_t rprev=1e32;               // previous risk

   // parameters set at point with min error
   Double_t errmin=1e32;              // min error
   // Double_t riskMin=0;             // risk
   Int_t indMin=-1;                   // index
   std::vector<Double_t> coefsMin;    // rule coefs
   std::vector<Double_t> lincoefsMin; // linear coefs
   Double_t offsetMin;                // offset


   // DEBUG: timing
   clock_t t0=0;
   Double_t tgradvec;
   Double_t tupgrade;
   Double_t tperf;
   Double_t stgradvec=0;
   Double_t stupgrade=0;
   Double_t stperf=0;

   // linear regression to estimate slope of error rate evolution
   const UInt_t npreg=5;
   std::vector<Double_t> valx;
   std::vector<Double_t> valy;
   std::vector<Double_t> valxy;

   // loop related
   Bool_t docheck;          // true if an error rate check is to be done
   Int_t  iloop=0;          // loop index
   Bool_t found=kFALSE;     // true if minimum is found
   Bool_t riskFlat=kFALSE;  // DEBUG: flag is true if risk evolution behaves badly
   Bool_t done = kFALSE;    // flag that the scan is done

   // calculate how often to check error rate
   int imod = fGDNPathSteps/100;
   if (imod<100) imod = std::min(100,fGDNPathSteps);
   if (imod>100) imod=100;

   // reset coefficients
   fAverageTruth = -CalcAverageTruth();
   offsetMin     = fAverageTruth;
   fRuleEnsemble->SetOffset(offsetMin);
   fRuleEnsemble->ClearCoefficients(0);
   fRuleEnsemble->ClearLinCoefficients(0);
   for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
      fGDOfsTst[i] = offsetMin;
   }
   Log() << kVERBOSE << "Obtained initial offset = " << offsetMin << Endl;

   // find the best tau - returns the number of steps performed in scan
   Int_t nprescan = FindGDTau();

   //
   //
   // calculate F*
   //
   // CalcFStar(fPerfIdx1, fPerfIdx2);
   //

   // set some ntuple values
   fNTRisk = rprev;
   fNTCoefRad = -1.0;
   fNTErrorRate = 0;

   // a local flag indicating for what reason the search was stopped
   Int_t stopCondition=0;

   Log() << kINFO << "Fitting model..." << Endl;
   // start loop with timer
   Timer timer( fGDNPathSteps, "RuleFit" );
   Log() << kWARNING;
   while (!done) {
      // Make gradient vector (eq 44, ref 1)
      if (isVerbose) t0 = clock();
      MakeGradientVector();
      if (isVerbose) {
         tgradvec = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stgradvec += tgradvec;
      }

      // Calculate the direction in parameter space (eq 25, ref 1) and update coeffs (eq 22, ref 1)
      if (isVerbose) t0 = clock();
      UpdateCoefficients();
      if (isVerbose) {
         tupgrade = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stupgrade += tupgrade;
      }

      // don't check error rate every loop
      docheck = ((iloop==0) ||((iloop+1)%imod==0));

      if (docheck) {
         // draw progressbar only if not debug
         if (!isVerbose)
            timer.DrawProgressBar(iloop);
         fNTNuval = Double_t(iloop)*fGDPathStep;
         fNTRisk = 0.0;

         // check risk evolution

         if (isDebug) FillCoefficients();
         fNTCoefRad = fRuleEnsemble->CoefficientRadius();

         // calculate risk
         t0 = clock();
         fNTRisk = RiskPath();
         trisk = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         strisk += trisk;
         //
         // Check for an increase in risk.
         // Such an increase would imply that the regularization is too small.
         // Stop the iteration if this happens.
         //
         if (fNTRisk>=rprev) {
            if (fNTRisk>rprev) {
               nbadrisk++;
               Log() << "Risk(i+1)>=Risk(i) in path" << Endl;
               riskFlat=(nbadrisk>3);
               if (riskFlat) {
                  Log() << kWARNING << "Chaotic behaviour of risk evolution" << Endl;
                  Log() << "--- STOPPING MINIMISATION ---" << Endl;
                  Log() << "This may be OK if the minimum is already found" << Endl;
               }
            }
         }
         rprev = fNTRisk;
         //
         // Estimate the error rate using cross validation.
         // Well, not quite full cross validation since we only
         // use ONE model.
         //
         if (isVerbose) t0 = clock();
         fNTErrorRate = 0;

         // Check error rate
         Double_t errroc;//= ErrorRateRoc();
         Double_t riskPerf = RiskPerf();
         // Double_t optimism = Optimism();
         //
         errroc = riskPerf;
         //
         fNTErrorRate = errroc;
         //
         if (isVerbose) {
            tperf = Double_t(clock()-t0)/CLOCKS_PER_SEC;
            stperf +=tperf;
         }
         //
         // Always take the last min.
         // For each step the risk is reduced.
         //
         if (fNTErrorRate<=errmin) {
            errmin  = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin  = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         if ( fNTErrorRate > fGDErrScale*errmin) found = kTRUE;
         //
         // check slope of last couple of points
         //
         if (valx.size()==npreg) {
            valx.erase(valx.begin());
            valy.erase(valy.begin());
            valxy.erase(valxy.begin());
         }
         valx.push_back(fNTNuval);
         valy.push_back(fNTErrorRate);
         valxy.push_back(fNTErrorRate*fNTNuval);

         gFIRSTORG = kFALSE;

         //
         if (isDebug) fGDNtuple->Fill();
         if (isVerbose) {
            Log() << kVERBOSE << "ParamsIRE : "
                  << std::setw(10)
                  << Form("%8d",iloop+1) << " "
                  << Form("%4.4f",fNTRisk) << " "
                  << Form("%4.4f",riskPerf) << " "
                  << Form("%4.4f",fNTRisk+riskPerf) << " "
                  // << Form("%4.4f",fsigave+fbkgave) << " "
                  // << Form("%4.4f",fsigave) << " "
                  // << Form("%4.4f",fsigrms) << " "
                  // << Form("%4.4f",fbkgave) << " "
                  // << Form("%4.4f",fbkgrms) << " "
                  // << Form("%4.4f",fRuleEnsemble->CoefficientRadius())
                  << Endl;
         }
      }
      iloop++;
      // Stop iteration under various conditions
      // * The condition R(i+1)<R(i) is no longer true (when the implicit regularization is too weak)
      // * If the current error estimate is > factor*errmin (factor = 1.1)
      // * We have reached the last step...
      Bool_t endOfLoop = (iloop==fGDNPathSteps);
      if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
         if (riskFlat) {
            stopCondition = 1;
         }
         else if (endOfLoop) {
            stopCondition = 2;
         }
         if (indMin<0) {
            Log() << kWARNING << "BUG TRAP: should not be here - still, this bug is harmless ;)" << Endl;
            errmin  = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin  = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         found = kTRUE;
      }
      done = (found);
   }
   Log() << Endl;
   Log() << kINFO << "Minimisation elapsed time : " << timer.GetElapsedTime() << " " << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;
   Log() << kINFO << "Found minimum at step " << indMin+1 << " with error = " << errmin << Endl;
   Log() << kINFO << "Reason for ending loop: ";
   switch (stopCondition) {
   case 0:
      Log() << kINFO << "clear minimum found";
      break;
   case 1:
      Log() << kINFO << "chaotic behaviour of risk";
      break;
   case 2:
      Log() << kINFO << "end of loop reached";
      break;
   default:
      Log() << kINFO << "unknown!";
      break;
   }
   Log() << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;

   // check if early minimum - might be an indication of a too large step size
   if ( Double_t(indMin)/Double_t(nprescan+fGDNPathSteps) < 0.05 ) {
      Log() << kWARNING << "Reached minimum early in the search" << Endl;
      Log() << "Check results and maybe decrease GDStep size" << Endl;
   }
   //
   // quick check of the sign of the slope for the last npreg points
   //
   Double_t sumx  = std::accumulate( valx.begin(),  valx.end(),  Double_t() );
   Double_t sumxy = std::accumulate( valxy.begin(), valxy.end(), Double_t() );
   Double_t sumy  = std::accumulate( valy.begin(),  valy.end(),  Double_t() );
   Double_t slope = Double_t(valx.size())*sumxy - sumx*sumy;
   if (slope<0) {
      Log() << kINFO << "The error rate was still decreasing at the end of the path" << Endl;
      Log() << kINFO << "Increase the number of steps (GDNSteps)." << Endl;
   }
   //
   // set coefficients
   //
   if (found) {
      fRuleEnsemble->SetCoefficients( coefsMin );
      fRuleEnsemble->SetLinCoefficients( lincoefsMin );
      fRuleEnsemble->SetOffset( offsetMin );
   }
   else {
      Log() << kFATAL << "BUG TRAP: minimum not found in MakeGDPath()" << Endl;
   }

   //
   // print timing info (VERBOSE mode)
   //
   if (isVerbose) {
      Double_t stloop = strisk + stupgrade + stgradvec + stperf;
      Log() << kVERBOSE << "Timing per loop (ms):" << Endl;
      Log() << kVERBOSE << "   gradvec    = " << 1000*stgradvec/iloop << Endl;
      Log() << kVERBOSE << "   upgrade    = " << 1000*stupgrade/iloop << Endl;
      Log() << kVERBOSE << "   risk       = " << 1000*strisk/iloop << Endl;
      Log() << kVERBOSE << "   perf       = " << 1000*stperf/iloop << Endl;
      Log() << kVERBOSE << "   loop       = " << 1000*stloop/iloop << Endl;
      //
      Log() << kVERBOSE << "   GDInit     = " << 1000*gGDInit/iloop << Endl;
      Log() << kVERBOSE << "   GDPtr      = " << 1000*gGDPtr/iloop << Endl;
      Log() << kVERBOSE << "   GDEval     = " << 1000*gGDEval/iloop << Endl;
      Log() << kVERBOSE << "   GDEvalRule = " << 1000*gGDEvalRule/iloop << Endl;
      Log() << kVERBOSE << "   GDNorm     = " << 1000*gGDNorm/iloop << Endl;
      Log() << kVERBOSE << "   GDRuleLoop = " << 1000*gGDRuleLoop/iloop << Endl;
      Log() << kVERBOSE << "   GDLinLoop  = " << 1000*gGDLinLoop/iloop << Endl;
   }
   //
   // write ntuple (DEBUG)
   if (isDebug) fGDNtuple->Write();
}
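
// How this fits together (a sketch, not part of the original file): in
// practice RuleFit::FitCoefficients() drives the search; the setter names
// below follow the RuleFitParams interface and the 'rfit' variable is
// hypothetical:
//
//    TMVA::RuleFitParams rfp;
//    rfp.SetRuleFit( &rfit );       // attach an initialised RuleFit object
//    rfp.SetGDNPathSteps( 10000 );  // max number of path steps
//    rfp.SetGDPathStep( 0.01 );     // step size along the path
//    rfp.SetGDTau( -1.0 );          // negative tau => scan the tau range in FindGDTau()
//    rfp.Init();                    // set index ranges and effective event sums
//    rfp.MakeGDPath();              // InitGD() + FindGDTau() + the GD loop above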

////////////////////////////////////////////////////////////////////////////////
/// helper function to store the rule coefficients in local arrays

void TMVA::RuleFitParams::FillCoefficients()
{
   fNTOffset = fRuleEnsemble->GetOffset();
   //
   for (UInt_t i=0; i<fNRules; i++) {
      fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates F* (optimum scoring function) for all events for the given sets.
/// The result is used in ErrorRateReg().
/// --- NOT USED ---

void TMVA::RuleFitParams::CalcFStar()
{
   Log() << kWARNING << "<CalcFStar> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<CalcFStar> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   fFstar.clear();
   std::vector<Double_t> fstarSorted;
   Double_t fstarVal;
   // loop over all events and estimate F* for each event
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      fstarVal = fRuleEnsemble->FStar(e);
      fFstar.push_back(fstarVal);
      fstarSorted.push_back(fstarVal);
      if (TMath::IsNaN(fstarVal)) Log() << kFATAL << "F* is NAN!" << Endl;
   }
   // sort F* and find median
   std::sort( fstarSorted.begin(), fstarSorted.end() );
   UInt_t ind = neve/2;
   if (neve&1) { // odd number of events
      fFstarMedian = fstarSorted[ind];
   }
   else { // even
      fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// implementation of eq. 7.17 in the Hastie, Tibshirani & Friedman book:
/// this is the covariance between the estimated response yhat and the
/// true value y.
/// NOT REALLY SURE IF THIS IS CORRECT!
/// --- THIS IS NOT USED ---

Double_t TMVA::RuleFitParams::Optimism()
{
   Log() << kWARNING << "<Optimism> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<Optimism> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumy=0;
   Double_t sumyhat=0;
   Double_t sumyhaty=0;
   Double_t sumw2=0;
   Double_t sumw=0;
   Double_t yhat;
   Double_t y;
   Double_t w;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      yhat = fRuleEnsemble->EvalEvent(i);                                        // evaluated using the model
      y    = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0); // the truth
      w    = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf;                   // the weight, reweighted such that sum=1
      sumy     += w*y;
      sumyhat  += w*yhat;
      sumyhaty += w*yhat*y;
      sumw2    += w*w;
      sumw     += w;
   }
   Double_t div = 1.0-sumw2;
   Double_t cov = sumyhaty - sumyhat*sumy;
   return 2.0*cov/div;
}
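
// With the weights normalised so that sum(w) = 1, the value returned above is
//    2 * Cov(yhat, y) / (1 - sum(w^2)) ,
// where Cov(yhat,y) = sum_i w_i*yhat_i*y_i - (sum_i w_i*yhat_i)(sum_i w_i*y_i),
// a weighted form of the optimism term of eq. 7.17 in Hastie, Tibshirani &
// Friedman.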

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// This code is pretty messy at the moment.
/// Cleanup is needed.
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::ErrorRateReg()
{
   Log() << kWARNING << "<ErrorRateReg> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateReg> Invalid start/end indices!" << Endl;
   }
   if (fFstar.size()!=neve) {
      Log() << kFATAL << "--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
            << " Fstar.size() = " << fFstar.size() << " , N(events) = " << neve << Endl;
   }
   //
   Double_t sF;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdf = 0;
   Double_t sumdfmed = 0;
   //
   // A bit messy here.
   // I believe the binary error classification is appropriate here.
   // The problem is stability.
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // scaled abs error, eq 20 in RuleFit paper
      sumdf += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
      sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
   }
   // scaled abs error, eq 20
   // This error (df) is large - need to think about how to compensate...
   //
   return sumdf/sumdfmed;
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It uses a binary estimate of (y-F*(x)):
/// (y-F*(x)) = (Num of events where sign(F)!=sign(y))/Neve.
/// y = {+1 if event is signal, -1 otherwise}
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::ErrorRateBin()
{
   Log() << kWARNING << "<ErrorRateBin> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateBin> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdfbin = 0;
   Double_t dneve = Double_t(neve);
   Int_t signF, signy;
   Double_t sF;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // Double_t sFstar = fRuleEnsemble->FStar(e); // THIS CAN BE CALCULATED ONCE!
      signF = (sF>0 ? +1:-1);
      // signy = (sFStar>0 ? +1:-1);
      signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
      sumdfbin += TMath::Abs(Double_t(signF-signy))*0.5;
   }
   Double_t f = sumdfbin/dneve;
   // Double_t df = f*TMath::Sqrt((1.0/sumdfbin) + (1.0/dneve));
   return f;
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.

Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
                                               std::vector<Double_t> & sFbkg )

{
   std::sort(sFsig.begin(), sFsig.end());
   std::sort(sFbkg.begin(), sFbkg.end());
   const Double_t minsig = sFsig.front();
   const Double_t minbkg = sFbkg.front();
   const Double_t maxsig = sFsig.back();
   const Double_t maxbkg = sFbkg.back();
   const Double_t minf = std::min(minsig,minbkg);
   const Double_t maxf = std::max(maxsig,maxbkg);
   const Int_t    nsig = Int_t(sFsig.size());
   const Int_t    nbkg = Int_t(sFbkg.size());
   const Int_t    np   = std::min((nsig+nbkg)/4,50);
   const Double_t df   = (maxf-minf)/(np-1);
   //
   // calculate area under rejection/efficiency curve
   //
   Double_t fcut;
   std::vector<Double_t>::const_iterator indit;
   Int_t nrbkg;
   Int_t nesig;
   Int_t pnesig=0;
   Double_t rejb=0;
   Double_t effs=1.0;
   Double_t prejb=0;
   Double_t peffs=1.0;
   // Double_t drejb;
   Double_t deffs;
   Double_t area=0;
   Int_t npok=0;
   //
   // loop over range of F [minf,maxf]
   //
   for (Int_t i=0; i<np; i++) {
      fcut = minf + df*Double_t(i);
      indit = std::find_if(sFsig.begin(), sFsig.end(),
                           std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
      nesig = sFsig.end()-indit; // number of sig accepted with F>cut
      if (TMath::Abs(pnesig-nesig)>0) {
         npok++;
         indit = std::find_if(sFbkg.begin(), sFbkg.end(),
                              std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
         nrbkg = indit-sFbkg.begin(); // number of bkg rejected with F>cut
         rejb = Double_t(nrbkg)/Double_t(nbkg);
         effs = Double_t(nesig)/Double_t(nsig);
         //
         // drejb = rejb-prejb;
         deffs = effs-peffs;
         area += 0.5*TMath::Abs(deffs)*(rejb+prejb); // trapezoid
         prejb = rejb;
         peffs = effs;
      }
      pnesig = nesig;
   }
   area += 0.5*(1+rejb)*effs; // extrapolate to the end point

   return (1.0-area);
}
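
// The area above is accumulated with the trapezoid rule over the scanned
// cut values,
//    area += 0.5 * |d(eff_sig)| * (rej_bkg + prev_rej_bkg) ,
// and then extrapolated to the end point. Returning 1-area means a perfect
// classifier (area = 1) yields an error of 0.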

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
/// This works but is less efficient than calculating the Risk using RiskPerf().

Double_t TMVA::RuleFitParams::ErrorRateRoc()
{
   Log() << kWARNING << "<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRoc> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sF;
   //
   std::vector<Double_t> sFsig;
   std::vector<Double_t> sFbkg;
   Double_t sumfsig=0;
   Double_t sumfbkg=0;
   Double_t sumf2sig=0;
   Double_t sumf2bkg=0;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent(i);// * fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
         sFsig.push_back(sF);
         sumfsig  += sF;
         sumf2sig += sF*sF;
      }
      else {
         sFbkg.push_back(sF);
         sumfbkg  += sF;
         sumf2bkg += sF*sF;
      }
   }
   fsigave = sumfsig/sFsig.size();
   fbkgave = sumfbkg/sFbkg.size();
   fsigrms = TMath::Sqrt(gTools().ComputeVariance(sumf2sig,sumfsig,sFsig.size()));
   fbkgrms = TMath::Sqrt(gTools().ComputeVariance(sumf2bkg,sumfbkg,sFbkg.size()));
   //
   return ErrorRateRocRaw( sFsig, sFbkg );
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
///
/// See comment under ErrorRateRoc().

void TMVA::RuleFitParams::ErrorRateRocTst()
{
   Log() << kWARNING << "<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   // std::vector<Double_t> sF;
   Double_t sF;
   std::vector< std::vector<Double_t> > sFsig;
   std::vector< std::vector<Double_t> > sFbkg;
   //
   sFsig.resize( fGDNTau );
   sFbkg.resize( fGDNTau );
   // sF.resize( fGDNTau );

   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *(*events)[i], fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent(               fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
            sFsig[itau].push_back(sF);
         }
         else {
            sFbkg[itau].push_back(sF);
         }
      }
   }
   Double_t err;

   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
      fGDErrTst[itau] = err;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters,
/// using the <Perf> subsample.
/// Returns the tau index giving the lowest error.

UInt_t TMVA::RuleFitParams::RiskPerfTst()
{
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<RiskPerfTst> Invalid start/end indices!" << Endl;
      return 0;
   }
   //
   Double_t sumx  = 0;
   Double_t sumx2 = 0;
   Double_t maxx  = -100.0;
   Double_t minx  = 1e30;
   UInt_t itaumin = 0;
   UInt_t nok=0;
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         nok++;
         fGDErrTst[itau] = RiskPerf(itau);
         sumx  += fGDErrTst[itau];
         sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
         if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
         if (fGDErrTst[itau]<minx) {
            minx=fGDErrTst[itau];
            itaumin = itau;
         }
      }
   }
   Double_t sigx = TMath::Sqrt(gTools().ComputeVariance( sumx2, sumx, nok ) );
   Double_t maxacc = minx+sigx;
   //
   if (nok>0) {
      nok = 0;
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         if (fGDErrTstOK[itau]) {
            if (fGDErrTst[itau] > maxacc) {
               fGDErrTstOK[itau] = kFALSE;
            }
            else {
               nok++;
            }
         }
      }
   }
   fGDNTauTstOK = nok;
   Log() << kVERBOSE << "TAU: "
         << itaumin << " "
         << nok << " "
         << minx << " "
         << maxx << " "
         << sigx << Endl;
   //
   return itaumin;
}
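
// Pruning rule used above: any tau path whose error exceeds minx + sigx
// (more than one standard deviation above the current best error) is
// flagged as not-OK and skipped in subsequent gradient/coefficient updates;
// the scan in FindGDTau() stops once three or fewer paths survive.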

////////////////////////////////////////////////////////////////////////////////
/// make test gradient vector for all tau;
/// same algorithm as MakeGradientVector()

void TMVA::RuleFitParams::MakeTstGradientVector()
{
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeTstGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         for (UInt_t ir=0; ir<fNRules; ir++) {
            fGradVecTst[itau][ir]=0;
         }
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLinTst[itau][il]=0;
         }
      }
   }
   //
   // Double_t val; // temp store
   Double_t sF;   // score function value
   Double_t r;    // eq 35, ref 1
   Double_t y;    // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   // Loop over all events
   //
   UInt_t nsfok=0;
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];
      UInt_t nrules=0;
      if (fRuleEnsemble->DoRules()) {
         eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
         nrules = (*eventRuleMap).size();
      }
      for (UInt_t itau=0; itau<fGDNTau; itau++) { // loop over tau
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *e, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent(     fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fGDErrTstOK[itau]) {
            sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
            if (TMath::Abs(sF)<1.0) {
               nsfok++;
               r = 0;
               y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
               r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
               // rule gradient vector
               for (UInt_t ir=0; ir<nrules; ir++) {
                  rind = (*eventRuleMap)[ir];
                  fGradVecTst[itau][rind] += r;
               }
               // linear terms
               for (UInt_t il=0; il<fNLinear; il++) {
                  fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i, kTRUE );
               }
            } // if (TMath::Abs(F)<xxx)
         }
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset
/// for all taus. TODO: do not need index range!

void TMVA::RuleFitParams::UpdateTstCoefficients()
{
   Double_t maxr, maxl, cthresh, val;
   // loop over all taus
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         // find max gradient
         maxr = ( (fNRules>0 ?
                   TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue()))):0) );
         maxl = ( (fNLinear>0 ?
                   TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue()))):0) );

         // Use the maximum as a threshold
         Double_t maxv = (maxr>maxl ? maxr:maxl);
         cthresh = maxv * fGDTauVec[itau];

         // Add to offset, if gradient is large enough:
         // Loop over the gradient vector and move to next set of coefficients.
         // The size of GradVec (and GradVecLin) should be 0 if the learner is disabled.
         //
         // Step-size is divided by 10 when looking for the best path.
         //
         if (maxv>0) {
            const Double_t stepScale=1.0;
            for (UInt_t i=0; i<fNRules; i++) {
               val = fGradVecTst[itau][i];

               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
               }
            }
            // Loop over the gradient vector for the linear part and move to next set of coefficients
            for (UInt_t i=0; i<fNLinear; i++) {
               val = fGradVecLinTst[itau][i];
               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
               }
            }
         }
      }
   }
   // set the offset - should be outside the itau loop!
   CalcTstAverageResponse();
}

////////////////////////////////////////////////////////////////////////////////
/// make gradient vector

void TMVA::RuleFitParams::MakeGradientVector()
{
   clock_t t0;
   // clock_t t10;
   t0 = clock();
   //
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t ir=0; ir<fNRules; ir++) {
      fGradVec[ir]=0;
   }
   for (UInt_t il=0; il<fNLinear; il++) {
      fGradVecLin[il]=0;
   }
   //
   // Double_t val; // temp store
   Double_t sF;   // score function value
   Double_t r;    // eq 35, ref 1
   Double_t y;    // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   gGDInit += Double_t(clock()-t0)/CLOCKS_PER_SEC;

   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];

      // t0 = clock(); //DEB
      sF = fRuleEnsemble->EvalEvent( i ); // should not contain the weight
      // gGDEval += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      if (TMath::Abs(sF)<1.0) {
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
         r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
         // rule gradient vector
         for (UInt_t ir=0; ir<nrules; ir++) {
            rind = (*eventRuleMap)[ir];
            fGradVec[rind] += r;
         }
         // gGDRuleLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
         // linear terms
         // t0 = clock(); //DEB
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
         }
         // gGDLinLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      } // if (TMath::Abs(F)<xxx)
   }
}
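
// For events inside the ramp (|F| < 1) the negative loss gradient per rule
// coefficient reduces to (eq 35, ref 1)
//    g_k = (2/N_eff) * sum_i w_i * ( y_i - F(x_i) ) * r_k(x_i) ,
// where r_k is the rule response; since r_k is 0 or 1 here, each qualifying
// event simply adds the common factor r to the entries of the rules that
// fire for it, and the weighted raw value for the linear terms.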

////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset

void TMVA::RuleFitParams::UpdateCoefficients()
{
   Double_t maxr = ( (fRuleEnsemble->DoRules() ?
                      TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue()))):0) );
   Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
                      TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue()))):0) );
   // Use the maximum as a threshold
   Double_t maxv = (maxr>maxl ? maxr:maxl);
   Double_t cthresh = maxv * fGDTau;

   Double_t useRThresh;
   Double_t useLThresh;
   //
   // Choose thresholds.
   //
   useRThresh = cthresh;
   useLThresh = cthresh;

   Double_t gval, lval, coef, lcoef;

   // Add to offset, if gradient is large enough:
   // Loop over the gradient vector and move to next set of coefficients.
   // The size of GradVec (and GradVecLin) should be 0 if the learner is disabled.
   if (maxv>0) {
      for (UInt_t i=0; i<fGradVec.size(); i++) {
         gval = fGradVec[i];
         if (TMath::Abs(gval)>=useRThresh) {
            coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
            fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
         }
      }

      // Loop over the gradient vector for the linear part and move to next set of coefficients
      for (UInt_t i=0; i<fGradVecLin.size(); i++) {
         lval = fGradVecLin[i];
         if (TMath::Abs(lval)>=useLThresh) {
            lcoef = fRuleEnsemble->GetLinCoefficients(i) + fGDPathStep*lval;
            fRuleEnsemble->SetLinCoefficient(i,lcoef);
         }
      }
      // Set the offset
      Double_t offset = CalcAverageResponse();
      fRuleEnsemble->SetOffset( offset );
   }
}
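
// The tau threshold above is what makes this a *directed* path: only the
// coefficients whose gradient magnitude reaches tau * max|gradient| are
// moved in a given step. tau close to 1 updates very few coefficients per
// step (most lasso-like, favouring sparse solutions), while tau = 0 moves
// all of them (ridge-like behaviour).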

////////////////////////////////////////////////////////////////////////////////
/// calc average response for all test paths - TODO: see comment under CalcAverageResponse();
/// note that 0 offset is used

void TMVA::RuleFitParams::CalcTstAverageResponse()
{
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         fGDOfsTst[itau] = 0;
         for (UInt_t s=0; s<fNLinear; s++) {
            fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
         }
         for (UInt_t r=0; r<fNRules; r++) {
            fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
         }
      }
   }
   //
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average response - TODO: rewrite bad dependency on EvaluateAverage()!
///
/// note that 0 offset is used

Double_t TMVA::RuleFitParams::CalcAverageResponse()
{
   Double_t ofs = 0;
   for (UInt_t s=0; s<fNLinear; s++) {
      ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
   }
   for (UInt_t r=0; r<fNRules; r++) {
      ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
   }
   return ofs;
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average truth

Double_t TMVA::RuleFitParams::CalcAverageTruth()
{
   if (fPathIdx2<=fPathIdx1) {
      Log() << kFATAL << "<CalcAverageTruth> Invalid start/end indices!" << Endl;
      return 0;
   }
   Double_t sum=0;
   Double_t ensig=0;
   Double_t enbkg=0;
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      Double_t ew = fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
      else                                                                 enbkg += ew;
      sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
   }
   Log() << kVERBOSE << "Effective number of signal / background = " << ensig << " / " << enbkg << Endl;

   return sum/fNEveEffPath;
}

////////////////////////////////////////////////////////////////////////////////

Int_t TMVA::RuleFitParams::Type( const Event * e ) const {
   return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::RuleFitParams::SetMsgType( EMsgType t ) {
   fLogger->SetMinType(t);
}