Logo ROOT   6.10/09
Reference Guide
RooKeysPdf.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitModels *
4  * @(#)root/roofit:$Id$
5  * Authors: *
6  * GR, Gerhard Raven, UC San Diego, raven@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
9  * *
10  * Copyright (c) 2000-2005, Regents of the University of California *
11  * and Stanford University. All rights reserved. *
12  * *
13  * Redistribution and use in source and binary forms, *
14  * with or without modification, are permitted according to the terms *
15  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
16  *****************************************************************************/
17 
18 /** \class RooKeysPdf
19  \ingroup Roofit
20 
21 Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution
22 of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
23 each contributing 1/N to the total integral of the p.d.f.
24 If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
25 local density of events, i.e. narrow for regions with high event density to preserve details and
26 wide for regions with log event density to promote smoothness. The details of the general algorithm
27 are described in the following paper:
28 
29 Cranmer KS, Kernel Estimation in High-Energy Physics.
30  Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
31 **/
32 
33 #include "RooFit.h"
34 
35 #include <limits>
36 #include <algorithm>
37 #include <cmath>
38 #include "Riostream.h"
39 #include "TMath.h"
40 
41 #include "RooKeysPdf.h"
42 #include "RooAbsReal.h"
43 #include "RooRealVar.h"
44 #include "RooRandom.h"
45 #include "RooDataSet.h"
46 #include "RooTrace.h"
47 
48 #include "TError.h"
49 
50 using namespace std;
51 
53 
56 
57 ////////////////////////////////////////////////////////////////////////////////
58 /// coverity[UNINIT_CTOR]
59 
60  RooKeysPdf::RooKeysPdf() : _nEvents(0), _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
61  _mirrorLeft(kFALSE), _mirrorRight(kFALSE),
62  _asymLeft(kFALSE), _asymRight(kFALSE)
63 {
65 }
66 
67 ////////////////////////////////////////////////////////////////////////////////
68 /// cache stuff about x
69 
70 RooKeysPdf::RooKeysPdf(const char *name, const char *title,
72  Mirror mirror, Double_t rho) :
73  RooAbsPdf(name,title),
74  _x("x","observable",this,x),
75  _nEvents(0),
76  _dataPts(0),
77  _dataWgts(0),
78  _weights(0),
79  _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
80  _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
81  _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
82  _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
83  _rho(rho)
84 {
85  snprintf(_varName, 128,"%s", x.GetName());
86  RooAbsRealLValue& real= (RooRealVar&)(_x.arg());
87  _lo = real.getMin();
88  _hi = real.getMax();
89  _binWidth = (_hi-_lo)/(_nPoints-1);
90 
91  // form the lookup table
92  LoadDataSet(data);
94 }
95 
96 ////////////////////////////////////////////////////////////////////////////////
97 /// cache stuff about x
98 
99 RooKeysPdf::RooKeysPdf(const char *name, const char *title,
100  RooAbsReal& xpdf, RooRealVar& xdata, RooDataSet& data,
101  Mirror mirror, Double_t rho) :
102  RooAbsPdf(name,title),
103  _x("x","Observable",this,xpdf),
104  _nEvents(0),
105  _dataPts(0),
106  _dataWgts(0),
107  _weights(0),
108  _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
109  _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
110  _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
111  _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
112  _rho(rho)
113 {
114  snprintf(_varName, 128,"%s", xdata.GetName());
115  RooAbsRealLValue& real= (RooRealVar&)(xdata);
116  _lo = real.getMin();
117  _hi = real.getMax();
118  _binWidth = (_hi-_lo)/(_nPoints-1);
119 
120  // form the lookup table
121  LoadDataSet(data);
123 }
124 
125 ////////////////////////////////////////////////////////////////////////////////
126 
127 RooKeysPdf::RooKeysPdf(const RooKeysPdf& other, const char* name):
128  RooAbsPdf(other,name), _x("x",this,other._x), _nEvents(other._nEvents),
129  _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
131  _asymLeft(other._asymLeft), _asymRight(other._asymRight),
132  _rho( other._rho ) {
133  // cache stuff about x
134  snprintf(_varName, 128, "%s", other._varName );
135  _lo = other._lo;
136  _hi = other._hi;
137  _binWidth = other._binWidth;
138 
139  // copy over data and weights... not necessary, commented out for speed
140 // _dataPts = new Double_t[_nEvents];
141 // _weights = new Double_t[_nEvents];
142 // for (Int_t i= 0; i<_nEvents; i++) {
143 // _dataPts[i]= other._dataPts[i];
144 // _weights[i]= other._weights[i];
145 // }
146 
147  // copy over the lookup table
148  for (Int_t i= 0; i<_nPoints+1; i++)
149  _lookupTable[i]= other._lookupTable[i];
150 
152 }
153 
154 ////////////////////////////////////////////////////////////////////////////////
155 
157  delete[] _dataPts;
158  delete[] _dataWgts;
159  delete[] _weights;
160 
162 }
163 
164 ////////////////////////////////////////////////////////////////////////////////
165 /// small helper structure
166 
167 namespace {
168  struct Data {
169  Double_t x;
170  Double_t w;
171  };
172  // helper to order two Data structures
173  struct cmp {
174  inline bool operator()(const struct Data& a, const struct Data& b) const
175  { return a.x < b.x; }
176  };
177 }
179  delete[] _dataPts;
180  delete[] _dataWgts;
181  delete[] _weights;
182 
183  std::vector<Data> tmp;
184  tmp.reserve((1 + _mirrorLeft + _mirrorRight) * data.numEntries());
185  Double_t x0 = 0., x1 = 0., x2 = 0.;
186  _sumWgt = 0.;
187  // read the data set into tmp and accumulate some statistics
188  RooRealVar& real = (RooRealVar&)(data.get()->operator[](_varName));
189  for (Int_t i = 0; i < data.numEntries(); ++i) {
190  data.get(i);
191  const Double_t x = real.getVal();
192  const Double_t w = data.weight();
193  x0 += w;
194  x1 += w * x;
195  x2 += w * x * x;
197 
198  Data p;
199  p.x = x, p.w = w;
200  tmp.push_back(p);
201  if (_mirrorLeft) {
202  p.x = 2. * _lo - x;
203  tmp.push_back(p);
204  }
205  if (_mirrorRight) {
206  p.x = 2. * _hi - x;
207  tmp.push_back(p);
208  }
209  }
210  // sort the entire data set so that values of x are increasing
211  std::sort(tmp.begin(), tmp.end(), cmp());
212 
213  // copy the sorted data set to its final destination
214  _nEvents = tmp.size();
215  _dataPts = new Double_t[_nEvents];
216  _dataWgts = new Double_t[_nEvents];
217  for (unsigned i = 0; i < tmp.size(); ++i) {
218  _dataPts[i] = tmp[i].x;
219  _dataWgts[i] = tmp[i].w;
220  }
221  {
222  // free tmp
223  std::vector<Data> tmp2;
224  tmp2.swap(tmp);
225  }
226 
227  Double_t meanv=x1/x0;
228  Double_t sigmav=std::sqrt(x2/x0-meanv*meanv);
229  Double_t h=std::pow(Double_t(4)/Double_t(3),0.2)*std::pow(_nEvents,-0.2)*_rho;
230  Double_t hmin=h*sigmav*std::sqrt(2.)/10;
231  Double_t norm=h*std::sqrt(sigmav)/(2.0*std::sqrt(3.0));
232 
234  for(Int_t j=0;j<_nEvents;++j) {
235  _weights[j]=norm/std::sqrt(g(_dataPts[j],h*sigmav));
236  if (_weights[j]<hmin) _weights[j]=hmin;
237  }
238 
239  // The idea below is that beyond nSigma sigma, the value of the exponential
240  // in the Gaussian is well below the machine precision of a double, so it
241  // does not contribute any more. That way, we can limit how many bins of the
242  // binned approximation in _lookupTable we have to touch when filling it.
243  for (Int_t i=0;i<_nPoints+1;++i) _lookupTable[i] = 0.;
244  for(Int_t j=0;j<_nEvents;++j) {
245  const Double_t xlo = std::min(_hi,
246  std::max(_lo, _dataPts[j] - _nSigma * _weights[j]));
247  const Double_t xhi = std::max(_lo,
248  std::min(_hi, _dataPts[j] + _nSigma * _weights[j]));
249  if (xlo >= xhi) continue;
250  const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
251  const Double_t weightratio = _dataWgts[j] / _weights[j];
252  const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
253  const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
254  const Double_t x = (Double_t(_nPoints - binlo) * _lo +
255  Double_t(binlo) * _hi) / Double_t(_nPoints);
256  Double_t chi = (x - _dataPts[j]) / _weights[j] / std::sqrt(2.);
257  for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
258  _lookupTable[k] += weightratio * std::exp(- chi * chi);
259  }
260  }
261  if (_asymLeft) {
262  for(Int_t j=0;j<_nEvents;++j) {
263  const Double_t xlo = std::min(_hi,
264  std::max(_lo, 2. * _lo - _dataPts[j] + _nSigma * _weights[j]));
265  const Double_t xhi = std::max(_lo,
266  std::min(_hi, 2. * _lo - _dataPts[j] - _nSigma * _weights[j]));
267  if (xlo >= xhi) continue;
268  const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
269  const Double_t weightratio = _dataWgts[j] / _weights[j];
270  const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
271  const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
272  const Double_t x = (Double_t(_nPoints - binlo) * _lo +
273  Double_t(binlo) * _hi) / Double_t(_nPoints);
274  Double_t chi = (x - (2. * _lo - _dataPts[j])) / _weights[j] / std::sqrt(2.);
275  for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
276  _lookupTable[k] -= weightratio * std::exp(- chi * chi);
277  }
278  }
279  }
280  if (_asymRight) {
281  for(Int_t j=0;j<_nEvents;++j) {
282  const Double_t xlo = std::min(_hi,
283  std::max(_lo, 2. * _hi - _dataPts[j] + _nSigma * _weights[j]));
284  const Double_t xhi = std::max(_lo,
285  std::min(_hi, 2. * _hi - _dataPts[j] - _nSigma * _weights[j]));
286  if (xlo >= xhi) continue;
287  const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
288  const Double_t weightratio = _dataWgts[j] / _weights[j];
289  const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
290  const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
291  const Double_t x = (Double_t(_nPoints - binlo) * _lo +
292  Double_t(binlo) * _hi) / Double_t(_nPoints);
293  Double_t chi = (x - (2. * _hi - _dataPts[j])) / _weights[j] / std::sqrt(2.);
294  for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
295  _lookupTable[k] -= weightratio * std::exp(- chi * chi);
296  }
297  }
298  }
299  static const Double_t sqrt2pi(std::sqrt(2*TMath::Pi()));
300  for (Int_t i=0;i<_nPoints+1;++i)
301  _lookupTable[i] /= sqrt2pi * _sumWgt;
302 }
303 
304 ////////////////////////////////////////////////////////////////////////////////
305 
308  if (i<0) {
309 // cerr << "got point below lower bound:"
310 // << Double_t(_x) << " < " << _lo
311 // << " -- performing linear extrapolation..." << endl;
312  i=0;
313  }
314  if (i>_nPoints-1) {
315 // cerr << "got point above upper bound:"
316 // << Double_t(_x) << " > " << _hi
317 // << " -- performing linear extrapolation..." << endl;
318  i=_nPoints-1;
319  }
321 
322  // for now do simple linear interpolation.
323  // one day replace by splines...
324  Double_t ret = (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));
325  if (ret<0) ret=0 ;
326  return ret ;
327 }
328 
330  RooArgSet& allVars, RooArgSet& analVars, const char* /* rangeName */) const
331 {
332  if (matchArgs(allVars, analVars, _x)) return 1;
333  return 0;
334 }
335 
336 Double_t RooKeysPdf::analyticalIntegral(Int_t code, const char* rangeName) const
337 {
338  R__ASSERT(1 == code);
339  // this code is based on _lookupTable and uses linear interpolation, just as
340  // evaluate(); integration is done using the trapez rule
341  const Double_t xmin = std::max(_lo, _x.min(rangeName));
342  const Double_t xmax = std::min(_hi, _x.max(rangeName));
343  const Int_t imin = (Int_t)floor((xmin - _lo) / _binWidth);
344  const Int_t imax = std::min((Int_t)floor((xmax - _lo) / _binWidth),
345  _nPoints - 1);
346  Double_t sum = 0.;
347  // sum up complete bins in middle
348  if (imin + 1 < imax)
349  sum += _lookupTable[imin + 1] + _lookupTable[imax];
350  for (Int_t i = imin + 2; i < imax; ++i)
351  sum += 2. * _lookupTable[i];
352  sum *= _binWidth * 0.5;
353  // treat incomplete bins
354  const Double_t dxmin = (xmin - (_lo + imin * _binWidth)) / _binWidth;
355  const Double_t dxmax = (xmax - (_lo + imax * _binWidth)) / _binWidth;
356  if (imin < imax) {
357  // first bin
358  sum += _binWidth * (1. - dxmin) * 0.5 * (_lookupTable[imin + 1] +
359  _lookupTable[imin] + dxmin *
360  (_lookupTable[imin + 1] - _lookupTable[imin]));
361  // last bin
362  sum += _binWidth * dxmax * 0.5 * (_lookupTable[imax] +
363  _lookupTable[imax] + dxmax *
364  (_lookupTable[imax + 1] - _lookupTable[imax]));
365  } else if (imin == imax) {
366  // first bin == last bin
367  sum += _binWidth * (dxmax - dxmin) * 0.5 * (
368  _lookupTable[imin] + dxmin *
369  (_lookupTable[imin + 1] - _lookupTable[imin]) +
370  _lookupTable[imax] + dxmax *
371  (_lookupTable[imax + 1] - _lookupTable[imax]));
372  }
373  return sum;
374 }
375 
377 {
378  if (vars.contains(*_x.absArg())) return 1;
379  return 0;
380 }
381 
383 {
384  R__ASSERT(1 == code);
385  Double_t max = -std::numeric_limits<Double_t>::max();
386  for (Int_t i = 0; i <= _nPoints; ++i)
387  if (max < _lookupTable[i]) max = _lookupTable[i];
388  return max;
389 }
390 
391 ////////////////////////////////////////////////////////////////////////////////
392 
394  Double_t y=0;
395  // since data is sorted, we can be a little faster because we know which data
396  // points contribute
397  Double_t* it = std::lower_bound(_dataPts, _dataPts + _nEvents,
398  x - _nSigma * sigmav);
399  if (it >= (_dataPts + _nEvents)) return 0.;
400  Double_t* iend = std::upper_bound(it, _dataPts + _nEvents,
401  x + _nSigma * sigmav);
402  for ( ; it < iend; ++it) {
403  const Double_t r = (x - *it) / sigmav;
404  y += std::exp(-0.5 * r * r);
405  }
406 
407  static const Double_t sqrt2pi(std::sqrt(2*TMath::Pi()));
408  return y/(sigmav*sqrt2pi*_nEvents);
409 }
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Double_t _lookupTable[_nPoints+1]
Definition: RooKeysPdf.h:67
Double_t * _dataWgts
Definition: RooKeysPdf.h:62
static long int sum(long int i)
Definition: Factory.cxx:2162
float xmin
Definition: THbookFile.cxx:93
virtual Int_t getMaxVal(const RooArgSet &vars) const
Advertise capability to determine maximum value of function for given set of observables.
Definition: RooKeysPdf.cxx:376
virtual Double_t analyticalIntegral(Int_t code, const char *rangeName=0) const
Implements the actual analytical integral(s) advertised by getAnalyticalIntegral. ...
Definition: RooKeysPdf.cxx:336
Bool_t matchArgs(const RooArgSet &allDeps, RooArgSet &numDeps, const RooArgProxy &a) const
Utility function for use in getAnalyticalIntegral().
Double_t _lo
Definition: RooKeysPdf.h:76
Double_t getVal(const RooArgSet *set=0) const
Definition: RooAbsReal.h:64
Double_t * _weights
Definition: RooKeysPdf.h:63
Definition: data.h:2
TH1 * h
Definition: legend2.C:5
Bool_t _mirrorLeft
Definition: RooKeysPdf.h:71
Double_t _sumWgt
Definition: RooKeysPdf.h:64
#define R__ASSERT(e)
Definition: TError.h:96
int Int_t
Definition: RtypesCore.h:41
TArc * a
Definition: textangle.C:12
STL namespace.
TRObject operator()(const T1 &t1) const
double sqrt(double)
static const double x2[5]
#define TRACE_CREATE
Definition: RooTrace.h:22
Double_t x[n]
Definition: legend1.C:17
Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution of...
Definition: RooKeysPdf.h:25
double pow(double, double)
Bool_t _asymRight
Definition: RooKeysPdf.h:72
void LoadDataSet(RooDataSet &data)
Definition: RooKeysPdf.cxx:178
constexpr Double_t Pi()
Definition: TMath.h:40
Double_t _binWidth
Definition: RooKeysPdf.h:76
static const Double_t _nSigma
Definition: RooKeysPdf.h:58
virtual Double_t weight() const
Return event weight of current event.
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:36
Double_t g(Double_t x, Double_t sigma) const
Definition: RooKeysPdf.cxx:393
virtual ~RooKeysPdf()
Definition: RooKeysPdf.cxx:156
RooKeysPdf()
coverity[UNINIT_CTOR]
Definition: RooKeysPdf.cxx:60
Char_t _varName[128]
Definition: RooKeysPdf.h:75
TRandom2 r(17)
virtual Double_t maxVal(Int_t code) const
Return maximum value for set of observables identified by code assigned in getMaxVal.
Definition: RooKeysPdf.cxx:382
RooAbsArg * absArg() const
Definition: RooArgProxy.h:37
double floor(double)
Double_t _hi
Definition: RooKeysPdf.h:76
float xmax
Definition: THbookFile.cxx:93
Int_t _nEvents
Definition: RooKeysPdf.h:60
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:29
Bool_t _mirrorRight
Definition: RooKeysPdf.h:71
REAL epsilon
Definition: triangle.c:617
virtual const RooArgSet * get(Int_t index) const
Return RooArgSet with coordinates of event &#39;index&#39;.
Double_t _rho
Definition: RooKeysPdf.h:77
const Bool_t kFALSE
Definition: RtypesCore.h:92
virtual Int_t getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char *rangeName=0) const
Interface function getAnalyticalIntergral advertises the analytical integrals that are supported...
Definition: RooKeysPdf.cxx:329
static const double x1[5]
#define ClassImp(name)
Definition: Rtypes.h:336
Double_t min(const char *rname=0) const
Definition: RooRealProxy.h:56
double Double_t
Definition: RtypesCore.h:55
RooAbsReal is the common abstract base class for objects that represent a real value and implements f...
Definition: RooAbsReal.h:53
Double_t y[n]
Definition: legend1.C:17
#define TRACE_DESTROY
Definition: RooTrace.h:23
Bool_t _asymLeft
Definition: RooKeysPdf.h:72
Double_t max(const char *rname=0) const
Definition: RooRealProxy.h:57
RooAbsRealLValue is the common abstract base class for objects that represent a real value that may a...
RooAbsPdf is the abstract interface for all probability density functions The class provides hybrid a...
Definition: RooAbsPdf.h:41
Double_t evaluate() const
Definition: RooKeysPdf.cxx:306
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
RooRealProxy _x
Definition: RooKeysPdf.h:52
#define snprintf
Definition: civetweb.c:822
Bool_t contains(const RooAbsArg &var) const
const RooAbsReal & arg() const
Definition: RooRealProxy.h:43
double exp(double)
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
double log(double)
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:269
Double_t * _dataPts
Definition: RooKeysPdf.h:61