ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
RooKeysPdf.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitModels *
4  * @(#)root/roofit:$Id$
5  * Authors: *
6  * GR, Gerhard Raven, UC San Diego, raven@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
9  * *
10  * Copyright (c) 2000-2005, Regents of the University of California *
11  * and Stanford University. All rights reserved. *
12  * *
13  * Redistribution and use in source and binary forms, *
14  * with or without modification, are permitted according to the terms *
15  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
16  *****************************************************************************/
17 #include "RooFit.h"
18 
19 #include <limits>
20 #include <algorithm>
21 #include <cmath>
22 #include "Riostream.h"
23 #include "TMath.h"
24 
25 #include "RooKeysPdf.h"
26 #include "RooAbsReal.h"
27 #include "RooRealVar.h"
28 #include "RooRandom.h"
29 #include "RooDataSet.h"
30 #include "RooTrace.h"
31 
32 #include "TError.h"
33 
34 using namespace std;
35 
37 
38 
39 /**
40 \file RooKeysPdf.cxx
41 \class RooKeysPdf
42 \ingroup Roofit
43 
44 Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution
45 of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
46 each contributing 1/N to the total integral of the p.d.f.
47 If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
48 local density of events, i.e. narrow for regions with high event density to preserve details and
49 wide for regions with log event density to promote smoothness. The details of the general algorithm
50 are described in the following paper:
51 Cranmer KS, Kernel Estimation in High-Energy Physics.
52  Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
53 **/
54 
55 const Double_t RooKeysPdf::_nSigma = std::sqrt(-2. *
56  std::log(std::numeric_limits<Double_t>::epsilon()));
57 
58 ////////////////////////////////////////////////////////////////////////////////
59 /// coverity[UNINIT_CTOR]
60 
61  RooKeysPdf::RooKeysPdf() : _nEvents(0), _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
62  _mirrorLeft(kFALSE), _mirrorRight(kFALSE),
63  _asymLeft(kFALSE), _asymRight(kFALSE)
64 {
66 }
67 
68 
69 ////////////////////////////////////////////////////////////////////////////////
70 /// cache stuff about x
71 
72 RooKeysPdf::RooKeysPdf(const char *name, const char *title,
73  RooAbsReal& x, RooDataSet& data,
74  Mirror mirror, Double_t rho) :
75  RooAbsPdf(name,title),
76  _x("x","observable",this,x),
77  _nEvents(0),
78  _dataPts(0),
79  _dataWgts(0),
80  _weights(0),
81  _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
82  _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
83  _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
84  _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
85  _rho(rho)
86 {
87  snprintf(_varName, 128,"%s", x.GetName());
88  RooAbsRealLValue& real= (RooRealVar&)(_x.arg());
89  _lo = real.getMin();
90  _hi = real.getMax();
91  _binWidth = (_hi-_lo)/(_nPoints-1);
92 
93  // form the lookup table
94  LoadDataSet(data);
96 }
97 
98 
99 
100 ////////////////////////////////////////////////////////////////////////////////
101 /// cache stuff about x
102 
103 RooKeysPdf::RooKeysPdf(const char *name, const char *title,
104  RooAbsReal& xpdf, RooRealVar& xdata, RooDataSet& data,
105  Mirror mirror, Double_t rho) :
106  RooAbsPdf(name,title),
107  _x("x","Observable",this,xpdf),
108  _nEvents(0),
109  _dataPts(0),
110  _dataWgts(0),
111  _weights(0),
112  _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
113  _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
114  _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
115  _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
116  _rho(rho)
117 {
118  snprintf(_varName, 128,"%s", xdata.GetName());
119  RooAbsRealLValue& real= (RooRealVar&)(xdata);
120  _lo = real.getMin();
121  _hi = real.getMax();
122  _binWidth = (_hi-_lo)/(_nPoints-1);
123 
124  // form the lookup table
125  LoadDataSet(data);
127 }
128 
129 
130 
131 ////////////////////////////////////////////////////////////////////////////////
132 
133 RooKeysPdf::RooKeysPdf(const RooKeysPdf& other, const char* name):
134  RooAbsPdf(other,name), _x("x",this,other._x), _nEvents(other._nEvents),
135  _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
136  _mirrorLeft( other._mirrorLeft ), _mirrorRight( other._mirrorRight ),
137  _asymLeft(other._asymLeft), _asymRight(other._asymRight),
138  _rho( other._rho ) {
139  // cache stuff about x
140  snprintf(_varName, 128, "%s", other._varName );
141  _lo = other._lo;
142  _hi = other._hi;
143  _binWidth = other._binWidth;
144 
145  // copy over data and weights... not necessary, commented out for speed
146 // _dataPts = new Double_t[_nEvents];
147 // _weights = new Double_t[_nEvents];
148 // for (Int_t i= 0; i<_nEvents; i++) {
149 // _dataPts[i]= other._dataPts[i];
150 // _weights[i]= other._weights[i];
151 // }
152 
153  // copy over the lookup table
154  for (Int_t i= 0; i<_nPoints+1; i++)
155  _lookupTable[i]= other._lookupTable[i];
156 
158 }
159 
160 
161 ////////////////////////////////////////////////////////////////////////////////
162 
164  delete[] _dataPts;
165  delete[] _dataWgts;
166  delete[] _weights;
167 
169 }
170 
171 
172 
173 ////////////////////////////////////////////////////////////////////////////////
174 /// small helper structure
175 
176 namespace {
177  struct Data {
178  Double_t x;
179  Double_t w;
180  };
181  // helper to order two Data structures
182  struct cmp {
183  inline bool operator()(const struct Data& a, const struct Data& b) const
184  { return a.x < b.x; }
185  };
186 }
188  delete[] _dataPts;
189  delete[] _dataWgts;
190  delete[] _weights;
191 
192  std::vector<Data> tmp;
193  tmp.reserve((1 + _mirrorLeft + _mirrorRight) * data.numEntries());
194  Double_t x0 = 0., x1 = 0., x2 = 0.;
195  _sumWgt = 0.;
196  // read the data set into tmp and accumulate some statistics
197  RooRealVar& real = (RooRealVar&)(data.get()->operator[](_varName));
198  for (Int_t i = 0; i < data.numEntries(); ++i) {
199  data.get(i);
200  const Double_t x = real.getVal();
201  const Double_t w = data.weight();
202  x0 += w;
203  x1 += w * x;
204  x2 += w * x * x;
206 
207  Data p;
208  p.x = x, p.w = w;
209  tmp.push_back(p);
210  if (_mirrorLeft) {
211  p.x = 2. * _lo - x;
212  tmp.push_back(p);
213  }
214  if (_mirrorRight) {
215  p.x = 2. * _hi - x;
216  tmp.push_back(p);
217  }
218  }
219  // sort the entire data set so that values of x are increasing
220  std::sort(tmp.begin(), tmp.end(), cmp());
221 
222  // copy the sorted data set to its final destination
223  _nEvents = tmp.size();
224  _dataPts = new Double_t[_nEvents];
225  _dataWgts = new Double_t[_nEvents];
226  for (unsigned i = 0; i < tmp.size(); ++i) {
227  _dataPts[i] = tmp[i].x;
228  _dataWgts[i] = tmp[i].w;
229  }
230  {
231  // free tmp
232  std::vector<Data> tmp2;
233  tmp2.swap(tmp);
234  }
235 
236  Double_t meanv=x1/x0;
237  Double_t sigmav=std::sqrt(x2/x0-meanv*meanv);
238  Double_t h=std::pow(Double_t(4)/Double_t(3),0.2)*std::pow(_nEvents,-0.2)*_rho;
239  Double_t hmin=h*sigmav*std::sqrt(2.)/10;
240  Double_t norm=h*std::sqrt(sigmav)/(2.0*std::sqrt(3.0));
241 
243  for(Int_t j=0;j<_nEvents;++j) {
244  _weights[j]=norm/std::sqrt(g(_dataPts[j],h*sigmav));
245  if (_weights[j]<hmin) _weights[j]=hmin;
246  }
247 
248  // The idea below is that beyond nSigma sigma, the value of the exponential
249  // in the Gaussian is well below the machine precision of a double, so it
250  // does not contribute any more. That way, we can limit how many bins of the
251  // binned approximation in _lookupTable we have to touch when filling it.
252  for (Int_t i=0;i<_nPoints+1;++i) _lookupTable[i] = 0.;
253  for(Int_t j=0;j<_nEvents;++j) {
254  const Double_t xlo = std::min(_hi,
255  std::max(_lo, _dataPts[j] - _nSigma * _weights[j]));
256  const Double_t xhi = std::max(_lo,
257  std::min(_hi, _dataPts[j] + _nSigma * _weights[j]));
258  if (xlo >= xhi) continue;
259  const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
260  const Double_t weightratio = _dataWgts[j] / _weights[j];
261  const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
262  const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
263  const Double_t x = (Double_t(_nPoints - binlo) * _lo +
264  Double_t(binlo) * _hi) / Double_t(_nPoints);
265  Double_t chi = (x - _dataPts[j]) / _weights[j] / std::sqrt(2.);
266  for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
267  _lookupTable[k] += weightratio * std::exp(- chi * chi);
268  }
269  }
270  if (_asymLeft) {
271  for(Int_t j=0;j<_nEvents;++j) {
272  const Double_t xlo = std::min(_hi,
273  std::max(_lo, 2. * _lo - _dataPts[j] + _nSigma * _weights[j]));
274  const Double_t xhi = std::max(_lo,
275  std::min(_hi, 2. * _lo - _dataPts[j] - _nSigma * _weights[j]));
276  if (xlo >= xhi) continue;
277  const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
278  const Double_t weightratio = _dataWgts[j] / _weights[j];
279  const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
280  const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
281  const Double_t x = (Double_t(_nPoints - binlo) * _lo +
282  Double_t(binlo) * _hi) / Double_t(_nPoints);
283  Double_t chi = (x - (2. * _lo - _dataPts[j])) / _weights[j] / std::sqrt(2.);
284  for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
285  _lookupTable[k] -= weightratio * std::exp(- chi * chi);
286  }
287  }
288  }
289  if (_asymRight) {
290  for(Int_t j=0;j<_nEvents;++j) {
291  const Double_t xlo = std::min(_hi,
292  std::max(_lo, 2. * _hi - _dataPts[j] + _nSigma * _weights[j]));
293  const Double_t xhi = std::max(_lo,
294  std::min(_hi, 2. * _hi - _dataPts[j] - _nSigma * _weights[j]));
295  if (xlo >= xhi) continue;
296  const Double_t chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
297  const Double_t weightratio = _dataWgts[j] / _weights[j];
298  const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
299  const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
300  const Double_t x = (Double_t(_nPoints - binlo) * _lo +
301  Double_t(binlo) * _hi) / Double_t(_nPoints);
302  Double_t chi = (x - (2. * _hi - _dataPts[j])) / _weights[j] / std::sqrt(2.);
303  for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
304  _lookupTable[k] -= weightratio * std::exp(- chi * chi);
305  }
306  }
307  }
308  static const Double_t sqrt2pi(std::sqrt(2*TMath::Pi()));
309  for (Int_t i=0;i<_nPoints+1;++i)
310  _lookupTable[i] /= sqrt2pi * _sumWgt;
311 }
312 
313 ////////////////////////////////////////////////////////////////////////////////
314 
317  if (i<0) {
318 // cerr << "got point below lower bound:"
319 // << Double_t(_x) << " < " << _lo
320 // << " -- performing linear extrapolation..." << endl;
321  i=0;
322  }
323  if (i>_nPoints-1) {
324 // cerr << "got point above upper bound:"
325 // << Double_t(_x) << " > " << _hi
326 // << " -- performing linear extrapolation..." << endl;
327  i=_nPoints-1;
328  }
330 
331  // for now do simple linear interpolation.
332  // one day replace by splines...
333  Double_t ret = (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));
334  if (ret<0) ret=0 ;
335  return ret ;
336 }
337 
339  RooArgSet& allVars, RooArgSet& analVars, const char* /* rangeName */) const
340 {
341  if (matchArgs(allVars, analVars, _x)) return 1;
342  return 0;
343 }
344 
345 Double_t RooKeysPdf::analyticalIntegral(Int_t code, const char* rangeName) const
346 {
347  R__ASSERT(1 == code);
348  // this code is based on _lookupTable and uses linear interpolation, just as
349  // evaluate(); integration is done using the trapez rule
350  const Double_t xmin = std::max(_lo, _x.min(rangeName));
351  const Double_t xmax = std::min(_hi, _x.max(rangeName));
352  const Int_t imin = (Int_t)floor((xmin - _lo) / _binWidth);
353  const Int_t imax = std::min((Int_t)floor((xmax - _lo) / _binWidth),
354  _nPoints - 1);
355  Double_t sum = 0.;
356  // sum up complete bins in middle
357  if (imin + 1 < imax)
358  sum += _lookupTable[imin + 1] + _lookupTable[imax];
359  for (Int_t i = imin + 2; i < imax; ++i)
360  sum += 2. * _lookupTable[i];
361  sum *= _binWidth * 0.5;
362  // treat incomplete bins
363  const Double_t dxmin = (xmin - (_lo + imin * _binWidth)) / _binWidth;
364  const Double_t dxmax = (xmax - (_lo + imax * _binWidth)) / _binWidth;
365  if (imin < imax) {
366  // first bin
367  sum += _binWidth * (1. - dxmin) * 0.5 * (_lookupTable[imin + 1] +
368  _lookupTable[imin] + dxmin *
369  (_lookupTable[imin + 1] - _lookupTable[imin]));
370  // last bin
371  sum += _binWidth * dxmax * 0.5 * (_lookupTable[imax] +
372  _lookupTable[imax] + dxmax *
373  (_lookupTable[imax + 1] - _lookupTable[imax]));
374  } else if (imin == imax) {
375  // first bin == last bin
376  sum += _binWidth * (dxmax - dxmin) * 0.5 * (
377  _lookupTable[imin] + dxmin *
378  (_lookupTable[imin + 1] - _lookupTable[imin]) +
379  _lookupTable[imax] + dxmax *
380  (_lookupTable[imax + 1] - _lookupTable[imax]));
381  }
382  return sum;
383 }
384 
386 {
387  if (vars.contains(*_x.absArg())) return 1;
388  return 0;
389 }
390 
392 {
393  R__ASSERT(1 == code);
395  for (Int_t i = 0; i <= _nPoints; ++i)
396  if (max < _lookupTable[i]) max = _lookupTable[i];
397  return max;
398 }
399 
400 
401 ////////////////////////////////////////////////////////////////////////////////
402 
404  Double_t y=0;
405  // since data is sorted, we can be a little faster because we know which data
406  // points contribute
407  Double_t* it = std::lower_bound(_dataPts, _dataPts + _nEvents,
408  x - _nSigma * sigmav);
409  if (it >= (_dataPts + _nEvents)) return 0.;
410  Double_t* iend = std::upper_bound(it, _dataPts + _nEvents,
411  x + _nSigma * sigmav);
412  for ( ; it < iend; ++it) {
413  const Double_t r = (x - *it) / sigmav;
414  y += std::exp(-0.5 * r * r);
415  }
416 
417  static const Double_t sqrt2pi(std::sqrt(2*TMath::Pi()));
418  return y/(sigmav*sqrt2pi*_nEvents);
419 }
virtual Double_t analyticalIntegral(Int_t code, const char *rangeName=0) const
Implements the actual analytical integral(s) advertised by getAnalyticalIntegral. ...
Definition: RooKeysPdf.cxx:345
Double_t _lookupTable[_nPoints+1]
Definition: RooKeysPdf.h:67
Double_t * _dataWgts
Definition: RooKeysPdf.h:62
float xmin
Definition: THbookFile.cxx:93
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
Definition: vector.h:433
Double_t _lo
Definition: RooKeysPdf.h:76
Double_t * _weights
Definition: RooKeysPdf.h:63
TH1 * h
Definition: legend2.C:5
virtual Double_t maxVal(Int_t code) const
Return maximum value for set of observables identified by code assigned in getMaxVal.
Definition: RooKeysPdf.cxx:391
Bool_t _mirrorLeft
Definition: RooKeysPdf.h:71
Bool_t contains(const RooAbsArg &var) const
Double_t g(Double_t x, Double_t sigma) const
Definition: RooKeysPdf.cxx:403
Double_t _sumWgt
Definition: RooKeysPdf.h:64
#define R__ASSERT(e)
Definition: TError.h:98
virtual Int_t getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char *rangeName=0) const
Interface function getAnalyticalIntergral advertises the analytical integrals that are supported...
Definition: RooKeysPdf.cxx:338
int Int_t
Definition: RtypesCore.h:41
TArc * a
Definition: textangle.C:12
const Bool_t kFALSE
Definition: Rtypes.h:92
const RooAbsReal & arg() const
Definition: RooRealProxy.h:43
double sqrt(double)
static const double x2[5]
#define TRACE_CREATE
Definition: RooTrace.h:23
Double_t x[n]
Definition: legend1.C:17
Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution of...
Definition: RooKeysPdf.h:25
double pow(double, double)
Double_t evaluate() const
Definition: RooKeysPdf.cxx:315
Bool_t _asymRight
Definition: RooKeysPdf.h:72
std::vector< std::vector< double > > Data
Double_t getVal(const RooArgSet *set=0) const
Definition: RooAbsReal.h:64
void LoadDataSet(RooDataSet &data)
Definition: RooKeysPdf.cxx:187
Double_t _binWidth
Definition: RooKeysPdf.h:76
static const Double_t _nSigma
Definition: RooKeysPdf.h:58
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:37
virtual Int_t getMaxVal(const RooArgSet &vars) const
Advertise capability to determine maximum value of function for given set of observables.
Definition: RooKeysPdf.cxx:385
virtual ~RooKeysPdf()
Definition: RooKeysPdf.cxx:163
Char_t _varName[128]
Definition: RooKeysPdf.h:75
ROOT::R::TRInterface & r
Definition: Object.C:4
virtual Double_t weight() const
Return event weight of current event.
TPaveLabel title(3, 27.1, 15, 28.7,"ROOT Environment and Tools")
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:293
double floor(double)
tuple w
Definition: qtexample.py:51
Double_t _hi
Definition: RooKeysPdf.h:76
RooAbsArg * absArg() const
Definition: RooArgProxy.h:37
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
float xmax
Definition: THbookFile.cxx:93
Int_t _nEvents
Definition: RooKeysPdf.h:60
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:29
Bool_t _mirrorRight
Definition: RooKeysPdf.h:71
REAL epsilon
Definition: triangle.c:617
Double_t _rho
Definition: RooKeysPdf.h:77
Double_t Pi()
Definition: TMath.h:44
TRObject operator()(const T1 &t1) const
static const double x1[5]
#define ClassImp(name)
Definition: Rtypes.h:279
double Double_t
Definition: RtypesCore.h:55
RooAbsReal is the common abstract base class for objects that represent a real value and implements f...
Definition: RooAbsReal.h:53
Double_t y[n]
Definition: legend1.C:17
#define TRACE_DESTROY
Definition: RooTrace.h:24
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
Definition: vector.h:440
#define name(a, b)
Definition: linkTestLib0.cpp:5
Bool_t _asymLeft
Definition: RooKeysPdf.h:72
RooAbsRealLValue is the common abstract base class for objects that represent a real value that may a...
RooAbsPdf is the abstract interface for all probability density functions The class provides hybrid a...
Definition: RooAbsPdf.h:41
Double_t min(const char *rname=0) const
Definition: RooRealProxy.h:56
RooRealProxy _x
Definition: RooKeysPdf.h:52
Bool_t matchArgs(const RooArgSet &allDeps, RooArgSet &numDeps, const RooArgProxy &a) const
Utility function for use in getAnalyticalIntegral().
double exp(double)
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
virtual const RooArgSet * get(Int_t index) const
Return RooArgSet with coordinates of event 'index'.
Double_t max(const char *rname=0) const
Definition: RooRealProxy.h:57
double log(double)
Double_t * _dataPts
Definition: RooKeysPdf.h:61