Logo ROOT  
Reference Guide
RooKeysPdf.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitModels *
4 * @(#)root/roofit:$Id$
5 * Authors: *
6 * GR, Gerhard Raven, UC San Diego, raven@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
9 * *
10 * Copyright (c) 2000-2005, Regents of the University of California *
11 * and Stanford University. All rights reserved. *
12 * *
13 * Redistribution and use in source and binary forms, *
14 * with or without modification, are permitted according to the terms *
15 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
16 *****************************************************************************/
17
18/** \class RooKeysPdf
19 \ingroup Roofit
20
21Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution
22of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
23each contributing 1/N to the total integral of the pdf.
24If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
25local density of events, i.e. narrow for regions with high event density to preserve details and
26wide for regions with low event density to promote smoothness. The details of the general algorithm
27are described in the following paper:
28
29Cranmer KS, Kernel Estimation in High-Energy Physics.
30 Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
31**/
32
33#include <limits>
34#include <algorithm>
35#include <cmath>
36#include <iostream>
37#include "TMath.h"
38#include "snprintf.h"
39#include "RooKeysPdf.h"
40#include "RooAbsReal.h"
41#include "RooRealVar.h"
42#include "RooRandom.h"
43#include "RooDataSet.h"
44#include "RooTrace.h"
45
46#include "TError.h"
47
48using namespace std;
49
51
52const double RooKeysPdf::_nSigma = std::sqrt(-2. *
54
55////////////////////////////////////////////////////////////////////////////////
56/// coverity[UNINIT_CTOR]
57
58 RooKeysPdf::RooKeysPdf() : _nEvents(0), _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
59 _mirrorLeft(false), _mirrorRight(false),
60 _asymLeft(false), _asymRight(false)
61{
63}
64
65////////////////////////////////////////////////////////////////////////////////
66/// cache stuff about x
67
68RooKeysPdf::RooKeysPdf(const char *name, const char *title,
70 Mirror mirror, double rho) :
71 RooAbsPdf(name,title),
72 _x("x","observable",this,x),
73 _nEvents(0),
74 _dataPts(0),
75 _dataWgts(0),
76 _weights(0),
77 _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
78 _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
79 _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
80 _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
81 _rho(rho)
82{
83 snprintf(_varName, 128,"%s", x.GetName());
84 RooAbsRealLValue& real= (RooRealVar&)(_x.arg());
85 _lo = real.getMin();
86 _hi = real.getMax();
87 _binWidth = (_hi-_lo)/(_nPoints-1);
88
89 // form the lookup table
92}
93
94////////////////////////////////////////////////////////////////////////////////
95/// cache stuff about x
96
97RooKeysPdf::RooKeysPdf(const char *name, const char *title,
98 RooAbsReal& xpdf, RooRealVar& xdata, RooDataSet& data,
99 Mirror mirror, double rho) :
100 RooAbsPdf(name,title),
101 _x("x","Observable",this,xpdf),
102 _nEvents(0),
103 _dataPts(0),
104 _dataWgts(0),
105 _weights(0),
106 _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
107 _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
108 _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
109 _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
110 _rho(rho)
111{
112 snprintf(_varName, 128,"%s", xdata.GetName());
113 RooAbsRealLValue& real= (RooRealVar&)(xdata);
114 _lo = real.getMin();
115 _hi = real.getMax();
116 _binWidth = (_hi-_lo)/(_nPoints-1);
117
118 // form the lookup table
121}
122
123////////////////////////////////////////////////////////////////////////////////
124
125RooKeysPdf::RooKeysPdf(const RooKeysPdf& other, const char* name):
126 RooAbsPdf(other,name), _x("x",this,other._x), _nEvents(other._nEvents),
127 _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
128 _mirrorLeft( other._mirrorLeft ), _mirrorRight( other._mirrorRight ),
129 _asymLeft(other._asymLeft), _asymRight(other._asymRight),
130 _rho( other._rho ) {
131 // cache stuff about x
132 snprintf(_varName, 128, "%s", other._varName );
133 _lo = other._lo;
134 _hi = other._hi;
135 _binWidth = other._binWidth;
136
137 // copy over data and weights... not necessary, commented out for speed
138// _dataPts = new double[_nEvents];
139// _weights = new double[_nEvents];
140// for (Int_t i= 0; i<_nEvents; i++) {
141// _dataPts[i]= other._dataPts[i];
142// _weights[i]= other._weights[i];
143// }
144
145 // copy over the lookup table
146 for (Int_t i= 0; i<_nPoints+1; i++)
147 _lookupTable[i]= other._lookupTable[i];
148
150}
151
152////////////////////////////////////////////////////////////////////////////////
153
155 delete[] _dataPts;
156 delete[] _dataWgts;
157 delete[] _weights;
158
160}
161
162////////////////////////////////////////////////////////////////////////////////
163/// small helper structure
164
165namespace {
166 struct Data {
167 double x;
168 double w;
169 };
170 // helper to order two Data structures
171 struct cmp {
172 inline bool operator()(const struct Data& a, const struct Data& b) const
173 { return a.x < b.x; }
174 };
175}
177 delete[] _dataPts;
178 delete[] _dataWgts;
179 delete[] _weights;
180
181 std::vector<Data> tmp;
182 tmp.reserve((1 + _mirrorLeft + _mirrorRight) * data.numEntries());
183 double x0 = 0., x1 = 0., x2 = 0.;
184 _sumWgt = 0.;
185 // read the data set into tmp and accumulate some statistics
186 RooRealVar& real = (RooRealVar&)(data.get()->operator[](_varName));
187 for (Int_t i = 0; i < data.numEntries(); ++i) {
188 data.get(i);
189 const double x = real.getVal();
190 const double w = data.weight();
191 x0 += w;
192 x1 += w * x;
193 x2 += w * x * x;
195
196 Data p;
197 p.x = x, p.w = w;
198 tmp.push_back(p);
199 if (_mirrorLeft) {
200 p.x = 2. * _lo - x;
201 tmp.push_back(p);
202 }
203 if (_mirrorRight) {
204 p.x = 2. * _hi - x;
205 tmp.push_back(p);
206 }
207 }
208 // sort the entire data set so that values of x are increasing
209 std::sort(tmp.begin(), tmp.end(), cmp());
210
211 // copy the sorted data set to its final destination
212 _nEvents = tmp.size();
213 _dataPts = new double[_nEvents];
214 _dataWgts = new double[_nEvents];
215 for (unsigned i = 0; i < tmp.size(); ++i) {
216 _dataPts[i] = tmp[i].x;
217 _dataWgts[i] = tmp[i].w;
218 }
219 {
220 // free tmp
221 std::vector<Data> tmp2;
222 tmp2.swap(tmp);
223 }
224
225 double meanv=x1/x0;
226 double sigmav=std::sqrt(x2/x0-meanv*meanv);
227 double h=std::pow(double(4)/double(3),0.2)*std::pow(_nEvents,-0.2)*_rho;
228 double hmin=h*sigmav*std::sqrt(2.)/10;
229 double norm=h*std::sqrt(sigmav)/(2.0*std::sqrt(3.0));
230
231 _weights=new double[_nEvents];
232 for(Int_t j=0;j<_nEvents;++j) {
233 _weights[j]=norm/std::sqrt(g(_dataPts[j],h*sigmav));
234 if (_weights[j]<hmin) _weights[j]=hmin;
235 }
236
237 // The idea below is that beyond nSigma sigma, the value of the exponential
238 // in the Gaussian is well below the machine precision of a double, so it
239 // does not contribute any more. That way, we can limit how many bins of the
240 // binned approximation in _lookupTable we have to touch when filling it.
241 for (Int_t i=0;i<_nPoints+1;++i) _lookupTable[i] = 0.;
242 for(Int_t j=0;j<_nEvents;++j) {
243 const double xlo = std::min(_hi,
244 std::max(_lo, _dataPts[j] - _nSigma * _weights[j]));
245 const double xhi = std::max(_lo,
246 std::min(_hi, _dataPts[j] + _nSigma * _weights[j]));
247 if (xlo >= xhi) continue;
248 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
249 const double weightratio = _dataWgts[j] / _weights[j];
250 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
251 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
252 const double x = (double(_nPoints - binlo) * _lo +
253 double(binlo) * _hi) / double(_nPoints);
254 double chi = (x - _dataPts[j]) / _weights[j] / std::sqrt(2.);
255 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
256 _lookupTable[k] += weightratio * std::exp(- chi * chi);
257 }
258 }
259 if (_asymLeft) {
260 for(Int_t j=0;j<_nEvents;++j) {
261 const double xlo = std::min(_hi,
262 std::max(_lo, 2. * _lo - _dataPts[j] + _nSigma * _weights[j]));
263 const double xhi = std::max(_lo,
264 std::min(_hi, 2. * _lo - _dataPts[j] - _nSigma * _weights[j]));
265 if (xlo >= xhi) continue;
266 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
267 const double weightratio = _dataWgts[j] / _weights[j];
268 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
269 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
270 const double x = (double(_nPoints - binlo) * _lo +
271 double(binlo) * _hi) / double(_nPoints);
272 double chi = (x - (2. * _lo - _dataPts[j])) / _weights[j] / std::sqrt(2.);
273 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
274 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
275 }
276 }
277 }
278 if (_asymRight) {
279 for(Int_t j=0;j<_nEvents;++j) {
280 const double xlo = std::min(_hi,
281 std::max(_lo, 2. * _hi - _dataPts[j] + _nSigma * _weights[j]));
282 const double xhi = std::max(_lo,
283 std::min(_hi, 2. * _hi - _dataPts[j] - _nSigma * _weights[j]));
284 if (xlo >= xhi) continue;
285 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
286 const double weightratio = _dataWgts[j] / _weights[j];
287 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
288 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
289 const double x = (double(_nPoints - binlo) * _lo +
290 double(binlo) * _hi) / double(_nPoints);
291 double chi = (x - (2. * _hi - _dataPts[j])) / _weights[j] / std::sqrt(2.);
292 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
293 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
294 }
295 }
296 }
297 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
298 for (Int_t i=0;i<_nPoints+1;++i)
299 _lookupTable[i] /= sqrt2pi * _sumWgt;
300}
301
302////////////////////////////////////////////////////////////////////////////////
303
304double RooKeysPdf::evaluate() const {
305 Int_t i = (Int_t)floor((double(_x)-_lo)/_binWidth);
306 if (i<0) {
307// cerr << "got point below lower bound:"
308// << double(_x) << " < " << _lo
309// << " -- performing linear extrapolation..." << endl;
310 i=0;
311 }
312 if (i>_nPoints-1) {
313// cerr << "got point above upper bound:"
314// << double(_x) << " > " << _hi
315// << " -- performing linear extrapolation..." << endl;
316 i=_nPoints-1;
317 }
318 double dx = (double(_x)-(_lo+i*_binWidth))/_binWidth;
319
320 // for now do simple linear interpolation.
321 // one day replace by splines...
322 double ret = (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));
323 if (ret<0) ret=0 ;
324 return ret ;
325}
326
328 RooArgSet& allVars, RooArgSet& analVars, const char* /* rangeName */) const
329{
330 if (matchArgs(allVars, analVars, _x)) return 1;
331 return 0;
332}
333
334double RooKeysPdf::analyticalIntegral(Int_t code, const char* rangeName) const
335{
336 R__ASSERT(1 == code);
337 // this code is based on _lookupTable and uses linear interpolation, just as
338 // evaluate(); integration is done using the trapez rule
339 const double xmin = std::max(_lo, _x.min(rangeName));
340 const double xmax = std::min(_hi, _x.max(rangeName));
341 const Int_t imin = (Int_t)floor((xmin - _lo) / _binWidth);
342 const Int_t imax = std::min((Int_t)floor((xmax - _lo) / _binWidth),
343 _nPoints - 1);
344 double sum = 0.;
345 // sum up complete bins in middle
346 if (imin + 1 < imax)
347 sum += _lookupTable[imin + 1] + _lookupTable[imax];
348 for (Int_t i = imin + 2; i < imax; ++i)
349 sum += 2. * _lookupTable[i];
350 sum *= _binWidth * 0.5;
351 // treat incomplete bins
352 const double dxmin = (xmin - (_lo + imin * _binWidth)) / _binWidth;
353 const double dxmax = (xmax - (_lo + imax * _binWidth)) / _binWidth;
354 if (imin < imax) {
355 // first bin
356 sum += _binWidth * (1. - dxmin) * 0.5 * (_lookupTable[imin + 1] +
357 _lookupTable[imin] + dxmin *
358 (_lookupTable[imin + 1] - _lookupTable[imin]));
359 // last bin
360 sum += _binWidth * dxmax * 0.5 * (_lookupTable[imax] +
361 _lookupTable[imax] + dxmax *
362 (_lookupTable[imax + 1] - _lookupTable[imax]));
363 } else if (imin == imax) {
364 // first bin == last bin
365 sum += _binWidth * (dxmax - dxmin) * 0.5 * (
366 _lookupTable[imin] + dxmin *
367 (_lookupTable[imin + 1] - _lookupTable[imin]) +
368 _lookupTable[imax] + dxmax *
369 (_lookupTable[imax + 1] - _lookupTable[imax]));
370 }
371 return sum;
372}
373
375{
376 if (vars.contains(*_x.absArg())) return 1;
377 return 0;
378}
379
380double RooKeysPdf::maxVal(Int_t code) const
381{
382 R__ASSERT(1 == code);
383 double max = -std::numeric_limits<double>::max();
384 for (Int_t i = 0; i <= _nPoints; ++i)
385 if (max < _lookupTable[i]) max = _lookupTable[i];
386 return max;
387}
388
389////////////////////////////////////////////////////////////////////////////////
390
391double RooKeysPdf::g(double x,double sigmav) const {
392 double y=0;
393 // since data is sorted, we can be a little faster because we know which data
394 // points contribute
395 double* it = std::lower_bound(_dataPts, _dataPts + _nEvents,
396 x - _nSigma * sigmav);
397 if (it >= (_dataPts + _nEvents)) return 0.;
398 double* iend = std::upper_bound(it, _dataPts + _nEvents,
399 x + _nSigma * sigmav);
400 for ( ; it < iend; ++it) {
401 const double r = (x - *it) / sigmav;
402 y += std::exp(-0.5 * r * r);
403 }
404
405 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
406 return y/(sigmav*sqrt2pi*_nEvents);
407}
#define h(i)
Definition: RSha256.hxx:106
#define TRACE_DESTROY
Definition: RooTrace.h:24
#define TRACE_CREATE
Definition: RooTrace.h:23
int Int_t
Definition: RtypesCore.h:45
#define ClassImp(name)
Definition: Rtypes.h:375
#define R__ASSERT(e)
Definition: TError.h:118
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t hmin
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t b
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
char name[80]
Definition: TGX11.cxx:110
float xmin
Definition: THbookFile.cxx:95
float xmax
Definition: THbookFile.cxx:95
TRObject operator()(const T1 &t1) const
#define snprintf
Definition: civetweb.c:1540
bool contains(const RooAbsArg &var) const
Check if collection contains an argument with the same name as var.
RooAbsRealLValue is the common abstract base class for objects that represent a real value that may a...
virtual double getMax(const char *name=0) const
Get maximum of currently defined range.
virtual double getMin(const char *name=0) const
Get minimum of currently defined range.
RooAbsReal is the common abstract base class for objects that represent a real value and implements f...
Definition: RooAbsReal.h:64
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition: RooAbsReal.h:94
bool matchArgs(const RooArgSet &allDeps, RooArgSet &numDeps, const RooArgProxy &a) const
Utility function for use in getAnalyticalIntegral().
RooAbsArg * absArg() const
Return pointer to contained argument.
Definition: RooArgProxy.h:40
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:57
RooDataSet is a container class to hold unbinned data.
Definition: RooDataSet.h:55
Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution of...
Definition: RooKeysPdf.h:25
Int_t getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char *rangeName=0) const override
Interface function getAnalyticalIntergral advertises the analytical integrals that are supported.
Definition: RooKeysPdf.cxx:327
double _binWidth
Definition: RooKeysPdf.h:76
double _sumWgt
Definition: RooKeysPdf.h:64
double * _dataWgts
Definition: RooKeysPdf.h:62
RooKeysPdf()
coverity[UNINIT_CTOR]
Definition: RooKeysPdf.cxx:58
double _lookupTable[_nPoints+1]
Definition: RooKeysPdf.h:67
double maxVal(Int_t code) const override
Return maximum value for set of observables identified by code assigned in getMaxVal.
Definition: RooKeysPdf.cxx:380
bool _mirrorRight
Definition: RooKeysPdf.h:71
double _rho
Definition: RooKeysPdf.h:77
double _hi
Definition: RooKeysPdf.h:76
Char_t _varName[128]
Definition: RooKeysPdf.h:75
double g(double x, double sigma) const
Definition: RooKeysPdf.cxx:391
Int_t _nEvents
Definition: RooKeysPdf.h:60
void LoadDataSet(RooDataSet &data)
Definition: RooKeysPdf.cxx:176
RooRealProxy _x
Definition: RooKeysPdf.h:52
double analyticalIntegral(Int_t code, const char *rangeName=0) const override
Implements the actual analytical integral(s) advertised by getAnalyticalIntegral.
Definition: RooKeysPdf.cxx:334
double * _weights
Definition: RooKeysPdf.h:63
bool _mirrorLeft
Definition: RooKeysPdf.h:71
double evaluate() const override
Evaluate this PDF / function / constant. Needs to be overridden by all derived classes.
Definition: RooKeysPdf.cxx:304
~RooKeysPdf() override
Definition: RooKeysPdf.cxx:154
double _lo
Definition: RooKeysPdf.h:76
double * _dataPts
Definition: RooKeysPdf.h:61
bool _asymRight
Definition: RooKeysPdf.h:72
static const double _nSigma
Definition: RooKeysPdf.h:58
bool _asymLeft
Definition: RooKeysPdf.h:72
Int_t getMaxVal(const RooArgSet &vars) const override
Advertise capability to determine maximum value of function for given set of observables.
Definition: RooKeysPdf.cxx:374
RooRealVar represents a variable that can be changed from the outside.
Definition: RooRealVar.h:40
double min(const char *rname=0) const
Query lower limit of range. This requires the payload to be RooAbsRealLValue or derived.
double max(const char *rname=0) const
Query upper limit of range. This requires the payload to be RooAbsRealLValue or derived.
const T & arg() const
Return reference to object held in proxy.
const char * GetName() const override
Returns name of object.
Definition: TNamed.h:47
RVec< PromoteType< T > > floor(const RVec< T > &v)
Definition: RVec.hxx:1773
RVec< PromoteTypes< T0, T1 > > pow(const T0 &x, const RVec< T1 > &v)
Definition: RVec.hxx:1753
RVec< PromoteType< T > > log(const RVec< T > &v)
Definition: RVec.hxx:1748
RVec< PromoteType< T > > exp(const RVec< T > &v)
Definition: RVec.hxx:1744
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
VecExpr< UnaryOp< Sqrt< T >, VecExpr< A, T, D >, T >, T, D > sqrt(const VecExpr< A, T, D > &rhs)
constexpr Double_t Pi()
Definition: TMath.h:37
auto * a
Definition: textangle.C:12
static uint64_t sum(uint64_t i)
Definition: Factory.cxx:2345
double epsilon
Definition: triangle.c:618