Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooKeysPdf.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitModels *
4 * @(#)root/roofit:$Id$
5 * Authors: *
6 * GR, Gerhard Raven, UC San Diego, raven@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
9 * *
10 * Copyright (c) 2000-2005, Regents of the University of California *
11 * and Stanford University. All rights reserved. *
12 * *
13 * Redistribution and use in source and binary forms, *
14 * with or without modification, are permitted according to the terms *
15 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
16 *****************************************************************************/
17
18/** \class RooKeysPdf
19 \ingroup Roofit
20
21Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution
22of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
23each contributing 1/N to the total integral of the pdf.
24If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
25local density of events, i.e. narrow for regions with high event density to preserve details and
26wide for regions with low event density to promote smoothness. The details of the general algorithm
27are described in the following paper:
28
29Cranmer KS, Kernel Estimation in High-Energy Physics.
30 Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
31**/
32
33#include <limits>
34#include <algorithm>
35#include <cmath>
36#include <iostream>
37#include "TMath.h"
38#include "snprintf.h"
39#include "RooKeysPdf.h"
40#include "RooRealVar.h"
41#include "RooRandom.h"
42#include "RooDataSet.h"
43#include "RooTrace.h"
44
45#include "TError.h"
46
47using namespace std;
48
50
51const double RooKeysPdf::_nSigma = std::sqrt(-2. *
52 std::log(std::numeric_limits<double>::epsilon()));
53
54////////////////////////////////////////////////////////////////////////////////
55/// coverity[UNINIT_CTOR]
56
58{
60}
61
62////////////////////////////////////////////////////////////////////////////////
63/// cache stuff about x
64
65RooKeysPdf::RooKeysPdf(const char *name, const char *title,
67 Mirror mirror, double rho) :
68 RooAbsPdf(name,title),
69 _x("x","observable",this,x),
70 _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
71 _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
72 _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
73 _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
74 _rho(rho)
75{
76 snprintf(_varName, 128,"%s", x.GetName());
77 RooAbsRealLValue& real= (RooRealVar&)(_x.arg());
78 _lo = real.getMin();
79 _hi = real.getMax();
80 _binWidth = (_hi-_lo)/(_nPoints-1);
81
82 // form the lookup table
85}
86
87////////////////////////////////////////////////////////////////////////////////
88/// cache stuff about x
89
90RooKeysPdf::RooKeysPdf(const char *name, const char *title,
91 RooAbsReal& xpdf, RooRealVar& xdata, RooDataSet& data,
92 Mirror mirror, double rho) :
93 RooAbsPdf(name,title),
94 _x("x","Observable",this,xpdf),
95 _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
96 _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
97 _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
98 _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
99 _rho(rho)
100{
101 snprintf(_varName, 128,"%s", xdata.GetName());
102 RooAbsRealLValue& real= (RooRealVar&)(xdata);
103 _lo = real.getMin();
104 _hi = real.getMax();
105 _binWidth = (_hi-_lo)/(_nPoints-1);
106
107 // form the lookup table
110}
111
112////////////////////////////////////////////////////////////////////////////////
113
114RooKeysPdf::RooKeysPdf(const RooKeysPdf& other, const char* name):
115 RooAbsPdf(other,name), _x("x",this,other._x), _nEvents(other._nEvents),
116 _mirrorLeft( other._mirrorLeft ), _mirrorRight( other._mirrorRight ),
117 _asymLeft(other._asymLeft), _asymRight(other._asymRight),
118 _rho( other._rho ) {
119 // cache stuff about x
120 snprintf(_varName, 128, "%s", other._varName );
121 _lo = other._lo;
122 _hi = other._hi;
123 _binWidth = other._binWidth;
124
125 // copy over data and weights... not necessary, commented out for speed
126// _dataPts = new double[_nEvents];
127// _weights = new double[_nEvents];
128// for (Int_t i= 0; i<_nEvents; i++) {
129// _dataPts[i]= other._dataPts[i];
130// _weights[i]= other._weights[i];
131// }
132
133 // copy over the lookup table
134 for (Int_t i= 0; i<_nPoints+1; i++)
135 _lookupTable[i]= other._lookupTable[i];
136
138}
139
140////////////////////////////////////////////////////////////////////////////////
141
143 delete[] _dataPts;
144 delete[] _dataWgts;
145 delete[] _weights;
146
148}
149
150////////////////////////////////////////////////////////////////////////////////
151/// small helper structure
152
153namespace {
154 struct Data {
155 double x;
156 double w;
157 };
158 // helper to order two Data structures
159 struct cmp {
160 inline bool operator()(const struct Data& a, const struct Data& b) const
161 { return a.x < b.x; }
162 };
163}
165 delete[] _dataPts;
166 delete[] _dataWgts;
167 delete[] _weights;
168
169 std::vector<Data> tmp;
170 tmp.reserve((1 + _mirrorLeft + _mirrorRight) * data.numEntries());
171 double x0 = 0., x1 = 0., x2 = 0.;
172 _sumWgt = 0.;
173 // read the data set into tmp and accumulate some statistics
174 RooRealVar& real = (RooRealVar&)(data.get()->operator[](_varName));
175 for (Int_t i = 0; i < data.numEntries(); ++i) {
176 data.get(i);
177 const double x = real.getVal();
178 const double w = data.weight();
179 x0 += w;
180 x1 += w * x;
181 x2 += w * x * x;
183
184 Data p;
185 p.x = x, p.w = w;
186 tmp.push_back(p);
187 if (_mirrorLeft) {
188 p.x = 2. * _lo - x;
189 tmp.push_back(p);
190 }
191 if (_mirrorRight) {
192 p.x = 2. * _hi - x;
193 tmp.push_back(p);
194 }
195 }
196 // sort the entire data set so that values of x are increasing
197 std::sort(tmp.begin(), tmp.end(), cmp());
198
199 // copy the sorted data set to its final destination
200 _nEvents = tmp.size();
201 _dataPts = new double[_nEvents];
202 _dataWgts = new double[_nEvents];
203 for (unsigned i = 0; i < tmp.size(); ++i) {
204 _dataPts[i] = tmp[i].x;
205 _dataWgts[i] = tmp[i].w;
206 }
207 {
208 // free tmp
209 std::vector<Data> tmp2;
210 tmp2.swap(tmp);
211 }
212
213 double meanv=x1/x0;
214 double sigmav=std::sqrt(x2/x0-meanv*meanv);
215 double h=std::pow(double(4)/double(3),0.2)*std::pow(_sumWgt,-0.2)*_rho;
216 double hmin=h*sigmav*std::sqrt(2.)/10;
217 double norm=h*std::sqrt(sigmav * _sumWgt)/(2.0*std::sqrt(3.0));
218
219 _weights=new double[_nEvents];
220 for(Int_t j=0;j<_nEvents;++j) {
221 _weights[j] = norm / std::sqrt(_dataWgts[j] * g(_dataPts[j],h*sigmav));
222 if (_weights[j]<hmin) _weights[j]=hmin;
223 }
224
225 // The idea below is that beyond nSigma sigma, the value of the exponential
226 // in the Gaussian is well below the machine precision of a double, so it
227 // does not contribute any more. That way, we can limit how many bins of the
228 // binned approximation in _lookupTable we have to touch when filling it.
229 for (Int_t i=0;i<_nPoints+1;++i) _lookupTable[i] = 0.;
230 for(Int_t j=0;j<_nEvents;++j) {
231 const double xlo = std::min(_hi,
232 std::max(_lo, _dataPts[j] - _nSigma * _weights[j]));
233 const double xhi = std::max(_lo,
234 std::min(_hi, _dataPts[j] + _nSigma * _weights[j]));
235 if (xlo >= xhi) continue;
236 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
237 const double weightratio = _dataWgts[j] / _weights[j];
238 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
239 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
240 const double x = (double(_nPoints - binlo) * _lo +
241 double(binlo) * _hi) / double(_nPoints);
242 double chi = (x - _dataPts[j]) / _weights[j] / std::sqrt(2.);
243 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
244 _lookupTable[k] += weightratio * std::exp(- chi * chi);
245 }
246 }
247 if (_asymLeft) {
248 for(Int_t j=0;j<_nEvents;++j) {
249 const double xlo = std::min(_hi,
250 std::max(_lo, 2. * _lo - _dataPts[j] + _nSigma * _weights[j]));
251 const double xhi = std::max(_lo,
252 std::min(_hi, 2. * _lo - _dataPts[j] - _nSigma * _weights[j]));
253 if (xlo >= xhi) continue;
254 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
255 const double weightratio = _dataWgts[j] / _weights[j];
256 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
257 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
258 const double x = (double(_nPoints - binlo) * _lo +
259 double(binlo) * _hi) / double(_nPoints);
260 double chi = (x - (2. * _lo - _dataPts[j])) / _weights[j] / std::sqrt(2.);
261 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
262 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
263 }
264 }
265 }
266 if (_asymRight) {
267 for(Int_t j=0;j<_nEvents;++j) {
268 const double xlo = std::min(_hi,
269 std::max(_lo, 2. * _hi - _dataPts[j] + _nSigma * _weights[j]));
270 const double xhi = std::max(_lo,
271 std::min(_hi, 2. * _hi - _dataPts[j] - _nSigma * _weights[j]));
272 if (xlo >= xhi) continue;
273 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
274 const double weightratio = _dataWgts[j] / _weights[j];
275 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
276 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
277 const double x = (double(_nPoints - binlo) * _lo +
278 double(binlo) * _hi) / double(_nPoints);
279 double chi = (x - (2. * _hi - _dataPts[j])) / _weights[j] / std::sqrt(2.);
280 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
281 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
282 }
283 }
284 }
285 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
286 for (Int_t i=0;i<_nPoints+1;++i)
287 _lookupTable[i] /= sqrt2pi * _sumWgt;
288}
289
290////////////////////////////////////////////////////////////////////////////////
291
292double RooKeysPdf::evaluate() const {
293 Int_t i = (Int_t)floor((double(_x)-_lo)/_binWidth);
294 if (i<0) {
295// cerr << "got point below lower bound:"
296// << double(_x) << " < " << _lo
297// << " -- performing linear extrapolation..." << endl;
298 i=0;
299 }
300 if (i>_nPoints-1) {
301// cerr << "got point above upper bound:"
302// << double(_x) << " > " << _hi
303// << " -- performing linear extrapolation..." << endl;
304 i=_nPoints-1;
305 }
306 double dx = (double(_x)-(_lo+i*_binWidth))/_binWidth;
307
308 // for now do simple linear interpolation.
309 // one day replace by splines...
310 double ret = (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));
311 if (ret<0) ret=0 ;
312 return ret ;
313}
314
316 RooArgSet& allVars, RooArgSet& analVars, const char* /* rangeName */) const
317{
318 if (matchArgs(allVars, analVars, _x)) return 1;
319 return 0;
320}
321
322double RooKeysPdf::analyticalIntegral(Int_t code, const char* rangeName) const
323{
324 R__ASSERT(1 == code);
325 // this code is based on _lookupTable and uses linear interpolation, just as
326 // evaluate(); integration is done using the trapez rule
327 const double xmin = std::max(_lo, _x.min(rangeName));
328 const double xmax = std::min(_hi, _x.max(rangeName));
329 const Int_t imin = (Int_t)floor((xmin - _lo) / _binWidth);
330 const Int_t imax = std::min((Int_t)floor((xmax - _lo) / _binWidth),
331 _nPoints - 1);
332 double sum = 0.;
333 // sum up complete bins in middle
334 if (imin + 1 < imax)
335 sum += _lookupTable[imin + 1] + _lookupTable[imax];
336 for (Int_t i = imin + 2; i < imax; ++i)
337 sum += 2. * _lookupTable[i];
338 sum *= _binWidth * 0.5;
339 // treat incomplete bins
340 const double dxmin = (xmin - (_lo + imin * _binWidth)) / _binWidth;
341 const double dxmax = (xmax - (_lo + imax * _binWidth)) / _binWidth;
342 if (imin < imax) {
343 // first bin
344 sum += _binWidth * (1. - dxmin) * 0.5 * (_lookupTable[imin + 1] +
345 _lookupTable[imin] + dxmin *
346 (_lookupTable[imin + 1] - _lookupTable[imin]));
347 // last bin
348 sum += _binWidth * dxmax * 0.5 * (_lookupTable[imax] +
349 _lookupTable[imax] + dxmax *
350 (_lookupTable[imax + 1] - _lookupTable[imax]));
351 } else if (imin == imax) {
352 // first bin == last bin
353 sum += _binWidth * (dxmax - dxmin) * 0.5 * (
354 _lookupTable[imin] + dxmin *
355 (_lookupTable[imin + 1] - _lookupTable[imin]) +
356 _lookupTable[imax] + dxmax *
357 (_lookupTable[imax + 1] - _lookupTable[imax]));
358 }
359 return sum;
360}
361
363{
364 if (vars.contains(*_x.absArg())) return 1;
365 return 0;
366}
367
368double RooKeysPdf::maxVal(Int_t code) const
369{
370 R__ASSERT(1 == code);
371 double max = -std::numeric_limits<double>::max();
372 for (Int_t i = 0; i <= _nPoints; ++i)
373 if (max < _lookupTable[i]) max = _lookupTable[i];
374 return max;
375}
376
377////////////////////////////////////////////////////////////////////////////////
378
379double RooKeysPdf::g(double x,double sigmav) const {
380 double y=0;
381 // since data is sorted, we can be a little faster because we know which data
382 // points contribute
383 double* it = std::lower_bound(_dataPts, _dataPts + _nEvents,
384 x - _nSigma * sigmav);
385 if (it >= (_dataPts + _nEvents)) return 0.;
386 double* iend = std::upper_bound(it, _dataPts + _nEvents,
387 x + _nSigma * sigmav);
388 for ( ; it < iend; ++it) {
389 const double r = (x - *it) / sigmav;
390 y += std::exp(-0.5 * r * r);
391 }
392
393 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
394 return y/(sigmav*sqrt2pi);
395}
#define b(i)
Definition RSha256.hxx:100
#define g(i)
Definition RSha256.hxx:105
#define a(i)
Definition RSha256.hxx:99
#define h(i)
Definition RSha256.hxx:106
#define TRACE_DESTROY
Definition RooTrace.h:24
#define TRACE_CREATE
Definition RooTrace.h:23
int Int_t
Definition RtypesCore.h:45
#define ClassImp(name)
Definition Rtypes.h:377
#define R__ASSERT(e)
Definition TError.h:118
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t hmin
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
char name[80]
Definition TGX11.cxx:110
float xmin
float xmax
TRObject operator()(const T1 &t1) const
#define snprintf
Definition civetweb.c:1540
bool contains(const RooAbsArg &var) const
Check if collection contains an argument with the same name as var.
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
RooAbsRealLValue is the common abstract base class for objects that represent a real value that may a...
virtual double getMax(const char *name=nullptr) const
Get maximum of currently defined range.
virtual double getMin(const char *name=nullptr) const
Get minimum of currently defined range.
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition RooAbsReal.h:103
bool matchArgs(const RooArgSet &allDeps, RooArgSet &numDeps, const RooArgProxy &a) const
Utility function for use in getAnalyticalIntegral().
RooAbsArg * absArg() const
Return pointer to contained argument.
Definition RooArgProxy.h:46
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
RooDataSet is a container class to hold unbinned data.
Definition RooDataSet.h:57
Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution of...
Definition RooKeysPdf.h:24
double _binWidth
Definition RooKeysPdf.h:77
double _sumWgt
Definition RooKeysPdf.h:63
static constexpr int _nPoints
Definition RooKeysPdf.h:65
double * _dataWgts
Definition RooKeysPdf.h:61
RooKeysPdf()
coverity[UNINIT_CTOR]
double _lookupTable[_nPoints+1]
Definition RooKeysPdf.h:66
double maxVal(Int_t code) const override
Return maximum value for set of observables identified by code assigned in getMaxVal.
bool _mirrorRight
Definition RooKeysPdf.h:71
double _rho
Definition RooKeysPdf.h:78
double _hi
Definition RooKeysPdf.h:77
Char_t _varName[128]
Definition RooKeysPdf.h:76
double g(double x, double sigma) const
Int_t _nEvents
Definition RooKeysPdf.h:59
void LoadDataSet(RooDataSet &data)
Int_t getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char *rangeName=nullptr) const override
Interface function getAnalyticalIntergral advertises the analytical integrals that are supported.
RooRealProxy _x
Definition RooKeysPdf.h:51
double * _weights
Definition RooKeysPdf.h:62
double analyticalIntegral(Int_t code, const char *rangeName=nullptr) const override
Implements the actual analytical integral(s) advertised by getAnalyticalIntegral.
bool _mirrorLeft
Definition RooKeysPdf.h:70
double evaluate() const override
Evaluate this PDF / function / constant. Needs to be overridden by all derived classes.
~RooKeysPdf() override
double _lo
Definition RooKeysPdf.h:77
double * _dataPts
Definition RooKeysPdf.h:60
bool _asymRight
Definition RooKeysPdf.h:73
static const double _nSigma
Definition RooKeysPdf.h:57
bool _asymLeft
Definition RooKeysPdf.h:72
Int_t getMaxVal(const RooArgSet &vars) const override
Advertise capability to determine maximum value of function for given set of observables.
RooRealVar represents a variable that can be changed from the outside.
Definition RooRealVar.h:37
double max(const char *rname=nullptr) const
Query upper limit of range. This requires the payload to be RooAbsRealLValue or derived.
const T & arg() const
Return reference to object held in proxy.
double min(const char *rname=nullptr) const
Query lower limit of range. This requires the payload to be RooAbsRealLValue or derived.
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
constexpr Double_t Pi()
Definition TMath.h:37
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345