Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooKeysPdf.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitModels *
4 * @(#)root/roofit:$Id$
5 * Authors: *
6 * GR, Gerhard Raven, UC San Diego, raven@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
9 * *
10 * Copyright (c) 2000-2005, Regents of the University of California *
11 * and Stanford University. All rights reserved. *
12 * *
13 * Redistribution and use in source and binary forms, *
14 * with or without modification, are permitted according to the terms *
15 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
16 *****************************************************************************/
17
18/** \class RooKeysPdf
19 \ingroup Roofit
20
21Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution
22of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
23each contributing 1/N to the total integral of the pdf.
24If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
25local density of events, i.e. narrow for regions with high event density to preserve details and
26wide for regions with low event density to promote smoothness. The details of the general algorithm
27are described in the following paper:
28
29Cranmer KS, Kernel Estimation in High-Energy Physics.
30 Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
31**/
32
33#include <limits>
34#include <algorithm>
35#include <cmath>
36#include <iostream>
37#include "TMath.h"
38#include "snprintf.h"
39#include "RooKeysPdf.h"
40#include "RooRealVar.h"
41#include "RooRandom.h"
42#include "RooDataSet.h"
43#include "RooTrace.h"
44
45#include "TError.h"
46
48
49const double RooKeysPdf::_nSigma = std::sqrt(-2. *
50 std::log(std::numeric_limits<double>::epsilon()));
51
52////////////////////////////////////////////////////////////////////////////////
53/// coverity[UNINIT_CTOR]
54
56{
58}
59
60////////////////////////////////////////////////////////////////////////////////
61/// cache stuff about x
62
63RooKeysPdf::RooKeysPdf(const char *name, const char *title, RooAbsReal &x, RooDataSet &data, Mirror mirror, double rho)
64 : RooKeysPdf(name, title, x, static_cast<RooRealVar &>(x), data, mirror, rho)
65{
66}
67
68////////////////////////////////////////////////////////////////////////////////
69/// cache stuff about x
70
71RooKeysPdf::RooKeysPdf(const char *name, const char *title, RooAbsReal &xpdf, RooRealVar &xdata, RooDataSet &data,
72 Mirror mirror, double rho)
73 : RooAbsPdf(name, title),
74 _x("x", "Observable", this, xpdf),
75 _mirrorLeft(mirror == MirrorLeft || mirror == MirrorBoth || mirror == MirrorLeftAsymRight),
76 _mirrorRight(mirror == MirrorRight || mirror == MirrorBoth || mirror == MirrorAsymLeftRight),
77 _asymLeft(mirror == MirrorAsymLeft || mirror == MirrorAsymLeftRight || mirror == MirrorAsymBoth),
78 _asymRight(mirror == MirrorAsymRight || mirror == MirrorLeftAsymRight || mirror == MirrorAsymBoth),
79 _lo(xdata.getMin()),
80 _hi(xdata.getMax()),
81 _binWidth((_hi - _lo) / (_nPoints - 1)),
82 _rho(rho)
83{
84 snprintf(_varName, 128,"%s", xdata.GetName());
85
86 // form the lookup table
89}
90
91////////////////////////////////////////////////////////////////////////////////
92
93RooKeysPdf::RooKeysPdf(const RooKeysPdf &other, const char *name)
94 : RooAbsPdf(other, name),
95 _x("x", this, other._x),
96 _nEvents(other._nEvents),
97 _mirrorLeft(other._mirrorLeft),
98 _mirrorRight(other._mirrorRight),
99 _asymLeft(other._asymLeft),
100 _asymRight(other._asymRight),
101 _lo(other._lo),
102 _hi(other._hi),
103 _binWidth(other._binWidth),
104 _rho(other._rho)
105{
106 // cache stuff about x
107 snprintf(_varName, 128, "%s", other._varName );
108
109 // copy over data and weights... not necessary, commented out for speed
110// _dataPts = new double[_nEvents];
111// _weights = new double[_nEvents];
112// for (Int_t i= 0; i<_nEvents; i++) {
113// _dataPts[i]= other._dataPts[i];
114// _weights[i]= other._weights[i];
115// }
116
117 // copy over the lookup table
118 for (Int_t i= 0; i<_nPoints+1; i++)
119 _lookupTable[i]= other._lookupTable[i];
120
122}
123
124////////////////////////////////////////////////////////////////////////////////
125
127 delete[] _dataPts;
128 delete[] _dataWgts;
129 delete[] _weights;
130
132}
133
134////////////////////////////////////////////////////////////////////////////////
135/// small helper structure
136
137namespace {
138 struct Data {
139 double x;
140 double w;
141 };
142 // helper to order two Data structures
143 struct cmp {
144 inline bool operator()(const struct Data& a, const struct Data& b) const
145 { return a.x < b.x; }
146 };
147}
149 delete[] _dataPts;
150 delete[] _dataWgts;
151 delete[] _weights;
152
153 std::vector<Data> tmp;
154 tmp.reserve((1 + _mirrorLeft + _mirrorRight) * data.numEntries());
155 double x0 = 0.;
156 double x1 = 0.;
157 double x2 = 0.;
158 _sumWgt = 0.;
159 // read the data set into tmp and accumulate some statistics
160 RooRealVar& real = static_cast<RooRealVar&>(data.get()->operator[](_varName));
161 for (Int_t i = 0; i < data.numEntries(); ++i) {
162 data.get(i);
163 const double x = real.getVal();
164 const double w = data.weight();
165 x0 += w;
166 x1 += w * x;
167 x2 += w * x * x;
169
170 Data p;
171 p.x = x, p.w = w;
172 tmp.push_back(p);
173 if (_mirrorLeft) {
174 p.x = 2. * _lo - x;
175 tmp.push_back(p);
176 }
177 if (_mirrorRight) {
178 p.x = 2. * _hi - x;
179 tmp.push_back(p);
180 }
181 }
182 // sort the entire data set so that values of x are increasing
183 std::sort(tmp.begin(), tmp.end(), cmp());
184
185 // copy the sorted data set to its final destination
186 _nEvents = tmp.size();
187 _dataPts = new double[_nEvents];
188 _dataWgts = new double[_nEvents];
189 for (unsigned i = 0; i < tmp.size(); ++i) {
190 _dataPts[i] = tmp[i].x;
191 _dataWgts[i] = tmp[i].w;
192 }
193 {
194 // free tmp
195 std::vector<Data> tmp2;
196 tmp2.swap(tmp);
197 }
198
199 double meanv=x1/x0;
200 double sigmav=std::sqrt(x2/x0-meanv*meanv);
201 double h=std::pow(double(4)/double(3),0.2)*std::pow(_sumWgt,-0.2)*_rho;
202 double hmin=h*sigmav*std::sqrt(2.)/10;
203 double norm=h*std::sqrt(sigmav * _sumWgt)/(2.0*std::sqrt(3.0));
204
205 _weights=new double[_nEvents];
206 for(Int_t j=0;j<_nEvents;++j) {
207 _weights[j] = norm / std::sqrt(_dataWgts[j] * g(_dataPts[j],h*sigmav));
208 if (_weights[j]<hmin) _weights[j]=hmin;
209 }
210
211 // The idea below is that beyond nSigma sigma, the value of the exponential
212 // in the Gaussian is well below the machine precision of a double, so it
213 // does not contribute any more. That way, we can limit how many bins of the
214 // binned approximation in _lookupTable we have to touch when filling it.
215 for (Int_t i=0;i<_nPoints+1;++i) _lookupTable[i] = 0.;
216 for(Int_t j=0;j<_nEvents;++j) {
217 const double xlo = std::min(_hi,
218 std::max(_lo, _dataPts[j] - _nSigma * _weights[j]));
219 const double xhi = std::max(_lo,
220 std::min(_hi, _dataPts[j] + _nSigma * _weights[j]));
221 if (xlo >= xhi) continue;
222 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
223 const double weightratio = _dataWgts[j] / _weights[j];
224 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
225 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
226 const double x = (double(_nPoints - binlo) * _lo +
227 double(binlo) * _hi) / double(_nPoints);
228 double chi = (x - _dataPts[j]) / _weights[j] / std::sqrt(2.);
229 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
230 _lookupTable[k] += weightratio * std::exp(- chi * chi);
231 }
232 }
233 if (_asymLeft) {
234 for(Int_t j=0;j<_nEvents;++j) {
235 const double xlo = std::min(_hi,
236 std::max(_lo, 2. * _lo - _dataPts[j] + _nSigma * _weights[j]));
237 const double xhi = std::max(_lo,
238 std::min(_hi, 2. * _lo - _dataPts[j] - _nSigma * _weights[j]));
239 if (xlo >= xhi) continue;
240 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
241 const double weightratio = _dataWgts[j] / _weights[j];
242 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
243 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
244 const double x = (double(_nPoints - binlo) * _lo +
245 double(binlo) * _hi) / double(_nPoints);
246 double chi = (x - (2. * _lo - _dataPts[j])) / _weights[j] / std::sqrt(2.);
247 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
248 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
249 }
250 }
251 }
252 if (_asymRight) {
253 for(Int_t j=0;j<_nEvents;++j) {
254 const double xlo = std::min(_hi,
255 std::max(_lo, 2. * _hi - _dataPts[j] + _nSigma * _weights[j]));
256 const double xhi = std::max(_lo,
257 std::min(_hi, 2. * _hi - _dataPts[j] - _nSigma * _weights[j]));
258 if (xlo >= xhi) continue;
259 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
260 const double weightratio = _dataWgts[j] / _weights[j];
261 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
262 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
263 const double x = (double(_nPoints - binlo) * _lo +
264 double(binlo) * _hi) / double(_nPoints);
265 double chi = (x - (2. * _hi - _dataPts[j])) / _weights[j] / std::sqrt(2.);
266 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
267 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
268 }
269 }
270 }
271 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
272 for (Int_t i=0;i<_nPoints+1;++i)
273 _lookupTable[i] /= sqrt2pi * _sumWgt;
274}
275
276////////////////////////////////////////////////////////////////////////////////
277
278double RooKeysPdf::evaluate() const {
279 Int_t i = (Int_t)floor((double(_x)-_lo)/_binWidth);
280 if (i<0) {
281// cerr << "got point below lower bound:"
282// << double(_x) << " < " << _lo
283// << " -- performing linear extrapolation..." << endl;
284 i=0;
285 }
286 if (i>_nPoints-1) {
287// cerr << "got point above upper bound:"
288// << double(_x) << " > " << _hi
289// << " -- performing linear extrapolation..." << endl;
290 i=_nPoints-1;
291 }
292 double dx = (double(_x)-(_lo+i*_binWidth))/_binWidth;
293
294 // for now do simple linear interpolation.
295 // one day replace by splines...
296 double ret = (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));
297 if (ret<0) ret=0 ;
298 return ret ;
299}
300
302 RooArgSet& allVars, RooArgSet& analVars, const char* /* rangeName */) const
303{
304 if (matchArgs(allVars, analVars, _x)) return 1;
305 return 0;
306}
307
308double RooKeysPdf::analyticalIntegral(Int_t code, const char* rangeName) const
309{
310 R__ASSERT(1 == code);
311 // this code is based on _lookupTable and uses linear interpolation, just as
312 // evaluate(); integration is done using the trapez rule
313 const double xmin = std::max(_lo, _x.min(rangeName));
314 const double xmax = std::min(_hi, _x.max(rangeName));
315 const Int_t imin = (Int_t)floor((xmin - _lo) / _binWidth);
316 const Int_t imax = std::min((Int_t)floor((xmax - _lo) / _binWidth),
317 _nPoints - 1);
318 double sum = 0.;
319 // sum up complete bins in middle
320 if (imin + 1 < imax)
321 sum += _lookupTable[imin + 1] + _lookupTable[imax];
322 for (Int_t i = imin + 2; i < imax; ++i)
323 sum += 2. * _lookupTable[i];
324 sum *= _binWidth * 0.5;
325 // treat incomplete bins
326 const double dxmin = (xmin - (_lo + imin * _binWidth)) / _binWidth;
327 const double dxmax = (xmax - (_lo + imax * _binWidth)) / _binWidth;
328 if (imin < imax) {
329 // first bin
330 sum += _binWidth * (1. - dxmin) * 0.5 * (_lookupTable[imin + 1] +
331 _lookupTable[imin] + dxmin *
332 (_lookupTable[imin + 1] - _lookupTable[imin]));
333 // last bin
334 sum += _binWidth * dxmax * 0.5 * (_lookupTable[imax] +
335 _lookupTable[imax] + dxmax *
336 (_lookupTable[imax + 1] - _lookupTable[imax]));
337 } else if (imin == imax) {
338 // first bin == last bin
339 sum += _binWidth * (dxmax - dxmin) * 0.5 * (
340 _lookupTable[imin] + dxmin *
341 (_lookupTable[imin + 1] - _lookupTable[imin]) +
342 _lookupTable[imax] + dxmax *
343 (_lookupTable[imax + 1] - _lookupTable[imax]));
344 }
345 return sum;
346}
347
349{
350 if (vars.contains(*_x.absArg())) return 1;
351 return 0;
352}
353
354double RooKeysPdf::maxVal(Int_t code) const
355{
356 R__ASSERT(1 == code);
357 double max = -std::numeric_limits<double>::max();
358 for (Int_t i = 0; i <= _nPoints; ++i)
359 if (max < _lookupTable[i]) max = _lookupTable[i];
360 return max;
361}
362
363////////////////////////////////////////////////////////////////////////////////
364
365double RooKeysPdf::g(double x,double sigmav) const {
366 double y=0;
367 // since data is sorted, we can be a little faster because we know which data
368 // points contribute
369 double* it = std::lower_bound(_dataPts, _dataPts + _nEvents,
370 x - _nSigma * sigmav);
371 if (it >= (_dataPts + _nEvents)) return 0.;
372 double* iend = std::upper_bound(it, _dataPts + _nEvents,
373 x + _nSigma * sigmav);
374 for ( ; it < iend; ++it) {
375 const double r = (x - *it) / sigmav;
376 y += std::exp(-0.5 * r * r);
377 }
378
379 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
380 return y/(sigmav*sqrt2pi);
381}
#define b(i)
Definition RSha256.hxx:100
#define g(i)
Definition RSha256.hxx:105
#define a(i)
Definition RSha256.hxx:99
#define h(i)
Definition RSha256.hxx:106
#define TRACE_DESTROY
Definition RooTrace.h:24
#define TRACE_CREATE
Definition RooTrace.h:23
int Int_t
Definition RtypesCore.h:45
#define ClassImp(name)
Definition Rtypes.h:382
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t hmin
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
char name[80]
Definition TGX11.cxx:110
float xmin
float xmax
TRObject operator()(const T1 &t1) const
#define snprintf
Definition civetweb.c:1540
bool contains(const RooAbsArg &var) const
Check if collection contains an argument with the same name as var.
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition RooAbsReal.h:103
bool matchArgs(const RooArgSet &allDeps, RooArgSet &numDeps, const RooArgProxy &a) const
Utility function for use in getAnalyticalIntegral().
RooAbsArg * absArg() const
Return pointer to contained argument.
Definition RooArgProxy.h:46
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:24
Container class to hold unbinned data.
Definition RooDataSet.h:34
Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution of...
Definition RooKeysPdf.h:24
double _binWidth
Definition RooKeysPdf.h:77
double _sumWgt
Definition RooKeysPdf.h:63
static constexpr int _nPoints
Definition RooKeysPdf.h:65
double * _dataWgts
Definition RooKeysPdf.h:61
RooKeysPdf()
coverity[UNINIT_CTOR]
double _lookupTable[_nPoints+1]
Definition RooKeysPdf.h:66
double maxVal(Int_t code) const override
Return maximum value for set of observables identified by code assigned in getMaxVal.
bool _mirrorRight
Definition RooKeysPdf.h:71
double _rho
Definition RooKeysPdf.h:78
double _hi
Definition RooKeysPdf.h:77
Char_t _varName[128]
Definition RooKeysPdf.h:76
double g(double x, double sigma) const
Int_t _nEvents
Definition RooKeysPdf.h:59
void LoadDataSet(RooDataSet &data)
Int_t getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char *rangeName=nullptr) const override
Interface function getAnalyticalIntergral advertises the analytical integrals that are supported.
RooRealProxy _x
Definition RooKeysPdf.h:51
double * _weights
Definition RooKeysPdf.h:62
double analyticalIntegral(Int_t code, const char *rangeName=nullptr) const override
Implements the actual analytical integral(s) advertised by getAnalyticalIntegral.
bool _mirrorLeft
Definition RooKeysPdf.h:70
double evaluate() const override
Evaluate this PDF / function / constant. Needs to be overridden by all derived classes.
~RooKeysPdf() override
double _lo
Definition RooKeysPdf.h:77
double * _dataPts
Definition RooKeysPdf.h:60
bool _asymRight
Definition RooKeysPdf.h:73
static const double _nSigma
Definition RooKeysPdf.h:57
bool _asymLeft
Definition RooKeysPdf.h:72
Int_t getMaxVal(const RooArgSet &vars) const override
Advertise capability to determine maximum value of function for given set of observables.
Variable that can be changed from the outside.
Definition RooRealVar.h:37
double max(const char *rname=nullptr) const
Query upper limit of range. This requires the payload to be RooAbsRealLValue or derived.
double min(const char *rname=nullptr) const
Query lower limit of range. This requires the payload to be RooAbsRealLValue or derived.
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
constexpr Double_t Pi()
Definition TMath.h:37
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345