Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RooKeysPdf.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitModels *
4 * @(#)root/roofit:$Id$
5 * Authors: *
6 * GR, Gerhard Raven, UC San Diego, raven@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
9 * *
10 * Copyright (c) 2000-2005, Regents of the University of California *
11 * and Stanford University. All rights reserved. *
12 * *
13 * Redistribution and use in source and binary forms, *
14 * with or without modification, are permitted according to the terms *
15 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
16 *****************************************************************************/
17
18/** \class RooKeysPdf
19 \ingroup Roofit
20
21Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution
22of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
23each contributing 1/N to the total integral of the pdf.
24If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
25local density of events, i.e. narrow for regions with high event density to preserve details and
26wide for regions with low event density to promote smoothness. The details of the general algorithm
27are described in the following paper:
28
29Cranmer KS, Kernel Estimation in High-Energy Physics.
30 Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
31**/
32
33#include <limits>
34#include <algorithm>
35#include <cmath>
36#include <iostream>
37#include "TMath.h"
38#include "snprintf.h"
39#include "RooKeysPdf.h"
40#include "RooRealVar.h"
41#include "RooRandom.h"
42#include "RooDataSet.h"
43#include "RooTrace.h"
44
45#include "TError.h"
46
47
48const double RooKeysPdf::_nSigma = std::sqrt(-2. *
49 std::log(std::numeric_limits<double>::epsilon()));
50
51////////////////////////////////////////////////////////////////////////////////
52/// coverity[UNINIT_CTOR]
53
58
59////////////////////////////////////////////////////////////////////////////////
60/// cache stuff about x
61
62RooKeysPdf::RooKeysPdf(const char *name, const char *title, RooAbsReal &x, RooDataSet &data, Mirror mirror, double rho)
63 : RooKeysPdf(name, title, x, static_cast<RooRealVar &>(x), data, mirror, rho)
64{
65}
66
67////////////////////////////////////////////////////////////////////////////////
68/// cache stuff about x
69
71 Mirror mirror, double rho)
72 : RooAbsPdf(name, title),
73 _x("x", "Observable", this, xpdf),
74 _mirrorLeft(mirror == MirrorLeft || mirror == MirrorBoth || mirror == MirrorLeftAsymRight),
75 _mirrorRight(mirror == MirrorRight || mirror == MirrorBoth || mirror == MirrorAsymLeftRight),
76 _asymLeft(mirror == MirrorAsymLeft || mirror == MirrorAsymLeftRight || mirror == MirrorAsymBoth),
77 _asymRight(mirror == MirrorAsymRight || mirror == MirrorLeftAsymRight || mirror == MirrorAsymBoth),
78 _lo(xdata.getMin()),
79 _hi(xdata.getMax()),
80 _binWidth((_hi - _lo) / (_nPoints - 1)),
81 _rho(rho)
82{
83 snprintf(_varName, 128,"%s", xdata.GetName());
84
85 // form the lookup table
88}
89
90////////////////////////////////////////////////////////////////////////////////
91
94 _x("x", this, other._x),
95 _nEvents(other._nEvents),
96 _mirrorLeft(other._mirrorLeft),
97 _mirrorRight(other._mirrorRight),
98 _asymLeft(other._asymLeft),
99 _asymRight(other._asymRight),
100 _lo(other._lo),
101 _hi(other._hi),
102 _binWidth(other._binWidth),
103 _rho(other._rho)
104{
105 // cache stuff about x
106 snprintf(_varName, 128, "%s", other._varName );
107
108 // copy over data and weights... not necessary, commented out for speed
109// _dataPts = new double[_nEvents];
110// _weights = new double[_nEvents];
111// for (Int_t i= 0; i<_nEvents; i++) {
112// _dataPts[i]= other._dataPts[i];
113// _weights[i]= other._weights[i];
114// }
115
116 // copy over the lookup table
117 for (Int_t i= 0; i<_nPoints+1; i++)
118 _lookupTable[i]= other._lookupTable[i];
119
121}
122
123////////////////////////////////////////////////////////////////////////////////
124
126 delete[] _dataPts;
127 delete[] _dataWgts;
128 delete[] _weights;
129
131}
132
133////////////////////////////////////////////////////////////////////////////////
134/// small helper structure
135
136namespace {
137 struct Data {
138 double x;
139 double w;
140 };
141 // helper to order two Data structures
142 struct cmp {
143 inline bool operator()(const struct Data& a, const struct Data& b) const
144 { return a.x < b.x; }
145 };
146}
148 delete[] _dataPts;
149 delete[] _dataWgts;
150 delete[] _weights;
151
152 std::vector<Data> tmp;
153 tmp.reserve((1 + _mirrorLeft + _mirrorRight) * data.numEntries());
154 double x0 = 0.;
155 double x1 = 0.;
156 double x2 = 0.;
157 _sumWgt = 0.;
158 // read the data set into tmp and accumulate some statistics
159 RooRealVar& real = static_cast<RooRealVar&>(data.get()->operator[](_varName));
160 for (Int_t i = 0; i < data.numEntries(); ++i) {
161 data.get(i);
162 const double x = real.getVal();
163 const double w = data.weight();
164 x0 += w;
165 x1 += w * x;
166 x2 += w * x * x;
168
169 Data p;
170 p.x = x, p.w = w;
171 tmp.push_back(p);
172 if (_mirrorLeft) {
173 p.x = 2. * _lo - x;
174 tmp.push_back(p);
175 }
176 if (_mirrorRight) {
177 p.x = 2. * _hi - x;
178 tmp.push_back(p);
179 }
180 }
181 // sort the entire data set so that values of x are increasing
182 std::sort(tmp.begin(), tmp.end(), cmp());
183
184 // copy the sorted data set to its final destination
185 _nEvents = tmp.size();
186 _dataPts = new double[_nEvents];
187 _dataWgts = new double[_nEvents];
188 for (unsigned i = 0; i < tmp.size(); ++i) {
189 _dataPts[i] = tmp[i].x;
190 _dataWgts[i] = tmp[i].w;
191 }
192 {
193 // free tmp
194 std::vector<Data> tmp2;
195 tmp2.swap(tmp);
196 }
197
198 double meanv=x1/x0;
199 double sigmav=std::sqrt(x2/x0-meanv*meanv);
200 double h=std::pow(double(4)/double(3),0.2)*std::pow(_sumWgt,-0.2)*_rho;
201 double hmin=h*sigmav*std::sqrt(2.)/10;
202 double norm=h*std::sqrt(sigmav * _sumWgt)/(2.0*std::sqrt(3.0));
203
204 _weights=new double[_nEvents];
205 for(Int_t j=0;j<_nEvents;++j) {
206 _weights[j] = norm / std::sqrt(_dataWgts[j] * g(_dataPts[j],h*sigmav));
207 if (_weights[j]<hmin) _weights[j]=hmin;
208 }
209
210 // The idea below is that beyond nSigma sigma, the value of the exponential
211 // in the Gaussian is well below the machine precision of a double, so it
212 // does not contribute any more. That way, we can limit how many bins of the
213 // binned approximation in _lookupTable we have to touch when filling it.
214 for (Int_t i=0;i<_nPoints+1;++i) _lookupTable[i] = 0.;
215 for(Int_t j=0;j<_nEvents;++j) {
216 const double xlo = std::min(_hi,
217 std::max(_lo, _dataPts[j] - _nSigma * _weights[j]));
218 const double xhi = std::max(_lo,
219 std::min(_hi, _dataPts[j] + _nSigma * _weights[j]));
220 if (xlo >= xhi) continue;
221 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
222 const double weightratio = _dataWgts[j] / _weights[j];
223 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
224 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
225 const double x = (double(_nPoints - binlo) * _lo +
226 double(binlo) * _hi) / double(_nPoints);
227 double chi = (x - _dataPts[j]) / _weights[j] / std::sqrt(2.);
228 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
229 _lookupTable[k] += weightratio * std::exp(- chi * chi);
230 }
231 }
232 if (_asymLeft) {
233 for(Int_t j=0;j<_nEvents;++j) {
234 const double xlo = std::min(_hi,
235 std::max(_lo, 2. * _lo - _dataPts[j] + _nSigma * _weights[j]));
236 const double xhi = std::max(_lo,
237 std::min(_hi, 2. * _lo - _dataPts[j] - _nSigma * _weights[j]));
238 if (xlo >= xhi) continue;
239 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
240 const double weightratio = _dataWgts[j] / _weights[j];
241 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
242 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
243 const double x = (double(_nPoints - binlo) * _lo +
244 double(binlo) * _hi) / double(_nPoints);
245 double chi = (x - (2. * _lo - _dataPts[j])) / _weights[j] / std::sqrt(2.);
246 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
247 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
248 }
249 }
250 }
251 if (_asymRight) {
252 for(Int_t j=0;j<_nEvents;++j) {
253 const double xlo = std::min(_hi,
254 std::max(_lo, 2. * _hi - _dataPts[j] + _nSigma * _weights[j]));
255 const double xhi = std::max(_lo,
256 std::min(_hi, 2. * _hi - _dataPts[j] - _nSigma * _weights[j]));
257 if (xlo >= xhi) continue;
258 const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
259 const double weightratio = _dataWgts[j] / _weights[j];
260 const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
261 const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
262 const double x = (double(_nPoints - binlo) * _lo +
263 double(binlo) * _hi) / double(_nPoints);
264 double chi = (x - (2. * _hi - _dataPts[j])) / _weights[j] / std::sqrt(2.);
265 for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
266 _lookupTable[k] -= weightratio * std::exp(- chi * chi);
267 }
268 }
269 }
270 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
271 for (Int_t i=0;i<_nPoints+1;++i)
273}
274
275////////////////////////////////////////////////////////////////////////////////
276
277double RooKeysPdf::evaluate() const {
278 Int_t i = (Int_t)floor((double(_x)-_lo)/_binWidth);
279 if (i<0) {
280// cerr << "got point below lower bound:"
281// << double(_x) << " < " << _lo
282// << " -- performing linear extrapolation..." << std::endl;
283 i=0;
284 }
285 if (i>_nPoints-1) {
286// cerr << "got point above upper bound:"
287// << double(_x) << " > " << _hi
288// << " -- performing linear extrapolation..." << std::endl;
289 i=_nPoints-1;
290 }
291 double dx = (double(_x)-(_lo+i*_binWidth))/_binWidth;
292
293 // for now do simple linear interpolation.
294 // one day replace by splines...
295 double ret = (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));
296 if (ret<0) ret=0 ;
297 return ret ;
298}
299
301 RooArgSet& allVars, RooArgSet& analVars, const char* /* rangeName */) const
302{
303 if (matchArgs(allVars, analVars, _x)) return 1;
304 return 0;
305}
306
307double RooKeysPdf::analyticalIntegral(Int_t code, const char* rangeName) const
308{
309 R__ASSERT(1 == code);
310 // this code is based on _lookupTable and uses linear interpolation, just as
311 // evaluate(); integration is done using the trapez rule
312 const double xmin = std::max(_lo, _x.min(rangeName));
313 const double xmax = std::min(_hi, _x.max(rangeName));
314 const Int_t imin = (Int_t)floor((xmin - _lo) / _binWidth);
315 const Int_t imax = std::min((Int_t)floor((xmax - _lo) / _binWidth),
316 _nPoints - 1);
317 double sum = 0.;
318 // sum up complete bins in middle
319 if (imin + 1 < imax)
321 for (Int_t i = imin + 2; i < imax; ++i)
322 sum += 2. * _lookupTable[i];
323 sum *= _binWidth * 0.5;
324 // treat incomplete bins
325 const double dxmin = (xmin - (_lo + imin * _binWidth)) / _binWidth;
326 const double dxmax = (xmax - (_lo + imax * _binWidth)) / _binWidth;
327 if (imin < imax) {
328 // first bin
329 sum += _binWidth * (1. - dxmin) * 0.5 * (_lookupTable[imin + 1] +
332 // last bin
333 sum += _binWidth * dxmax * 0.5 * (_lookupTable[imax] +
336 } else if (imin == imax) {
337 // first bin == last bin
338 sum += _binWidth * (dxmax - dxmin) * 0.5 * (
343 }
344 return sum;
345}
346
348{
349 if (vars.contains(*_x.absArg())) return 1;
350 return 0;
351}
352
353double RooKeysPdf::maxVal(Int_t code) const
354{
355 R__ASSERT(1 == code);
356 double max = -std::numeric_limits<double>::max();
357 for (Int_t i = 0; i <= _nPoints; ++i)
358 if (max < _lookupTable[i]) max = _lookupTable[i];
359 return max;
360}
361
362////////////////////////////////////////////////////////////////////////////////
363
364double RooKeysPdf::g(double x,double sigmav) const {
365 double y=0;
366 // since data is sorted, we can be a little faster because we know which data
367 // points contribute
368 double* it = std::lower_bound(_dataPts, _dataPts + _nEvents,
369 x - _nSigma * sigmav);
370 if (it >= (_dataPts + _nEvents)) return 0.;
371 double* iend = std::upper_bound(it, _dataPts + _nEvents,
372 x + _nSigma * sigmav);
373 for ( ; it < iend; ++it) {
374 const double r = (x - *it) / sigmav;
375 y += std::exp(-0.5 * r * r);
376 }
377
378 static const double sqrt2pi(std::sqrt(2*TMath::Pi()));
379 return y/(sigmav*sqrt2pi);
380}
#define b(i)
Definition RSha256.hxx:100
#define g(i)
Definition RSha256.hxx:105
#define a(i)
Definition RSha256.hxx:99
#define h(i)
Definition RSha256.hxx:106
#define TRACE_DESTROY
Definition RooTrace.h:24
#define TRACE_CREATE
Definition RooTrace.h:23
int Int_t
Definition RtypesCore.h:45
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t hmin
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char x2
Option_t Option_t TPoint TPoint const char x1
char name[80]
Definition TGX11.cxx:110
float xmin
float xmax
TRObject operator()(const T1 &t1) const
#define snprintf
Definition civetweb.c:1540
bool contains(const RooAbsArg &var) const
Check if collection contains an argument with the same name as var.
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
bool matchArgs(const RooArgSet &allDeps, RooArgSet &numDeps, const RooArgProxy &a) const
Utility function for use in getAnalyticalIntegral().
RooAbsArg * absArg() const
Return pointer to contained argument.
Definition RooArgProxy.h:46
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:24
Container class to hold unbinned data.
Definition RooDataSet.h:34
Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution of...
Definition RooKeysPdf.h:24
double _binWidth
Definition RooKeysPdf.h:77
double _sumWgt
Definition RooKeysPdf.h:63
static constexpr int _nPoints
Definition RooKeysPdf.h:65
double * _dataWgts
Definition RooKeysPdf.h:61
RooKeysPdf()
coverity[UNINIT_CTOR]
double _lookupTable[_nPoints+1]
Definition RooKeysPdf.h:66
double maxVal(Int_t code) const override
Return maximum value for set of observables identified by code assigned in getMaxVal.
bool _mirrorRight
Definition RooKeysPdf.h:71
double _rho
Definition RooKeysPdf.h:78
double _hi
Definition RooKeysPdf.h:77
Char_t _varName[128]
Definition RooKeysPdf.h:76
double g(double x, double sigma) const
Int_t _nEvents
Definition RooKeysPdf.h:59
void LoadDataSet(RooDataSet &data)
Int_t getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char *rangeName=nullptr) const override
Interface function getAnalyticalIntergral advertises the analytical integrals that are supported.
RooRealProxy _x
Definition RooKeysPdf.h:51
double * _weights
Definition RooKeysPdf.h:62
double analyticalIntegral(Int_t code, const char *rangeName=nullptr) const override
Implements the actual analytical integral(s) advertised by getAnalyticalIntegral.
bool _mirrorLeft
Definition RooKeysPdf.h:70
double evaluate() const override
Evaluate this PDF / function / constant. Needs to be overridden by all derived classes.
~RooKeysPdf() override
double _lo
Definition RooKeysPdf.h:77
double * _dataPts
Definition RooKeysPdf.h:60
bool _asymRight
Definition RooKeysPdf.h:73
static const double _nSigma
Definition RooKeysPdf.h:57
bool _asymLeft
Definition RooKeysPdf.h:72
Int_t getMaxVal(const RooArgSet &vars) const override
Advertise capability to determine maximum value of function for given set of observables.
Variable that can be changed from the outside.
Definition RooRealVar.h:37
double max(const char *rname=nullptr) const
Query upper limit of range. This requires the payload to be RooAbsRealLValue or derived.
double min(const char *rname=nullptr) const
Query lower limit of range. This requires the payload to be RooAbsRealLValue or derived.
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
constexpr Double_t Pi()
Definition TMath.h:37
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345