doc/v632/RooKeysPdf_8cxx_source.html

/*****************************************************************************

 * Project: RooFit                                                           *

 * Package: RooFitModels                                                     *

 * @(#)root/roofit:$Id$

 * Authors:                                                                  *

 *   GR, Gerhard Raven,   UC San Diego,        raven@slac.stanford.edu       *

 *   DK, David Kirkby,    UC Irvine,         dkirkby@uci.edu                 *

 *   WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu       *

 *                                                                           *

 * Copyright (c) 2000-2005, Regents of the University of California          *

 *                          and Stanford University. All rights reserved.    *

 *                                                                           *

 * Redistribution and use in source and binary forms,                        *

 * with or without modification, are permitted according to the terms        *

 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)             *

 *****************************************************************************/


/** \class RooKeysPdf

    \ingroup Roofit


Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f. which models the distribution

of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,

each contributing 1/N to the total integral of the pdf.

If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the

local density of events, i.e. narrow for regions with high event density to preserve details and

wide for regions with low event density to promote smoothness. The details of the general algorithm

are described in the following paper:


Cranmer KS, Kernel Estimation in High-Energy Physics.

            Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep-ex/0011057

**/


#include <limits>

#include <algorithm>

#include <cmath>

#include <iostream>

#include "TMath.h"

#include "snprintf.h"

#include "RooKeysPdf.h"

#include "RooRealVar.h"

#include "RooRandom.h"

#include "RooDataSet.h"

#include "RooTrace.h"


#include "TError.h"


ClassImp(RooKeysPdf);

/// Cut-off of a Gaussian kernel in units of its width. The value solves
/// exp(-_nSigma * _nSigma / 2) == std::numeric_limits<double>::epsilon(), i.e.
/// beyond this distance the exponential factor is below double precision.
const double RooKeysPdf::_nSigma = std::sqrt(-2. * std::log(std::numeric_limits<double>::epsilon()));


////////////////////////////////////////////////////////////////////////////////
/// Default constructor. The body only invokes the TRACE_CREATE macro; no data
/// member is assigned explicitly here.
/// coverity[UNINIT_CTOR]

RooKeysPdf::RooKeysPdf() { TRACE_CREATE; }


////////////////////////////////////////////////////////////////////////////////
/// Construct a keys p.d.f. where the same object `x` is used both as the
/// observable of the p.d.f. and as the variable looked up in `data`.
/// All work is forwarded to the constructor that takes the two roles separately.
///
/// \param[in] name   Object name, passed on unchanged.
/// \param[in] title  Object title, passed on unchanged.
/// \param[in] x      Observable; also used as the data variable.
/// \param[in] data   Data set the kernels are built from.
/// \param[in] mirror Mirroring option, passed on unchanged.
/// \param[in] rho    Bandwidth scale factor, passed on unchanged.
///
/// \note `x` is converted to RooRealVar with an unchecked static_cast, so the
///       caller must pass an object that really is a RooRealVar.

RooKeysPdf::RooKeysPdf(const char *name, const char *title, RooAbsReal &x, RooDataSet &data, Mirror mirror,
                       double rho)
   : RooKeysPdf(name, title, x, static_cast<RooRealVar &>(x), data, mirror, rho)
{
}


////////////////////////////////////////////////////////////////////////////////
/// Construct a keys p.d.f. from a data set.
///
/// \param[in] name   Object name handed to RooAbsPdf.
/// \param[in] title  Object title handed to RooAbsPdf.
/// \param[in] xpdf   Observable of the p.d.f.; held through the proxy `_x`.
/// \param[in] xdata  Variable of the data set. Its name is stored in `_varName`
///                   and its getMin()/getMax() define the table range [_lo, _hi].
/// \param[in] data   Data set that is immediately processed by LoadDataSet().
/// \param[in] mirror Mirroring option; decoded into the four boolean flags
///                   `_mirrorLeft`, `_mirrorRight`, `_asymLeft`, `_asymRight`
///                   that LoadDataSet() consults.
/// \param[in] rho    Stored in `_rho`; multiplies the bandwidth in LoadDataSet().

RooKeysPdf::RooKeysPdf(const char *name, const char *title, RooAbsReal &xpdf, RooRealVar &xdata, RooDataSet &data,
                       Mirror mirror, double rho)
   : RooAbsPdf(name, title),
     _x("x", "Observable", this, xpdf),
     // Plain (additive) mirroring on the left / right boundary.
     _mirrorLeft(mirror == MirrorLeft || mirror == MirrorBoth || mirror == MirrorLeftAsymRight),
     _mirrorRight(mirror == MirrorRight || mirror == MirrorBoth || mirror == MirrorAsymLeftRight),
     // Asymmetric (subtractive) mirroring on the left / right boundary.
     _asymLeft(mirror == MirrorAsymLeft || mirror == MirrorAsymLeftRight || mirror == MirrorAsymBoth),
     _asymRight(mirror == MirrorAsymRight || mirror == MirrorLeftAsymRight || mirror == MirrorAsymBoth),
     _lo(xdata.getMin()),
     _hi(xdata.getMax()),
     // Table points are spaced so that point index _nPoints - 1 sits at _hi.
     _binWidth((_hi - _lo) / (_nPoints - 1)),
     _rho(rho)
{
   // Remember which column of the data set has to be read.
   snprintf(_varName, 128, "%s", xdata.GetName());

   // Build the sorted event arrays, the adaptive widths and the lookup table.
   LoadDataSet(data);
   TRACE_CREATE;
}


////////////////////////////////////////////////////////////////////////////////
/// Copy constructor.
///
/// Copies the proxy, the event count, the mirroring flags, the range, the bin
/// width, rho, the data variable name and the complete lookup table.
/// The per-event arrays are not duplicated here: the original author marked
/// that copy as unnecessary and left it out for speed.

RooKeysPdf::RooKeysPdf(const RooKeysPdf &other, const char *name)
   : RooAbsPdf(other, name),
     _x("x", this, other._x),
     _nEvents(other._nEvents),
     _mirrorLeft(other._mirrorLeft),
     _mirrorRight(other._mirrorRight),
     _asymLeft(other._asymLeft),
     _asymRight(other._asymRight),
     _lo(other._lo),
     _hi(other._hi),
     _binWidth(other._binWidth),
     _rho(other._rho)
{
   // Name of the data-set variable this p.d.f. was built from.
   snprintf(_varName, 128, "%s", other._varName);

   // Duplicate all _nPoints + 1 entries of the lookup table.
   std::copy(other._lookupTable, other._lookupTable + _nPoints + 1, _lookupTable);

   TRACE_CREATE;
}


////////////////////////////////////////////////////////////////////////////////
/// Destructor: frees the three arrays that LoadDataSet() allocates with new[].

RooKeysPdf::~RooKeysPdf()
{
   delete[] _weights;
   delete[] _dataWgts;
   delete[] _dataPts;

   TRACE_DESTROY;
}


////////////////////////////////////////////////////////////////////////////////
/// File-local helpers used while reading a data set.

namespace {

/// One entry read from the data set: observable value and event weight.
struct Data {
   double x;
   double w;
};

/// Orders Data entries by increasing x; the weight does not take part.
struct cmp {
   bool operator()(const Data &lhs, const Data &rhs) const { return lhs.x < rhs.x; }
};

} // namespace


////////////////////////////////////////////////////////////////////////////////
/// (Re)build all data-dependent state from `data`.
///
/// Steps:
///  1. Read the variable named `_varName` and the event weight of every entry,
///     adding a reflected copy about `_lo` and/or `_hi` when `_mirrorLeft` /
///     `_mirrorRight` are set, and sort the result by increasing x.
///  2. Compute one kernel width per (possibly mirrored) event in `_weights`.
///  3. Fill `_lookupTable` with the weighted sum of Gaussian kernels, subtract
///     the kernels reflected about `_lo` / `_hi` when `_asymLeft` / `_asymRight`
///     are set, and divide by sqrt(2*pi) * `_sumWgt`.
///
/// \param[in] data Data set to read. The variable is fetched by name and
///                 converted to RooRealVar with an unchecked static_cast.

void RooKeysPdf::LoadDataSet(RooDataSet &data)
{
   // Release buffers of a previous call. Resetting the pointers keeps the
   // destructor safe should one of the allocations below throw.
   delete[] _dataPts;
   delete[] _dataWgts;
   delete[] _weights;
   _dataPts = nullptr;
   _dataWgts = nullptr;
   _weights = nullptr;

   // Every event is stored once, plus once per enabled plain mirror.
   const int nCopies = 1 + _mirrorLeft + _mirrorRight;

   std::vector<Data> tmp;
   tmp.reserve(nCopies * data.numEntries());
   double x0 = 0.; // sum of weights
   double x1 = 0.; // sum of weight * x
   double x2 = 0.; // sum of weight * x * x
   _sumWgt = 0.;
   // read the data set into tmp and accumulate some statistics
   RooRealVar &real = static_cast<RooRealVar &>(data.get()->operator[](_varName));
   for (Int_t i = 0; i < data.numEntries(); ++i) {
      data.get(i);
      const double x = real.getVal();
      const double w = data.weight();
      x0 += w;
      x1 += w * x;
      x2 += w * x * x;
      _sumWgt += double(nCopies) * w;

      Data p;
      p.x = x;
      p.w = w;
      tmp.push_back(p);
      if (_mirrorLeft) {
         p.x = 2. * _lo - x;
         tmp.push_back(p);
      }
      if (_mirrorRight) {
         p.x = 2. * _hi - x;
         tmp.push_back(p);
      }
   }
   // sort the entire data set so that values of x are increasing; g() relies
   // on this ordering for its binary searches
   std::sort(tmp.begin(), tmp.end(), cmp());

   // copy the sorted data set to its final destination
   _nEvents = tmp.size();
   _dataPts = new double[_nEvents];
   _dataWgts = new double[_nEvents];
   for (unsigned i = 0; i < tmp.size(); ++i) {
      _dataPts[i] = tmp[i].x;
      _dataWgts[i] = tmp[i].w;
   }
   {
      // free tmp
      std::vector<Data> tmp2;
      tmp2.swap(tmp);
   }

   // Weighted mean and spread of the unmirrored data.
   // NOTE(review): an empty data set (x0 == 0) makes these NaN - confirm that
   // callers never pass one.
   double meanv = x1 / x0;
   double sigmav = std::sqrt(x2 / x0 - meanv * meanv);
   double h = std::pow(double(4) / double(3), 0.2) * std::pow(_sumWgt, -0.2) * _rho;
   double hmin = h * sigmav * std::sqrt(2.) / 10;
   double norm = h * std::sqrt(sigmav * _sumWgt) / (2.0 * std::sqrt(3.0));

   // Per-event kernel width: inversely proportional to the square root of the
   // fixed-width density estimate g() at the event, but never below hmin.
   _weights = new double[_nEvents];
   for (Int_t j = 0; j < _nEvents; ++j) {
      _weights[j] = norm / std::sqrt(_dataWgts[j] * g(_dataPts[j], h * sigmav));
      if (_weights[j] < hmin)
         _weights[j] = hmin;
   }

   // The idea below is that beyond nSigma sigma, the value of the exponential
   // in the Gaussian is well below the machine precision of a double, so it
   // does not contribute any more. That way, we can limit how many bins of the
   // binned approximation in _lookupTable we have to touch when filling it.
   for (Int_t i = 0; i < _nPoints + 1; ++i)
      _lookupTable[i] = 0.;

   // Adds sign * (Gaussian kernel of event j, centred at `centre`) to every
   // table entry inside the window [centre - nSigma*w, centre + nSigma*w]
   // clipped to [_lo, _hi]. The same window is used for reflected centres:
   // with its two bounds interchanged the clipped window is always empty and
   // the asymmetric-mirror terms would silently never be applied.
   auto accumulateKernel = [this](Int_t j, double centre, double sign) {
      const double xlo = std::min(_hi, std::max(_lo, centre - _nSigma * _weights[j]));
      const double xhi = std::max(_lo, std::min(_hi, centre + _nSigma * _weights[j]));
      if (xlo >= xhi)
         return;
      const double chi2incr = _binWidth / _weights[j] / std::sqrt(2.);
      const double weightratio = _dataWgts[j] / _weights[j];
      const Int_t binlo = static_cast<Int_t>(std::floor((xlo - _lo) / _binWidth));
      const Int_t binhi = static_cast<Int_t>(_nPoints - std::floor((_hi - xhi) / _binWidth));
      // NOTE(review): this position divides by _nPoints whereas _binWidth is
      // (_hi - _lo) / (_nPoints - 1); kept as is, confirm which one is intended.
      const double x = (double(_nPoints - binlo) * _lo + double(binlo) * _hi) / double(_nPoints);
      double chi = (x - centre) / _weights[j] / std::sqrt(2.);
      for (Int_t k = binlo; k <= binhi; ++k, chi += chi2incr) {
         _lookupTable[k] += sign * weightratio * std::exp(-chi * chi);
      }
   };

   // Kernels of all stored events (including plain-mirrored copies).
   for (Int_t j = 0; j < _nEvents; ++j) {
      accumulateKernel(j, _dataPts[j], 1.);
   }
   // Asymmetric mirroring: subtract each kernel reflected about the boundary.
   if (_asymLeft) {
      for (Int_t j = 0; j < _nEvents; ++j) {
         accumulateKernel(j, 2. * _lo - _dataPts[j], -1.);
      }
   }
   if (_asymRight) {
      for (Int_t j = 0; j < _nEvents; ++j) {
         accumulateKernel(j, 2. * _hi - _dataPts[j], -1.);
      }
   }

   // Common Gaussian prefactor and division by the summed weight.
   static const double sqrt2pi(std::sqrt(2 * TMath::Pi()));
   for (Int_t i = 0; i < _nPoints + 1; ++i)
      _lookupTable[i] /= sqrt2pi * _sumWgt;
}


////////////////////////////////////////////////////////////////////////////////


double RooKeysPdf::evaluate() const {

  Int_t i = (Int_t)floor((double(_x)-_lo)/_binWidth);

  if (i<0) {

//     cerr << "got point below lower bound:"

//     << double(_x) << " < " << _lo

//     << " -- performing linear extrapolation..." << endl;

    i=0;

  }

  if (i>_nPoints-1) {

//     cerr << "got point above upper bound:"

//     << double(_x) << " > " << _hi

//     << " -- performing linear extrapolation..." << endl;

    i=_nPoints-1;

  }

  double dx = (double(_x)-(_lo+i*_binWidth))/_binWidth;


  // for now do simple linear interpolation.

  // one day replace by splines...

  double ret = (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));

  if (ret<0) ret=0 ;

  return ret ;

}


/// Advertise the integral over `_x`: returns code 1 when matchArgs() accepts
/// `_x` from `allVars` (which also fills `analVars`), and 0 otherwise.
/// The range name is not looked at.
Int_t RooKeysPdf::getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char * /* rangeName */) const
{
   return matchArgs(allVars, analVars, _x) ? 1 : 0;
}


/// Integral of the linearly interpolated lookup table over the range
/// `rangeName` of `_x`, clipped to [_lo, _hi]. Uses the trapezoidal rule, which
/// matches the interpolation done in evaluate().
///
/// \param[in] code      Must be 1 (asserted).
/// \param[in] rangeName Name of the integration range of `_x`.
/// \return The integral; 0 when the clipped range is empty.
double RooKeysPdf::analyticalIntegral(Int_t code, const char *rangeName) const
{
   R__ASSERT(1 == code);

   // Integration limits restricted to the region covered by the table.
   const double lower = std::max(_lo, _x.min(rangeName));
   const double upper = std::min(_hi, _x.max(rangeName));

   // Bins that contain the two limits.
   const Int_t firstBin = static_cast<Int_t>(std::floor((lower - _lo) / _binWidth));
   const Int_t lastBin = std::min(static_cast<Int_t>(std::floor((upper - _lo) / _binWidth)), _nPoints - 1);

   // Bins lying completely between the limits: table points firstBin + 1 up to
   // lastBin, end points counted once and inner points twice.
   double integral = 0.;
   if (firstBin + 1 < lastBin) {
      integral += _lookupTable[firstBin + 1] + _lookupTable[lastBin];
   }
   for (Int_t i = firstBin + 2; i < lastBin; ++i) {
      integral += 2. * _lookupTable[i];
   }
   integral *= _binWidth * 0.5;

   // Offsets of the limits inside their bins, in units of the bin width.
   const double fracLo = (lower - (_lo + firstBin * _binWidth)) / _binWidth;
   const double fracHi = (upper - (_lo + lastBin * _binWidth)) / _binWidth;

   if (firstBin < lastBin) {
      // Partial first bin: from the lower limit to table point firstBin + 1.
      integral += _binWidth * (1. - fracLo) * 0.5 *
                  (_lookupTable[firstBin + 1] + _lookupTable[firstBin] +
                   fracLo * (_lookupTable[firstBin + 1] - _lookupTable[firstBin]));
      // Partial last bin: from table point lastBin to the upper limit.
      integral += _binWidth * fracHi * 0.5 *
                  (_lookupTable[lastBin] + _lookupTable[lastBin] +
                   fracHi * (_lookupTable[lastBin + 1] - _lookupTable[lastBin]));
   } else if (firstBin == lastBin) {
      // Both limits fall into the same bin.
      integral += _binWidth * (fracHi - fracLo) * 0.5 *
                  (_lookupTable[firstBin] + fracLo * (_lookupTable[firstBin + 1] - _lookupTable[firstBin]) +
                   _lookupTable[lastBin] + fracHi * (_lookupTable[lastBin + 1] - _lookupTable[lastBin]));
   }
   return integral;
}


/// Advertise maxVal(): returns code 1 when `vars` contains the argument held by
/// the proxy `_x`, and 0 otherwise.
Int_t RooKeysPdf::getMaxVal(const RooArgSet &vars) const
{
   return vars.contains(*_x.absArg()) ? 1 : 0;
}


/// Largest entry of `_lookupTable`.
///
/// \param[in] code Must be 1 (asserted).
double RooKeysPdf::maxVal(Int_t code) const
{
   R__ASSERT(1 == code);

   // Start below every finite double so that any table entry can replace it.
   double largest = -std::numeric_limits<double>::max();
   for (const double entry : _lookupTable) {
      largest = std::max(largest, entry);
   }
   return largest;
}


////////////////////////////////////////////////////////////////////////////////
/// Sum of unit-area Gaussians of width `sigmav`, one centred on every stored
/// data point, evaluated at `x`. Event weights are not applied here.
/// Only points within `_nSigma * sigmav` of `x` are visited; they are located
/// by binary search, which requires `_dataPts` to be sorted.

double RooKeysPdf::g(double x, double sigmav) const
{
   double *const dataEnd = _dataPts + _nEvents;

   // First point that is not below the lower edge of the window.
   double *const first = std::lower_bound(_dataPts, dataEnd, x - _nSigma * sigmav);
   if (first >= dataEnd) {
      return 0.;
   }
   // One past the last point that is not above the upper edge of the window.
   double *const last = std::upper_bound(first, dataEnd, x + _nSigma * sigmav);

   double kernelSum = 0;
   for (double *pt = first; pt < last; ++pt) {
      const double r = (x - *pt) / sigmav;
      kernelSum += std::exp(-0.5 * r * r);
   }

   static const double sqrt2pi(std::sqrt(2 * TMath::Pi()));
   return kernelSum / (sigmav * sqrt2pi);
}


b
#define b(i)
Definition RSha256.hxx:100

g
#define g(i)
Definition RSha256.hxx:105

a
#define a(i)
Definition RSha256.hxx:99

h
#define h(i)
Definition RSha256.hxx:106

_nEvents
Int_t _nEvents
Total number of events in test statistic calculation.
Definition RooAbsTestStatistic.h:106

RooDataSet.h

RooKeysPdf.h

RooRandom.h

RooRealVar.h

RooTrace.h

TRACE_DESTROY
#define TRACE_DESTROY
Definition RooTrace.h:24

TRACE_CREATE
#define TRACE_CREATE
Definition RooTrace.h:23

Int_t
int Int_t
Definition RtypesCore.h:45

ClassImp
#define ClassImp(name)
Definition Rtypes.h:377

TRangeDynCast
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Definition TCollection.h:358

TError.h

R__ASSERT
#define R__ASSERT(e)
Definition TError.h:118

w
winID w
Definition TGWin32VirtualGLProxy.cxx:39

p
winID h TVirtualViewer3D TVirtualGLPainter p
Definition TGWin32VirtualGLProxy.cxx:51

hmin
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t hmin
Definition TGWin32VirtualXProxy.cxx:162

data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Definition TGWin32VirtualXProxy.cxx:104

r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Definition TGWin32VirtualXProxy.cxx:168

x2
Option_t Option_t TPoint TPoint const char x2
Definition TGWin32VirtualXProxy.cxx:70

x1
Option_t Option_t TPoint TPoint const char x1
Definition TGWin32VirtualXProxy.cxx:70

name
char name[80]
Definition TGX11.cxx:110

xmin
float xmin
Definition THbookFile.cxx:95

xmax
float xmax
Definition THbookFile.cxx:95

TMath.h

operator()
TRObject operator()(const T1 &t1) const
Definition TRFunctionImport__oprtr.h:14

snprintf
#define snprintf
Definition civetweb.c:1540

ROOT::Detail::TRangeCast
Definition TCollection.h:311

RooAbsCollection::contains
bool contains(const RooAbsArg &var) const
Check if collection contains an argument with the same name as var.
Definition RooAbsCollection.h:221

RooAbsPdf
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40

RooAbsReal
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59

RooAbsReal::matchArgs
bool matchArgs(const RooArgSet &allDeps, RooArgSet &numDeps, const RooArgProxy &a) const
Utility function for use in getAnalyticalIntegral().
Definition RooAbsReal.cxx:3228

RooArgProxy::absArg
RooAbsArg * absArg() const
Return pointer to contained argument.
Definition RooArgProxy.h:46

RooArgSet
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55

RooDataSet
Container class to hold unbinned data.
Definition RooDataSet.h:57

RooKeysPdf
Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution of...
Definition RooKeysPdf.h:24

RooKeysPdf::_binWidth
double _binWidth
Definition RooKeysPdf.h:77

RooKeysPdf::_sumWgt
double _sumWgt
Definition RooKeysPdf.h:63

RooKeysPdf::_nPoints
static constexpr int _nPoints
Definition RooKeysPdf.h:65

RooKeysPdf::_dataWgts
double * _dataWgts
Definition RooKeysPdf.h:61

RooKeysPdf::RooKeysPdf
RooKeysPdf()
coverity[UNINIT_CTOR]
Definition RooKeysPdf.cxx:55

RooKeysPdf::_lookupTable
double _lookupTable[_nPoints+1]
Definition RooKeysPdf.h:66

RooKeysPdf::maxVal
double maxVal(Int_t code) const override
Return maximum value for set of observables identified by code assigned in getMaxVal.
Definition RooKeysPdf.cxx:354

RooKeysPdf::_mirrorRight
bool _mirrorRight
Definition RooKeysPdf.h:71

RooKeysPdf::_rho
double _rho
Definition RooKeysPdf.h:78

RooKeysPdf::_hi
double _hi
Definition RooKeysPdf.h:77

RooKeysPdf::_varName
Char_t _varName[128]
Definition RooKeysPdf.h:76

RooKeysPdf::g
double g(double x, double sigma) const
Definition RooKeysPdf.cxx:365

RooKeysPdf::_nEvents
Int_t _nEvents
Definition RooKeysPdf.h:59

RooKeysPdf::LoadDataSet
void LoadDataSet(RooDataSet &data)
Definition RooKeysPdf.cxx:148

RooKeysPdf::getAnalyticalIntegral
Int_t getAnalyticalIntegral(RooArgSet &allVars, RooArgSet &analVars, const char *rangeName=nullptr) const override
Interface function getAnalyticalIntergral advertises the analytical integrals that are supported.
Definition RooKeysPdf.cxx:301

RooKeysPdf::_x
RooRealProxy _x
Definition RooKeysPdf.h:51

RooKeysPdf::_weights
double * _weights
Definition RooKeysPdf.h:62

RooKeysPdf::analyticalIntegral
double analyticalIntegral(Int_t code, const char *rangeName=nullptr) const override
Implements the actual analytical integral(s) advertised by getAnalyticalIntegral.
Definition RooKeysPdf.cxx:308

RooKeysPdf::_mirrorLeft
bool _mirrorLeft
Definition RooKeysPdf.h:70

RooKeysPdf::evaluate
double evaluate() const override
Evaluate this PDF / function / constant. Needs to be overridden by all derived classes.
Definition RooKeysPdf.cxx:278

RooKeysPdf::~RooKeysPdf
~RooKeysPdf() override
Definition RooKeysPdf.cxx:126

RooKeysPdf::_lo
double _lo
Definition RooKeysPdf.h:77

RooKeysPdf::Mirror
Mirror
Definition RooKeysPdf.h:26

RooKeysPdf::_dataPts
double * _dataPts
Definition RooKeysPdf.h:60

RooKeysPdf::_asymRight
bool _asymRight
Definition RooKeysPdf.h:73

RooKeysPdf::_nSigma
static const double _nSigma
Definition RooKeysPdf.h:57

RooKeysPdf::_asymLeft
bool _asymLeft
Definition RooKeysPdf.h:72

RooKeysPdf::getMaxVal
Int_t getMaxVal(const RooArgSet &vars) const override
Advertise capability to determine maximum value of function for given set of observables.
Definition RooKeysPdf.cxx:348

RooRealVar
Variable that can be changed from the outside.
Definition RooRealVar.h:37

RooTemplateProxy::max
double max(const char *rname=nullptr) const
Query upper limit of range. This requires the payload to be RooAbsRealLValue or derived.
Definition RooTemplateProxy.h:318

RooTemplateProxy::min
double min(const char *rname=nullptr) const
Query lower limit of range. This requires the payload to be RooAbsRealLValue or derived.
Definition RooTemplateProxy.h:316

double

int

y
Double_t y[n]
Definition legend1.C:17

x
Double_t x[n]
Definition legend1.C:17

TMath::Pi
constexpr Double_t Pi()
Definition TMath.h:37

sum
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345