Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
SamplingDistribution.cxx
Go to the documentation of this file.
1// @(#)root/roostats:$Id$
2
3/*************************************************************************
4 * Project: RooStats *
5 * Package: RooFit/RooStats *
6 * Authors: *
7 * Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke *
8 *************************************************************************
9 * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16/** \class RooStats::SamplingDistribution
17 \ingroup Roostats
18
19This class simply holds a sampling distribution of some test statistic.
20The distribution can either be an empirical distribution (eg. the samples themselves) or
21a weighted set of points (eg. for the FFT method).
22The class supports merging.
23*/
24
25#include "RooMsgService.h"
26
28#include "RooNumber.h"
29#include "TMath.h"
30#include <algorithm>
31#include <iostream>
32#include <cmath>
33#include <limits>
34using namespace std ;
35
37
38using namespace RooStats;
39
40////////////////////////////////////////////////////////////////////////////////
41/// SamplingDistribution constructor
42
43SamplingDistribution::SamplingDistribution( const char *name, const char *title,
44 std::vector<double>& samplingDist, const char * varName) :
45 TNamed(name,title)
46{
47 fSamplingDist = samplingDist;
48 // need to check STL stuff here. Will this = operator work as wanted, or do we need:
49 // std::copy(samplingDist.begin(), samplingDist.end(), fSamplingDist.begin());
50
51 // WVE must fill sampleWeights vector here otherwise append behavior potentially undefined
52 fSampleWeights.resize(fSamplingDist.size(),1.0) ;
53
54 fVarName = varName;
55}
56
57////////////////////////////////////////////////////////////////////////////////
58/// SamplingDistribution constructor
59
60SamplingDistribution::SamplingDistribution( const char *name, const char *title,
61 std::vector<double>& samplingDist, std::vector<double>& sampleWeights, const char * varName) :
62 TNamed(name,title)
63{
64 fSamplingDist = samplingDist;
65 fSampleWeights = sampleWeights;
66 // need to check STL stuff here. Will this = operator work as wanted, or do we need:
67 // std::copy(samplingDist.begin(), samplingDist.end(), fSamplingDist.begin());
68
69 fVarName = varName;
70}
71
72////////////////////////////////////////////////////////////////////////////////
73/// SamplingDistribution constructor (with name and title)
74
75SamplingDistribution::SamplingDistribution( const char *name, const char *title, const char * varName) :
76 TNamed(name,title)
77{
78 fVarName = varName;
79}
80
81////////////////////////////////////////////////////////////////////////////////
82/// Creates a SamplingDistribution from a RooDataSet for debugging
83/// purposes; e.g. if you need a Gaussian type SamplingDistribution
84/// you can generate it from a Gaussian pdf and use the resulting
85/// RooDataSet with this constructor.
86///
87/// The result is the projected distribution onto varName
88/// marginalizing the other variables.
89///
90/// If varName is not given, the first variable will be used.
91/// This is useful mostly for RooDataSets with only one observable.
92
94 const char *name,
95 const char *title,
96 RooDataSet& dataSet,
97 const char * _columnName,
98 const char * varName
99) : TNamed(name, title) {
100
101
102 // check there are any meaningful entries in the given dataset
103 if( dataSet.numEntries() == 0 || !dataSet.get()->first() ) {
104 if( varName ) fVarName = varName;
105 return;
106 }
107
108 TString columnName( _columnName );
109
110 if( !columnName.Length() ) {
111 columnName.Form( "%s_TS0", name );
112 if( !dataSet.get()->find(columnName) ) {
113 columnName = dataSet.get()->first()->GetName();
114 }
115 }
116
117 if( !varName ) {
118 // no leak. none of these transfers ownership.
119 fVarName = (*dataSet.get())[columnName].GetTitle();
120 }else{
121 fVarName = varName;
122 }
123
124 for(Int_t i=0; i < dataSet.numEntries(); i++) {
125 fSamplingDist.push_back(dataSet.get(i)->getRealValue(columnName));
126 fSampleWeights.push_back(dataSet.weight());
127 }
128}
129
130
131////////////////////////////////////////////////////////////////////////////////
132/// SamplingDistribution default constructor
133
135 TNamed("SamplingDistribution_DefaultName","SamplingDistribution")
136{
137}
138
139////////////////////////////////////////////////////////////////////////////////
140/// SamplingDistribution destructor
141
143{
144 fSamplingDist.clear();
145 fSampleWeights.clear();
146}
147
148////////////////////////////////////////////////////////////////////////////////
149/// Merge SamplingDistributions (does nothing if nullptr is given).
150/// If variable name was not set before, it is copied from the added
151/// SamplingDistribution.
152
154{
155 if(!other) return;
156
157 std::vector<double> newSamplingDist = other->fSamplingDist;
158 std::vector<double> newSampleWeights = other->fSampleWeights;
159 // need to check STL stuff here. Will this = operator work as wanted, or do we need:
160 // std::copy(samplingDist.begin(), samplingDist.end(), fSamplingDist.begin());
161 // need to look into STL, do it the easy way for now
162
163 // reserve memory
164 fSamplingDist.reserve(fSamplingDist.size()+newSamplingDist.size());
165 fSampleWeights.reserve(fSampleWeights.size()+newSampleWeights.size());
166
167 // push back elements
168 for(unsigned int i=0; i<newSamplingDist.size(); ++i){
169 fSamplingDist.push_back(newSamplingDist[i]);
170 fSampleWeights.push_back(newSampleWeights[i]);
171 }
172
173
174 if(GetVarName().Length() == 0 && other->GetVarName().Length() > 0)
175 fVarName = other->GetVarName();
176
177 if(strlen(GetName()) == 0 && strlen(other->GetName()) > 0)
178 SetName(other->GetName());
179 if(strlen(GetTitle()) == 0 && strlen(other->GetTitle()) > 0)
180 SetTitle(other->GetTitle());
181
182}
183
184////////////////////////////////////////////////////////////////////////////////
185/// Returns the integral in the open/closed/mixed interval. Default is [low,high) interval.
186/// Normalization can be turned off.
187
188double SamplingDistribution::Integral(double low, double high, bool normalize, bool lowClosed, bool
189 highClosed) const
190{
191 double error = 0;
192 return IntegralAndError(error, low,high, normalize, lowClosed, highClosed);
193}
194
195////////////////////////////////////////////////////////////////////////////////
196/// first need to sort the values and then compute the
197/// running sum of the weights and of the weight square
198/// needed later for computing the integral
199
201
202 unsigned int n = fSamplingDist.size();
203 std::vector<unsigned int> index(n);
204 TMath::SortItr(fSamplingDist.begin(), fSamplingDist.end(), index.begin(), false );
205
206 // compute the empirical CDF and cache in a vector
207 fSumW = std::vector<double>( n );
208 fSumW2 = std::vector<double>( n );
209
210 std::vector<double> sortedDist( n);
211 std::vector<double> sortedWeights( n);
212
213 for(unsigned int i=0; i <n; i++) {
214 unsigned int j = index[i];
215 if (i > 0) {
216 fSumW[i] += fSumW[i-1];
217 fSumW2[i] += fSumW2[i-1];
218 }
219 fSumW[i] += fSampleWeights[j];
221 // sort also the sampling distribution and the weights
222 sortedDist[i] = fSamplingDist[ j] ;
223 sortedWeights[i] = fSampleWeights[ j] ;
224 }
225
226 // save the sorted distribution
227 fSamplingDist = sortedDist;
228 fSampleWeights = sortedWeights;
229
230
231}
232
233////////////////////////////////////////////////////////////////////////////////
234/// Returns the integral in the open/closed/mixed interval. Default is [low,high) interval.
235/// Normalization can be turned off.
236/// compute also the error on the integral
237
238double SamplingDistribution::IntegralAndError(double & error, double low, double high, bool normalize, bool lowClosed, bool
239 highClosed) const
240{
241 int n = fSamplingDist.size();
242 if( n == 0 ) {
243 error = numeric_limits<double>::infinity();
244 return 0.0;
245 }
246
247 if (int(fSumW.size()) != n)
248 SortValues();
249
250
251 // use std::upper_bounds returns lower index value
252 int indexLow = -1;
253 int indexHigh = -1;
254 if (lowClosed) {
255 // case of closed intervals want to include lower part
256 indexLow = std::lower_bound( fSamplingDist.begin(), fSamplingDist.end() , low) - fSamplingDist.begin() -1;
257 }
258 else {
259 // case of open intervals
260 indexLow = std::upper_bound( fSamplingDist.begin(), fSamplingDist.end() , low) - fSamplingDist.begin() - 1;
261 }
262
263
264 if (highClosed) {
265 indexHigh = std::upper_bound( fSamplingDist.begin(), fSamplingDist.end() , high) - fSamplingDist.begin() -1;
266 }
267 else {
268 indexHigh = std::lower_bound( fSamplingDist.begin(), fSamplingDist.end() , high) - fSamplingDist.begin() -1;
269
270 }
271
272
273 assert(indexLow < n && indexHigh < n);
274
275 double sum = 0;
276 double sum2 = 0;
277
278 if (indexHigh >= 0) {
279 sum = fSumW[indexHigh];
280 sum2 = fSumW2[indexHigh];
281
282 if (indexLow >= 0) {
283 sum -= fSumW[indexLow];
284 sum2 -= fSumW2[indexLow];
285 }
286 }
287
288 if(normalize) {
289
290 double norm = fSumW.back();
291 double norm2 = fSumW2.back();
292
293 sum /= norm;
294
295 // use formula for binomial error in case of weighted events
296 // expression can be derived using a MLE for a weighted binomial likelihood
297 error = std::sqrt( sum2 * (1. - 2. * sum) + norm2 * sum * sum ) / norm;
298 }
299 else {
300 error = std::sqrt(sum2);
301 }
302
303
304 return sum;
305}
306
307////////////////////////////////////////////////////////////////////////////////
308/// returns the closed integral [-inf,x]
309
310double SamplingDistribution::CDF(double x) const {
311 return Integral(-RooNumber::infinity(), x, true, true, true);
312}
313
314////////////////////////////////////////////////////////////////////////////////
315/// returns the inverse of the cumulative distribution function
316
318{
319 double dummy=0;
320 return InverseCDF(pvalue,0,dummy);
321}
322
323////////////////////////////////////////////////////////////////////////////////
324/// returns the inverse of the cumulative distribution function, with variations depending on number of samples
325
327 double sigmaVariation,
328 double& inverseWithVariation)
329{
330 if (fSumW.size() != fSamplingDist.size())
331 SortValues();
332
333 if (!TMath::AreEqualRel(fSumW.back(), fSumW2.back(), 1.E-6) )
334 Warning("InverseCDF","Estimation of Quantiles (InverseCDF) for weighted events is not yet supported");
335
336
337 // Acceptance regions are meant to be inclusive of (1-\alpha) of the probability
338 // so the returned values of the CDF should make this easy.
339 // in particular:
340 // if finding the critical value for a lower bound
341 // when p_i < p < p_j, one should return the value associated with i
342 // if i=0, then one should return -infinity
343 // if finding the critical value for an upper bound
344 // when p_i < p < p_j, one should return the value associated with j
345 // if i = size-1, then one should return +infinity
346 // use pvalue < 0.5 to indicate a lower bound is requested
347
348 // casting will round down, eg. give i
349 int nominal = (unsigned int) (pvalue*fSamplingDist.size());
350
351 if(nominal <= 0) {
352 inverseWithVariation = -1.*RooNumber::infinity();
353 return -1.*RooNumber::infinity();
354 }
355 else if(nominal >= (Int_t)fSamplingDist.size()-1 ) {
356 inverseWithVariation = RooNumber::infinity();
357 return RooNumber::infinity();
358 }
359 else if(pvalue < 0.5){
360 int delta = (int)(sigmaVariation*sqrt(1.0*nominal)); // note sqrt(small fraction)
361 int variation = nominal+delta;
362
363 if(variation>=(Int_t)fSamplingDist.size()-1)
364 inverseWithVariation = RooNumber::infinity();
365 else if(variation<=0)
366 inverseWithVariation = -1.*RooNumber::infinity();
367 else
368 inverseWithVariation = fSamplingDist[ variation ];
369
370 return fSamplingDist[nominal];
371 }
372 else if(pvalue >= 0.5){
373 int delta = (int)(sigmaVariation*sqrt(1.0*fSamplingDist.size()- nominal)); // note sqrt(small fraction)
374 int variation = nominal+delta;
375
376
377 if(variation>=(Int_t)fSamplingDist.size()-1)
378 inverseWithVariation = RooNumber::infinity();
379
380 else if(variation<=0)
381 inverseWithVariation = -1.*RooNumber::infinity();
382 else
383 inverseWithVariation = fSamplingDist[ variation+1 ];
384
385
386 /*
387 std::cout << "dgb SamplingDistribution::InverseCDF. variation = " << variation
388 << " size = " << fSamplingDist.size()
389 << " value = " << inverseWithVariation << std::endl;
390 */
391
392 return fSamplingDist[nominal+1];
393 }
394 else{
395 std::cout << "problem in SamplingDistribution::InverseCDF" << std::endl;
396 }
397 inverseWithVariation = RooNumber::infinity();
398 return RooNumber::infinity();
399
400}
401
402
403////////////////////////////////////////////////////////////////////////////////
404/// returns the inverse of the cumulative distribution function
405
407{
408 if (fSumW.size() != fSamplingDist.size())
409 SortValues();
410
411 if (!TMath::AreEqualRel(fSumW.back(), fSumW2.back(), 1.E-6) )
412 Warning("InverseCDFInterpolate","Estimation of Quantiles (InverseCDF) for weighted events is not yet supported.");
413
414 // casting will round down, eg. give i
415 int nominal = (unsigned int) (pvalue*fSamplingDist.size());
416
417 if(nominal <= 0) {
418 return -1.*RooNumber::infinity();
419 }
420 if(nominal >= (Int_t)fSamplingDist.size()-1 ) {
421 return RooNumber::infinity();
422 }
423 double upperX = fSamplingDist[nominal+1];
424 double upperY = ((double) (nominal+1))/fSamplingDist.size();
425 double lowerX = fSamplingDist[nominal];
426 double lowerY = ((double) nominal)/fSamplingDist.size();
427
428 // std::cout << upperX << " " << upperY << " " << lowerX << " " << lowerY << std::endl;
429
430 return (upperX-lowerX)/(upperY-lowerY)*(pvalue-lowerY)+lowerX;
431
432}
#define ClassImp(name)
Definition Rtypes.h:377
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
char name[80]
Definition TGX11.cxx:110
double getRealValue(const char *name, double defVal=0.0, bool verbose=false) const
Get value of a RooAbsReal stored in set with given name.
RooAbsArg * first() const
RooAbsArg * find(const char *name) const
Find object with given name in list.
virtual Int_t numEntries() const
Return number of entries in dataset, i.e., count unweighted entries.
RooDataSet is a container class to hold unbinned data.
Definition RooDataSet.h:57
const RooArgSet * get(Int_t index) const override
Return RooArgSet with coordinates of event 'index'.
double weight() const override
Return event weight of current event.
static constexpr double infinity()
Return internal infinity representation.
Definition RooNumber.h:24
This class simply holds a sampling distribution of some test statistic.
double Integral(double low, double high, bool normalize=true, bool lowClosed=true, bool highClosed=false) const
numerical integral in these limits
double CDF(double x) const
calculate CDF as a special case of Integral(...) with lower limit equal to -inf
~SamplingDistribution() override
Destructor of SamplingDistribution.
std::vector< double > fSumW2
! Cached vector with sum of the weight used to compute integral error
double InverseCDFInterpolate(double pvalue)
get the inverse of the Cumulative distribution function
std::vector< double > fSumW
! Cached vector with sum of the weight used to compute integral
SamplingDistribution()
Default constructor for SamplingDistribution.
std::vector< double > fSamplingDist
vector of points for the sampling distribution
double IntegralAndError(double &error, double low, double high, bool normalize=true, bool lowClosed=true, bool highClosed=false) const
numerical integral in these limits including error estimation
double InverseCDF(double pvalue)
get the inverse of the Cumulative distribution function
void Add(const SamplingDistribution *other)
merge two sampling distributions
void SortValues() const
internal function to sort values
std::vector< double > fSampleWeights
vector of weights for the samples
The TNamed class is the base class for all named ROOT classes.
Definition TNamed.h:29
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition TNamed.cxx:164
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
const char * GetTitle() const override
Returns title of object.
Definition TNamed.h:48
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition TNamed.cxx:140
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition TObject.cxx:956
Basic string class.
Definition TString.h:139
Ssiz_t Length() const
Definition TString.h:421
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition TString.cxx:2334
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
Namespace for the RooStats classes.
Definition Asimov.h:19
void SortItr(Iterator first, Iterator last, IndexIterator index, Bool_t down=kTRUE)
Sort the n1 elements of the Short_t array defined by its iterators.
Definition TMathBase.h:406
Bool_t AreEqualRel(Double_t af, Double_t bf, Double_t relPrec)
Comparing floating points.
Definition TMath.h:425
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345