Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
SamplingDistribution.cxx
Go to the documentation of this file.
1// @(#)root/roostats:$Id$
2
3/*************************************************************************
4 * Project: RooStats *
5 * Package: RooFit/RooStats *
6 * Authors: *
7 * Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke *
8 *************************************************************************
9 * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16/** \class RooStats::SamplingDistribution
17 \ingroup Roostats
18
19This class simply holds a sampling distribution of some test statistic.
20The distribution can either be an empirical distribution (eg. the samples themselves) or
21a weighted set of points (eg. for the FFT method).
22The class supports merging.
23*/
24
25#include "RooMsgService.h"
26
28#include "RooNumber.h"
29#include "TMath.h"
30#include <algorithm>
31#include <iostream>
32#include <cmath>
33#include <limits>
34using std::numeric_limits;
35
36
37using namespace RooStats;
38
39////////////////////////////////////////////////////////////////////////////////
40/// SamplingDistribution constructor
41
42SamplingDistribution::SamplingDistribution(const char *name, const char *title, std::vector<double> &samplingDist,
43 const char *varName)
44 : TNamed(name, title), fSamplingDist(samplingDist), fVarName(varName)
45{
46 // need to check STL stuff here. Will this = operator work as wanted, or do we need:
47 // std::copy(samplingDist.begin(), samplingDist.end(), fSamplingDist.begin());
48
49 // WVE must fill sampleWeights vector here otherwise append behavior potentially undefined
50 fSampleWeights.resize(fSamplingDist.size(),1.0) ;
51}
52
53////////////////////////////////////////////////////////////////////////////////
54/// SamplingDistribution constructor
55
56SamplingDistribution::SamplingDistribution(const char *name, const char *title, std::vector<double> &samplingDist,
57 std::vector<double> &sampleWeights, const char *varName)
58 : TNamed(name, title), fSamplingDist(samplingDist), fSampleWeights(sampleWeights), fVarName(varName)
59{
60 // need to check STL stuff here. Will this = operator work as wanted, or do we need:
61 // std::copy(samplingDist.begin(), samplingDist.end(), fSamplingDist.begin());
62}
63
64////////////////////////////////////////////////////////////////////////////////
65/// SamplingDistribution constructor (with name and title)
66
67SamplingDistribution::SamplingDistribution(const char *name, const char *title, const char *varName)
68 : TNamed(name, title), fVarName(varName)
69{
70}
71
72////////////////////////////////////////////////////////////////////////////////
73/// Creates a SamplingDistribution from a RooDataSet for debugging
74/// purposes; e.g. if you need a Gaussian type SamplingDistribution
75/// you can generate it from a Gaussian pdf and use the resulting
76/// RooDataSet with this constructor.
77///
78/// The result is the projected distribution onto varName
79/// marginalizing the other variables.
80///
81/// If varName is not given, the first variable will be used.
82/// This is useful mostly for RooDataSets with only one observable.
83
85 const char *name,
86 const char *title,
87 RooDataSet& dataSet,
88 const char * _columnName,
89 const char * varName
90) : TNamed(name, title) {
91
92
93 // check there are any meaningful entries in the given dataset
94 if( dataSet.numEntries() == 0 || !dataSet.get()->first() ) {
95 if( varName ) fVarName = varName;
96 return;
97 }
98
100
101 if( !columnName.Length() ) {
102 columnName.Form( "%s_TS0", name );
103 if( !dataSet.get()->find(columnName) ) {
104 columnName = dataSet.get()->first()->GetName();
105 }
106 }
107
108 if( !varName ) {
109 // no leak. none of these transfers ownership.
110 fVarName = (*dataSet.get())[columnName].GetTitle();
111 }else{
113 }
114
115 for(Int_t i=0; i < dataSet.numEntries(); i++) {
116 fSamplingDist.push_back(dataSet.get(i)->getRealValue(columnName));
117 fSampleWeights.push_back(dataSet.weight());
118 }
119}
120
121
122////////////////////////////////////////////////////////////////////////////////
123/// SamplingDistribution default constructor
124
125SamplingDistribution::SamplingDistribution() : TNamed("SamplingDistribution_DefaultName", "SamplingDistribution") {}
126
127////////////////////////////////////////////////////////////////////////////////
128/// SamplingDistribution destructor
129
135
136////////////////////////////////////////////////////////////////////////////////
137/// Merge SamplingDistributions (does nothing if nullptr is given).
138/// If variable name was not set before, it is copied from the added
139/// SamplingDistribution.
140
142{
143 if(!other) return;
144
145 std::vector<double> newSamplingDist = other->fSamplingDist;
146 std::vector<double> newSampleWeights = other->fSampleWeights;
147 // need to check STL stuff here. Will this = operator work as wanted, or do we need:
148 // std::copy(samplingDist.begin(), samplingDist.end(), fSamplingDist.begin());
149 // need to look into STL, do it the easy way for now
150
151 // reserve memory
152 fSamplingDist.reserve(fSamplingDist.size()+newSamplingDist.size());
153 fSampleWeights.reserve(fSampleWeights.size()+newSampleWeights.size());
154
155 // push back elements
156 for(unsigned int i=0; i<newSamplingDist.size(); ++i){
157 fSamplingDist.push_back(newSamplingDist[i]);
158 fSampleWeights.push_back(newSampleWeights[i]);
159 }
160
161
162 if(GetVarName().Length() == 0 && other->GetVarName().Length() > 0)
163 fVarName = other->GetVarName();
164
165 if(strlen(GetName()) == 0 && strlen(other->GetName()) > 0)
166 SetName(other->GetName());
167 if(strlen(GetTitle()) == 0 && strlen(other->GetTitle()) > 0)
168 SetTitle(other->GetTitle());
169
170}
171
172////////////////////////////////////////////////////////////////////////////////
173/// Returns the integral in the open/closed/mixed interval. Default is [low,high) interval.
174/// Normalization can be turned off.
175
176double SamplingDistribution::Integral(double low, double high, bool normalize, bool lowClosed, bool
177 highClosed) const
178{
179 double error = 0;
180 return IntegralAndError(error, low,high, normalize, lowClosed, highClosed);
181}
182
183////////////////////////////////////////////////////////////////////////////////
184/// first need to sort the values and then compute the
185/// running sum of the weights and of the weight square
186/// needed later for computing the integral
187
189
190 unsigned int n = fSamplingDist.size();
191 std::vector<unsigned int> index(n);
192 TMath::SortItr(fSamplingDist.begin(), fSamplingDist.end(), index.begin(), false );
193
194 // compute the empirical CDF and cache in a vector
195 fSumW = std::vector<double>( n );
196 fSumW2 = std::vector<double>( n );
197
198 std::vector<double> sortedDist( n);
199 std::vector<double> sortedWeights( n);
200
201 for(unsigned int i=0; i <n; i++) {
202 unsigned int j = index[i];
203 if (i > 0) {
204 fSumW[i] += fSumW[i-1];
205 fSumW2[i] += fSumW2[i-1];
206 }
207 fSumW[i] += fSampleWeights[j];
209 // sort also the sampling distribution and the weights
210 sortedDist[i] = fSamplingDist[ j] ;
212 }
213
214 // save the sorted distribution
217
218
219}
220
221////////////////////////////////////////////////////////////////////////////////
222/// Returns the integral in the open/closed/mixed interval. Default is [low,high) interval.
223/// Normalization can be turned off.
224/// compute also the error on the integral
225
226double SamplingDistribution::IntegralAndError(double & error, double low, double high, bool normalize, bool lowClosed, bool
227 highClosed) const
228{
229 int n = fSamplingDist.size();
230 if( n == 0 ) {
231 error = numeric_limits<double>::infinity();
232 return 0.0;
233 }
234
235 if (int(fSumW.size()) != n)
236 SortValues();
237
238
239 // use std::upper_bounds returns lower index value
240 int indexLow = -1;
241 int indexHigh = -1;
242 if (lowClosed) {
243 // case of closed intervals want to include lower part
244 indexLow = std::lower_bound( fSamplingDist.begin(), fSamplingDist.end() , low) - fSamplingDist.begin() -1;
245 }
246 else {
247 // case of open intervals
248 indexLow = std::upper_bound( fSamplingDist.begin(), fSamplingDist.end() , low) - fSamplingDist.begin() - 1;
249 }
250
251
252 if (highClosed) {
253 indexHigh = std::upper_bound( fSamplingDist.begin(), fSamplingDist.end() , high) - fSamplingDist.begin() -1;
254 }
255 else {
256 indexHigh = std::lower_bound( fSamplingDist.begin(), fSamplingDist.end() , high) - fSamplingDist.begin() -1;
257
258 }
259
260
261 assert(indexLow < n && indexHigh < n);
262
263 double sum = 0;
264 double sum2 = 0;
265
266 if (indexHigh >= 0) {
269
270 if (indexLow >= 0) {
271 sum -= fSumW[indexLow];
272 sum2 -= fSumW2[indexLow];
273 }
274 }
275
276 if(normalize) {
277
278 double norm = fSumW.back();
279 double norm2 = fSumW2.back();
280
281 sum /= norm;
282
283 // use formula for binomial error in case of weighted events
284 // expression can be derived using a MLE for a weighted binomial likelihood
285 error = std::sqrt( sum2 * (1. - 2. * sum) + norm2 * sum * sum ) / norm;
286 }
287 else {
288 error = std::sqrt(sum2);
289 }
290
291
292 return sum;
293}
294
295////////////////////////////////////////////////////////////////////////////////
296/// returns the closed integral [-inf,x]
297
298double SamplingDistribution::CDF(double x) const {
299 return Integral(-RooNumber::infinity(), x, true, true, true);
300}
301
302////////////////////////////////////////////////////////////////////////////////
303/// returns the inverse of the cumulative distribution function
304
306{
307 double dummy=0;
308 return InverseCDF(pvalue,0,dummy);
309}
310
311////////////////////////////////////////////////////////////////////////////////
312/// returns the inverse of the cumulative distribution function, with variations depending on number of samples
313
315 double sigmaVariation,
316 double& inverseWithVariation)
317{
318 if (fSumW.size() != fSamplingDist.size())
319 SortValues();
320
321 if (!TMath::AreEqualRel(fSumW.back(), fSumW2.back(), 1.E-6) )
322 Warning("InverseCDF","Estimation of Quantiles (InverseCDF) for weighted events is not yet supported");
323
324
325 // Acceptance regions are meant to be inclusive of (1-\alpha) of the probability
326 // so the returned values of the CDF should make this easy.
327 // in particular:
328 // if finding the critical value for a lower bound
329 // when p_i < p < p_j, one should return the value associated with i
330 // if i=0, then one should return -infinity
331 // if finding the critical value for an upper bound
332 // when p_i < p < p_j, one should return the value associated with j
333 // if i = size-1, then one should return +infinity
334 // use pvalue < 0.5 to indicate a lower bound is requested
335
336 // casting will round down, eg. give i
337 int nominal = (unsigned int) (pvalue*fSamplingDist.size());
338
339 if(nominal <= 0) {
341 return -1.*RooNumber::infinity();
342 }
343 else if(nominal >= (Int_t)fSamplingDist.size()-1 ) {
345 return RooNumber::infinity();
346 }
347 else if(pvalue < 0.5){
348 int delta = (int)(sigmaVariation*sqrt(1.0*nominal)); // note sqrt(small fraction)
349 int variation = nominal+delta;
350
351 if (variation >= (Int_t)fSamplingDist.size() - 1) {
353 } else if (variation <= 0) {
355 } else {
357 }
358
359 return fSamplingDist[nominal];
360 }
361 else if(pvalue >= 0.5){
362 int delta = (int)(sigmaVariation*sqrt(1.0*fSamplingDist.size()- nominal)); // note sqrt(small fraction)
363 int variation = nominal+delta;
364
365 if (variation >= (Int_t)fSamplingDist.size() - 1) {
367
368 } else if (variation <= 0) {
370 } else {
372 }
373
374 /*
375 std::cout << "dgb SamplingDistribution::InverseCDF. variation = " << variation
376 << " size = " << fSamplingDist.size()
377 << " value = " << inverseWithVariation << std::endl;
378 */
379
380 return fSamplingDist[nominal+1];
381 }
382 else{
383 std::cout << "problem in SamplingDistribution::InverseCDF" << std::endl;
384 }
386 return RooNumber::infinity();
387
388}
389
390
391////////////////////////////////////////////////////////////////////////////////
392/// returns the inverse of the cumulative distribution function
393
395{
396 if (fSumW.size() != fSamplingDist.size())
397 SortValues();
398
399 if (!TMath::AreEqualRel(fSumW.back(), fSumW2.back(), 1.E-6) )
400 Warning("InverseCDFInterpolate","Estimation of Quantiles (InverseCDF) for weighted events is not yet supported.");
401
402 // casting will round down, eg. give i
403 int nominal = (unsigned int) (pvalue*fSamplingDist.size());
404
405 if(nominal <= 0) {
406 return -1.*RooNumber::infinity();
407 }
408 if(nominal >= (Int_t)fSamplingDist.size()-1 ) {
409 return RooNumber::infinity();
410 }
411 double upperX = fSamplingDist[nominal+1];
412 double upperY = ((double) (nominal+1))/fSamplingDist.size();
413 double lowerX = fSamplingDist[nominal];
414 double lowerY = ((double) nominal)/fSamplingDist.size();
415
416 // std::cout << upperX << " " << upperY << " " << lowerX << " " << lowerY << std::endl;
417
419
420}
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
char name[80]
Definition TGX11.cxx:110
Container class to hold unbinned data.
Definition RooDataSet.h:34
static constexpr double infinity()
Return internal infinity representation.
Definition RooNumber.h:25
This class simply holds a sampling distribution of some test statistic.
double Integral(double low, double high, bool normalize=true, bool lowClosed=true, bool highClosed=false) const
numerical integral in these limits
double CDF(double x) const
calculate CDF as a special case of Integral(...) with lower limit equal to -inf
~SamplingDistribution() override
Destructor of SamplingDistribution.
std::vector< double > fSumW2
! Cached vector with sum of the weight used to compute integral error
double InverseCDFInterpolate(double pvalue)
get the inverse of the Cumulative distribution function
std::vector< double > fSumW
! Cached vector with sum of the weight used to compute integral
SamplingDistribution()
Default constructor for SamplingDistribution.
std::vector< double > fSamplingDist
vector of points for the sampling distribution
double IntegralAndError(double &error, double low, double high, bool normalize=true, bool lowClosed=true, bool highClosed=false) const
numerical integral in these limits including error estimation
double InverseCDF(double pvalue)
get the inverse of the Cumulative distribution function
void Add(const SamplingDistribution *other)
merge two sampling distributions
void SortValues() const
internal function to sort values
std::vector< double > fSampleWeights
vector of weights for the samples
The TNamed class is the base class for all named ROOT classes.
Definition TNamed.h:29
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition TNamed.cxx:174
const char * GetName() const override
Returns name of object.
Definition TNamed.h:49
const char * GetTitle() const override
Returns title of object.
Definition TNamed.h:50
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition TNamed.cxx:150
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition TObject.cxx:1057
Basic string class.
Definition TString.h:139
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
Namespace for the RooStats classes.
Definition CodegenImpl.h:58
void SortItr(Iterator first, Iterator last, IndexIterator index, Bool_t down=kTRUE)
Sort the n1 elements of the Short_t array defined by its iterators.
Definition TMathBase.h:406
Bool_t AreEqualRel(Double_t af, Double_t bf, Double_t relPrec)
Comparing floating points.
Definition TMath.h:426
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345