Logo ROOT   6.16/01
Reference Guide
TBinomialEfficiencyFitter.cxx
Go to the documentation of this file.
1// @(#)root/hist:$Id$
2// Author: Frank Filthaut, Rene Brun 30/05/2007
3
4/*************************************************************************
5 * Copyright (C) 1995-2007, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TBinomialEfficiencyFitter
13 \ingroup Hist
14 \brief Binomial fitter for the division of two histograms.
15
16Use when you need to calculate a selection's efficiency from two histograms,
17one containing all entries, and one containing the subset of these entries
18that pass the selection, and when you have a parametrization available for
19the efficiency as a function of the variable(s) under consideration.
20
21A very common problem when estimating efficiencies is that of error estimation:
22when no other information is available than the total number of events N and
23the selected number n, the best estimate for the selection efficiency p is n/N.
24Standard binomial statistics dictates that the uncertainty (this presupposes
25sufficiently high statistics that an approximation by a normal distribution is
26reasonable) on p, given N, is
27\f[
28 \sqrt{\frac{p(1-p)}{N}}
29\f]
30However, when p is estimated as n/N, fluctuations from the true p to its
31estimate become important, especially for low numbers of events, and giving
32rise to biased results.
33
34When fitting a parametrized efficiency, these problems can largely be overcome,
35as a hypothesized true efficiency is available by construction. Even so, simply
36using the corresponding uncertainty still presupposes that Gaussian errors
37yields a reasonable approximation. When using, instead of binned efficiency
38histograms, the original numerator and denominator histograms, a binned maximum
39likelihood can be constructed as the product of bin-by-bin binomial probabilities
40to select n out of N events. Assuming that a correct parametrization of the
41efficiency is provided, this construction in general yields less biased results
42(and is much less sensitive to binning details).
43
44A generic use of this method is given below (note that the method works for 2D
45and 3D histograms as well):
46
47~~~ {.cpp}
48 {
49 TH1* denominator; // denominator histogram
50 TH1* numerator; // corresponding numerator histogram
51 TF1* eff; // efficiency parametrization
52 .... // set step sizes and initial parameter
53 .... // values for the fit function
54 .... // possibly also set ranges, see TF1::SetRange()
55 TBinomialEfficiencyFitter* f = new TBinomialEfficiencyFitter(
56 numerator, denominator);
57 Int_t status = f->Fit(eff, "I");
58 if (status == 0) {
59 // if the fit was successful, display bin-by-bin efficiencies
60 // as well as the result of the fit
61 numerator->Sumw2();
62 TH1* hEff = dynamic_cast<TH1*>(numerator->Clone("heff"));
63 hEff->Divide(hEff, denominator, 1.0, 1.0, "B");
64 hEff->Draw("E");
65 eff->Draw("same");
66 }
67 }
68~~~
69
70Note that this method cannot be expected to yield reliable results when using
71weighted histograms (because the likelihood computation will be incorrect).
72
73*/
74
76
77#include "TMath.h"
78#include "TPluginManager.h"
79#include "TROOT.h"
80#include "TH1.h"
81#include "TF1.h"
82#include "TF2.h"
83#include "TF3.h"
84#include "Fit/FitConfig.h"
85#include "Fit/Fitter.h"
86#include "TFitResult.h"
87#include "Math/Functor.h"
88
89#include <limits>
90
91
93
95
96
97////////////////////////////////////////////////////////////////////////////////
98/// default constructor
99
101 fNumerator = 0;
102 fDenominator = 0;
103 fFunction = 0;
106 fRange = kFALSE;
108 fFitter = 0;
109}
110
111////////////////////////////////////////////////////////////////////////////////
112/// Constructor.
113///
114/// Note that no objects are copied, so it is up to the user to ensure that the
115/// histogram pointers remain valid.
116///
117/// Both histograms need to be "consistent". This is not checked here, but in
118/// TBinomialEfficiencyFitter::Fit().
119
120TBinomialEfficiencyFitter::TBinomialEfficiencyFitter(const TH1 *numerator, const TH1 *denominator) {
122 fFunction = 0;
123 fFitter = 0;
124 Set(numerator,denominator);
125}
126
127////////////////////////////////////////////////////////////////////////////////
128/// destructor
129
131 if (fFitter) delete fFitter;
132 fFitter = 0;
133}
134
135////////////////////////////////////////////////////////////////////////////////
136/// Initialize with a new set of inputs.
137
138void TBinomialEfficiencyFitter::Set(const TH1 *numerator, const TH1 *denominator)
139{
140 fNumerator = (TH1*)numerator;
141 fDenominator = (TH1*)denominator;
142
145 fRange = kFALSE;
146}
147
148////////////////////////////////////////////////////////////////////////////////
149/// Set the required integration precision, see TF1::Integral()
150
152{
154}
155
156////////////////////////////////////////////////////////////////////////////////
157/// Provide access to the underlying fitter object.
158/// This may be useful e.g. for the retrieval of additional information (such
159/// as the output covariance matrix of the fit).
160
162{
163 if (!fFitter) fFitter = new ROOT::Fit::Fitter();
164 return fFitter;
165
166}
167
168////////////////////////////////////////////////////////////////////////////////
169/// Carry out the fit of the given function to the given histograms.
170///
171/// If option "I" is used, the fit function will be averaged over the
172/// bin (the default is to evaluate it simply at the bin center).
173///
174/// If option "R" is used, the fit range will be taken from the fit
175/// function (the default is to use the entire histogram).
176///
177/// If option "S" a TFitResult object is returned and it can be used to obtain
178/// additional fit information, like covariance or correlation matrix.
179///
180/// Note that all parameter values, limits, and step sizes are copied
181/// from the input fit function f1 (so they should be set before calling
182/// this method. This is particularly relevant for the step sizes, taken
183/// to be the "error" set on input, as a null step size usually fixes the
184/// corresponding parameter. That is protected against, but in such cases
185/// an arbitrary starting step size will be used, and the reliability of
186/// the fit should be questioned). If parameters are to be fixed, this
187/// should be done by specifying non-null parameter limits, with lower
188/// limits larger than upper limits.
189///
190/// On output, f1 contains the fitted parameters and errors, as well as
191/// the number of degrees of freedom, and the goodness-of-fit estimator
192/// as given by S. Baker and R. Cousins, Nucl. Instr. Meth. A221 (1984) 437.
193
195{
196 TString opt = option;
197 opt.ToUpper();
198 fAverage = opt.Contains("I");
199 fRange = opt.Contains("R");
200 Bool_t verbose = opt.Contains("V");
201 Bool_t quiet = opt.Contains("Q");
202 Bool_t saveResult = opt.Contains("S");
203 if (!f1) return -1;
204 fFunction = (TF1*)f1;
205 Int_t i, npar;
206 npar = f1->GetNpar();
207 if (npar <= 0) {
208 Error("Fit", "function %s has illegal number of parameters = %d",
209 f1->GetName(), npar);
210 return -3;
211 }
212
213 // Check that function has same dimension as histogram
214 if (!fNumerator || !fDenominator) {
215 Error("Fit","No numerator or denominator histograms set");
216 return -5;
217 }
218 if (f1->GetNdim() != fNumerator->GetDimension()) {
219 Error("Fit","function %s dimension, %d, does not match histogram dimension, %d",
221 return -4;
222 }
223 // Check that the numbers of bins for the histograms match
225 (f1->GetNdim() > 1 && fNumerator->GetNbinsY() != fDenominator->GetNbinsY()) ||
226 (f1->GetNdim() > 2 && fNumerator->GetNbinsZ() != fDenominator->GetNbinsZ())) {
227 Error("Fit", "numerator and denominator histograms do not have identical numbers of bins");
228 return -6;
229 }
230
231 // initialize the fitter
232
233 if (!fFitter) {
235 }
236
237
238 std::vector<ROOT::Fit::ParameterSettings> & parameters = fFitter->Config().ParamsSettings();
239 parameters.reserve(npar);
240 for (i = 0; i < npar; i++) {
241
242 // assign an ARBITRARY starting error to ensure the parameter won't be fixed!
243 Double_t we = f1->GetParError(i);
244 if (we <= 0) we = 0.3*TMath::Abs(f1->GetParameter(i));
245 if (we == 0) we = 0.01;
246
247 parameters.push_back(ROOT::Fit::ParameterSettings(f1->GetParName(i), f1->GetParameter(i), we) );
248
249 Double_t plow, pup;
250 f1->GetParLimits(i,plow,pup);
251 if (plow*pup != 0 && plow >= pup) { // this is a limitation - cannot fix a parameter to zero value
252 parameters.back().Fix();
253 }
254 else if (plow < pup ) {
255 parameters.back().SetLimits(plow,pup);
256 }
257 }
258
259 // fcn must be set after setting the parameters
261 fFitter->SetFCN(static_cast<ROOT::Math::IMultiGenFunction&>(fcnFunction));
262
263
264 // in case default value of 1.0 is used
265 if (fFitter->Config().MinimizerOptions().ErrorDef() == 1.0 ) {
267 }
268
269 if (verbose) {
271 }
272 else if (quiet) {
274 }
275
276
277
278 // perform the actual fit
279
280 fFitDone = kTRUE;
281 Bool_t status = fFitter->FitFCN();
282 if ( !status && !quiet)
283 Warning("Fit","Abnormal termination of minimization.");
284
285
286 //Store fit results in fitFunction
287 const ROOT::Fit::FitResult & fitResult = fFitter->Result();
288 if (!fitResult.IsEmpty() ) {
289 // set in f1 the result of the fit
290 f1->SetNDF(fitResult.Ndf() );
291
292 //f1->SetNumberFitPoints(...); // this is set in ComputeFCN
293
294 f1->SetParameters( &(fitResult.Parameters().front()) );
295 if ( int( fitResult.Errors().size()) >= f1->GetNpar() )
296 f1->SetParErrors( &(fitResult.Errors().front()) );
297
298 f1->SetChisquare(2.*fitResult.MinFcnValue()); // store goodness of fit (Baker&Cousins)
300 Info("result"," chi2 %f ndf %d ",2.*fitResult.MinFcnValue(), fitResult.Ndf() );
301
302 }
303 // create a new result class if needed
304 if (saveResult) {
305 TFitResult* fr = new TFitResult(fitResult);
306 TString name = TString::Format("TBinomialEfficiencyFitter_result_of_%s",f1->GetName() );
307 fr->SetName(name); fr->SetTitle(name);
308 return TFitResultPtr(fr);
309 }
310 else {
311 return TFitResultPtr(fitResult.Status() );
312 }
313
314}
315
316////////////////////////////////////////////////////////////////////////////////
317/// Compute the likelihood.
318
320{
321 int nDim = fDenominator->GetDimension();
322
323 int xlowbin = fDenominator->GetXaxis()->GetFirst();
324 int xhighbin = fDenominator->GetXaxis()->GetLast();
325 int ylowbin = 0, yhighbin = 0, zlowbin = 0, zhighbin = 0;
326 if (nDim > 1) {
327 ylowbin = fDenominator->GetYaxis()->GetFirst();
328 yhighbin = fDenominator->GetYaxis()->GetLast();
329 if (nDim > 2) {
330 zlowbin = fDenominator->GetZaxis()->GetFirst();
331 zhighbin = fDenominator->GetZaxis()->GetLast();
332 }
333 }
334
336
337 if (fRange) {
338 double xmin, xmax, ymin, ymax, zmin, zmax;
339
340 // This way to ensure that a minimum range chosen exactly at a
341 // bin boundary is far from elegant, but is hopefully adequate.
342
343 if (nDim == 1) {
345 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
346 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
347 } else if (nDim == 2) {
349 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
350 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
351 ylowbin = fDenominator->GetYaxis()->FindBin(ymin);
352 yhighbin = fDenominator->GetYaxis()->FindBin(ymax);
353 } else if (nDim == 3) {
354 fFunction->GetRange(xmin, ymin, zmin, xmax, ymax, zmax);
355 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
356 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
357 ylowbin = fDenominator->GetYaxis()->FindBin(ymin);
358 yhighbin = fDenominator->GetYaxis()->FindBin(ymax);
359 zlowbin = fDenominator->GetZaxis()->FindBin(zmin);
360 zhighbin = fDenominator->GetZaxis()->FindBin(zmax);
361 }
362 }
363
364 // The coding below is perhaps somewhat awkward -- but it is done
365 // so that 1D, 2D, and 3D cases can be covered in the same loops.
366
367 f = 0.;
368
369 Int_t npoints = 0;
370 Double_t nmax = 0;
371 for (int xbin = xlowbin; xbin <= xhighbin; ++xbin) {
372
373 // compute the bin edges
376
377 for (int ybin = ylowbin; ybin <= yhighbin; ++ybin) {
378
379 // compute the bin edges (if applicable)
380 Double_t ylow = (nDim > 1) ? fDenominator->GetYaxis()->GetBinLowEdge(ybin) : 0;
381 Double_t yup = (nDim > 1) ? fDenominator->GetYaxis()->GetBinLowEdge(ybin+1) : 0;
382
383 for (int zbin = zlowbin; zbin <= zhighbin; ++zbin) {
384
385 // compute the bin edges (if applicable)
386 Double_t zlow = (nDim > 2) ? fDenominator->GetZaxis()->GetBinLowEdge(zbin) : 0;
387 Double_t zup = (nDim > 2) ? fDenominator->GetZaxis()->GetBinLowEdge(zbin+1) : 0;
388
389 int bin = fDenominator->GetBin(xbin, ybin, zbin);
391 Double_t nNum = fNumerator->GetBinContent(bin);
392
393 // count maximum value to use in the likelihood for inf
394 // i.e. a number much larger than the other terms
395 if (nDen> nmax) nmax = nDen;
396 if (nDen <= 0.) continue;
397 npoints++;
398
399 // mu is the average of the function over the bin OR
400 // the function evaluated at the bin centre
401 // As yet, there is nothing to prevent mu from being
402 // outside the range <0,1> !!
403
404 Double_t mu = 0;
405 switch (nDim) {
406 case 1:
407 mu = (fAverage) ?
408 fFunction->Integral(xlow, xup, fEpsilon)
409 / (xup-xlow) :
411 break;
412 case 2:
413 {
414 mu = (fAverage) ?
415 ((TF2*)fFunction)->Integral(xlow, xup, ylow, yup, fEpsilon)
416 / ((xup-xlow)*(yup-ylow)) :
419 }
420 break;
421 case 3:
422 {
423 mu = (fAverage) ?
424 ((TF3*)fFunction)->Integral(xlow, xup, ylow, yup, zlow, zup, fEpsilon)
425 / ((xup-xlow)*(yup-ylow)*(zup-zlow)) :
429 }
430 }
431
432 // binomial formula (forgetting about the factorials)
433 if (nNum != 0.) {
434 if (mu > 0.)
435 f -= nNum * TMath::Log(mu*nDen/nNum);
436 else
437 f -= nmax * -1E30; // crossing our fingers
438 }
439 if (nDen - nNum != 0.) {
440 if (1. - mu > 0.)
441 f -= (nDen - nNum) * TMath::Log((1. - mu)*nDen/(nDen-nNum));
442 else
443 f -= nmax * -1E30; // crossing our fingers
444 }
445 }
446 }
447 }
448
450}
451
#define f(i)
Definition: RSha256.hxx:104
int Int_t
Definition: RtypesCore.h:41
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:87
const char Option_t
Definition: RtypesCore.h:62
#define ClassImp(name)
Definition: Rtypes.h:363
const Double_t kDefaultEpsilon
float xmin
Definition: THbookFile.cxx:93
float ymin
Definition: THbookFile.cxx:93
float xmax
Definition: THbookFile.cxx:93
float ymax
Definition: THbookFile.cxx:93
const std::vector< ROOT::Fit::ParameterSettings > & ParamsSettings() const
get the vector of parameter settings (const method)
Definition: FitConfig.h:85
ROOT::Math::MinimizerOptions & MinimizerOptions()
access to the minimizer control parameter (non const method)
Definition: FitConfig.h:166
class containg the result of the fit and all the related information (fitted parameter values,...
Definition: FitResult.h:48
bool IsEmpty() const
True if a fit result does not exist (even invalid) with parameter values.
Definition: FitResult.h:118
const std::vector< double > & Errors() const
parameter errors (return st::vector)
Definition: FitResult.h:170
const std::vector< double > & Parameters() const
parameter values (return std::vector)
Definition: FitResult.h:175
unsigned int Ndf() const
Number of degree of freedom.
Definition: FitResult.h:164
double MinFcnValue() const
Return value of the objective function (chi2 or likelihood) used in the fit.
Definition: FitResult.h:121
unsigned int NFreeParameters() const
get total number of free parameters
Definition: FitResult.h:135
int Status() const
minimizer status code
Definition: FitResult.h:138
Fitter class, entry point for performing all type of fits.
Definition: Fitter.h:77
bool FitFCN(unsigned int npar, Function &fcn, const double *params=0, unsigned int dataSize=0, bool chi2fit=false)
Fit using the a generic FCN function as a C++ callable object implementing double () (const double *)...
Definition: Fitter.h:590
bool SetFCN(unsigned int npar, Function &fcn, const double *params=0, unsigned int dataSize=0, bool chi2fit=false)
Set a generic FCN function as a C++ callable object implementing double () (const double *) Note that...
Definition: Fitter.h:595
const FitResult & Result() const
get fit result
Definition: Fitter.h:365
const FitConfig & Config() const
access to the fit configuration (const method)
Definition: Fitter.h:393
Class, describing value, limits and step size of the parameters Provides functionality also to set/re...
Documentation for class Functor class.
Definition: Functor.h:392
Documentation for the abstract class IBaseFunctionMultiDim.
Definition: IFunction.h:62
double ErrorDef() const
error definition
void SetErrorDef(double err)
set error def
void SetPrintLevel(int level)
set print level
virtual Double_t GetBinCenter(Int_t bin) const
Return center of bin.
Definition: TAxis.cxx:464
virtual Int_t FindBin(Double_t x)
Find bin number corresponding to abscissa x.
Definition: TAxis.cxx:279
virtual Double_t GetBinLowEdge(Int_t bin) const
Return low edge of bin.
Definition: TAxis.cxx:504
Int_t GetLast() const
Return last bin on the axis i.e.
Definition: TAxis.cxx:455
Int_t GetFirst() const
Return first bin on the axis i.e.
Definition: TAxis.cxx:444
Binomial fitter for the division of two histograms.
void Set(const TH1 *numerator, const TH1 *denominator)
Initialize with a new set of inputs.
virtual ~TBinomialEfficiencyFitter()
destructor
TBinomialEfficiencyFitter()
default constructor
ROOT::Fit::Fitter * GetFitter()
Provide access to the underlying fitter object.
Double_t EvaluateFCN(const Double_t *par)
void SetPrecision(Double_t epsilon)
Set the required integration precision, see TF1::Integral()
void ComputeFCN(Double_t &f, const Double_t *par)
Compute the likelihood.
TFitResultPtr Fit(TF1 *f1, Option_t *option="")
Carry out the fit of the given function to the given histograms.
1-Dim function class
Definition: TF1.h:211
virtual void GetParLimits(Int_t ipar, Double_t &parmin, Double_t &parmax) const
Return limits for parameter ipar.
Definition: TF1.cxx:1914
virtual void SetNDF(Int_t ndf)
Set the number of degrees of freedom ndf should be the number of points used in a fit - the number of...
Definition: TF1.cxx:3412
virtual Double_t GetParError(Int_t ipar) const
Return value of parameter number ipar.
Definition: TF1.cxx:1904
virtual void SetChisquare(Double_t chi2)
Definition: TF1.h:596
virtual Int_t GetNpar() const
Definition: TF1.h:465
virtual void SetParErrors(const Double_t *errors)
Set errors for all active parameters when calling this function, the array errors must have at least ...
Definition: TF1.cxx:3477
virtual Double_t Integral(Double_t a, Double_t b, Double_t epsrel=1.e-12)
IntegralOneDim or analytical integral.
Definition: TF1.cxx:2496
virtual Int_t GetNumberFitPoints() const
Definition: TF1.h:487
virtual void SetNumberFitPoints(Int_t npfits)
Definition: TF1.h:608
virtual void GetRange(Double_t *xmin, Double_t *xmax) const
Return range of a generic N-D function.
Definition: TF1.cxx:2257
virtual const char * GetParName(Int_t ipar) const
Definition: TF1.h:513
virtual void SetParameters(const Double_t *params)
Definition: TF1.h:628
virtual Double_t Eval(Double_t x, Double_t y=0, Double_t z=0, Double_t t=0) const
Evaluate this function.
Definition: TF1.cxx:1424
virtual Int_t GetNdim() const
Definition: TF1.h:469
virtual Double_t GetParameter(Int_t ipar) const
Definition: TF1.h:496
A 2-Dim function with parameters.
Definition: TF2.h:29
A 3-Dim function with parameters.
Definition: TF3.h:28
Provides an indirection to the TFitResult class and with a semantics identical to a TFitResult pointe...
Definition: TFitResultPtr.h:31
Extends the ROOT::Fit::Result class with a TNamed inheritance providing easy possibility for I/O.
Definition: TFitResult.h:30
The TH1 histogram class.
Definition: TH1.h:56
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition: TH1.cxx:8462
TAxis * GetZaxis()
Definition: TH1.h:318
virtual Int_t GetNbinsY() const
Definition: TH1.h:293
virtual Int_t GetNbinsZ() const
Definition: TH1.h:294
virtual Int_t GetDimension() const
Definition: TH1.h:278
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:316
virtual Int_t GetBin(Int_t binx, Int_t biny=0, Int_t binz=0) const
Return Global bin number corresponding to binx,y,z.
Definition: TH1.cxx:4692
virtual Int_t GetNbinsX() const
Definition: TH1.h:292
TAxis * GetYaxis()
Definition: TH1.h:317
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4790
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition: TObject.cxx:866
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition: TObject.cxx:880
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition: TObject.cxx:854
Basic string class.
Definition: TString.h:131
void ToUpper()
Change string to upper case.
Definition: TString.cxx:1113
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2286
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
TF1 * f1
Definition: legend1.C:11
constexpr Double_t E()
Base of natural log:
Definition: TMath.h:97
Double_t Log(Double_t x)
Definition: TMath.h:748
Short_t Abs(Short_t d)
Definition: TMathBase.h:120
REAL epsilon
Definition: triangle.c:617