Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TBinomialEfficiencyFitter.cxx
Go to the documentation of this file.
1// @(#)root/hist:$Id$
2// Author: Frank Filthaut, Rene Brun 30/05/2007
3
4/*************************************************************************
5 * Copyright (C) 1995-2007, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/** \class TBinomialEfficiencyFitter
13 \ingroup Hist
14 \brief Binomial fitter for the division of two histograms.
15
16Use when you need to calculate a selection's efficiency from two histograms,
17one containing all entries, and one containing the subset of these entries
18that pass the selection, and when you have a parametrization available for
19the efficiency as a function of the variable(s) under consideration.
20
21A very common problem when estimating efficiencies is that of error estimation:
22when no other information is available than the total number of events N and
23the selected number n, the best estimate for the selection efficiency p is n/N.
24Standard binomial statistics dictates that the uncertainty (this presupposes
25sufficiently high statistics that an approximation by a normal distribution is
26reasonable) on p, given N, is
27\f[
28 \sqrt{\frac{p(1-p)}{N}}
29\f]
30However, when p is estimated as n/N, fluctuations from the true p to its
31estimate become important, especially for low numbers of events, and giving
32rise to biased results.
33
34When fitting a parametrized efficiency, these problems can largely be overcome,
35as a hypothesized true efficiency is available by construction. Even so, simply
36using the corresponding uncertainty still presupposes that Gaussian errors
37yields a reasonable approximation. When using, instead of binned efficiency
38histograms, the original numerator and denominator histograms, a binned maximum
39likelihood can be constructed as the product of bin-by-bin binomial probabilities
40to select n out of N events. Assuming that a correct parametrization of the
41efficiency is provided, this construction in general yields less biased results
42(and is much less sensitive to binning details).
43
44A generic use of this method is given below (note that the method works for 2D
45and 3D histograms as well):
46
47~~~ {.cpp}
48 {
49 TH1* denominator; // denominator histogram
50 TH1* numerator; // corresponding numerator histogram
51 TF1* eff; // efficiency parametrization
52 .... // set step sizes and initial parameter
53 .... // values for the fit function
54 .... // possibly also set ranges, see TF1::SetRange()
55 TBinomialEfficiencyFitter* f = new TBinomialEfficiencyFitter(
56 numerator, denominator);
57 Int_t status = f->Fit(eff, "I");
58 if (status == 0) {
59 // if the fit was successful, display bin-by-bin efficiencies
60 // as well as the result of the fit
61 numerator->Sumw2();
62 TH1* hEff = dynamic_cast<TH1*>(numerator->Clone("heff"));
63 hEff->Divide(hEff, denominator, 1.0, 1.0, "B");
64 hEff->Draw("E");
65 eff->Draw("same");
66 }
67 }
68~~~
69
70Note that this method cannot be expected to yield reliable results when using
71weighted histograms (because the likelihood computation will be incorrect).
72
73*/
74
76
77#include "TMath.h"
78#include "TPluginManager.h"
79#include "TROOT.h"
80#include "TH1.h"
81#include "TF1.h"
82#include "TF2.h"
83#include "TF3.h"
84#include "Fit/FitConfig.h"
85#include "Fit/Fitter.h"
86#include "TFitResult.h"
87#include "Math/Functor.h"
89
90#include <limits>
91
92
94
96
97
98////////////////////////////////////////////////////////////////////////////////
99/// default constructor
100
102 fNumerator = 0;
103 fDenominator = 0;
104 fFunction = 0;
107 fRange = kFALSE;
109 fFitter = 0;
110}
111
112////////////////////////////////////////////////////////////////////////////////
113/// Constructor.
114///
115/// Note that no objects are copied, so it is up to the user to ensure that the
116/// histogram pointers remain valid.
117///
118/// Both histograms need to be "consistent". This is not checked here, but in
119/// TBinomialEfficiencyFitter::Fit().
120
121TBinomialEfficiencyFitter::TBinomialEfficiencyFitter(const TH1 *numerator, const TH1 *denominator) {
123 fFunction = 0;
124 fFitter = 0;
125 Set(numerator,denominator);
126}
127
128////////////////////////////////////////////////////////////////////////////////
129/// destructor
130
132 if (fFitter) delete fFitter;
133 fFitter = 0;
134}
135
136////////////////////////////////////////////////////////////////////////////////
137/// Initialize with a new set of inputs.
138
139void TBinomialEfficiencyFitter::Set(const TH1 *numerator, const TH1 *denominator)
140{
141 fNumerator = (TH1*)numerator;
142 fDenominator = (TH1*)denominator;
143
146 fRange = kFALSE;
147}
148
149////////////////////////////////////////////////////////////////////////////////
150/// Set the required integration precision, see TF1::Integral()
151
153{
155}
156
157////////////////////////////////////////////////////////////////////////////////
158/// Provide access to the underlying fitter object.
159/// This may be useful e.g. for the retrieval of additional information (such
160/// as the output covariance matrix of the fit).
161
163{
164 if (!fFitter) fFitter = new ROOT::Fit::Fitter();
165 return fFitter;
166
167}
168
169////////////////////////////////////////////////////////////////////////////////
170/// Carry out the fit of the given function to the given histograms.
171///
172/// If option "I" is used, the fit function will be averaged over the
173/// bin (the default is to evaluate it simply at the bin center).
174///
175/// If option "R" is used, the fit range will be taken from the fit
176/// function (the default is to use the entire histogram).
177///
178/// If option "S" a TFitResult object is returned and it can be used to obtain
179/// additional fit information, like covariance or correlation matrix.
180///
181/// Note that all parameter values, limits, and step sizes are copied
182/// from the input fit function f1 (so they should be set before calling
183/// this method. This is particularly relevant for the step sizes, taken
184/// to be the "error" set on input, as a null step size usually fixes the
185/// corresponding parameter. That is protected against, but in such cases
186/// an arbitrary starting step size will be used, and the reliability of
187/// the fit should be questioned). If parameters are to be fixed, this
188/// should be done by specifying non-null parameter limits, with lower
189/// limits larger than upper limits.
190///
191/// On output, f1 contains the fitted parameters and errors, as well as
192/// the number of degrees of freedom, and the goodness-of-fit estimator
193/// as given by S. Baker and R. Cousins, Nucl. Instr. Meth. A221 (1984) 437.
194
196{
197 TString opt = option;
198 opt.ToUpper();
199 fAverage = opt.Contains("I");
200 fRange = opt.Contains("R");
201 Bool_t verbose = opt.Contains("V");
202 Bool_t quiet = opt.Contains("Q");
203 Bool_t saveResult = opt.Contains("S");
204 if (!f1) return -1;
205 fFunction = (TF1*)f1;
206 Int_t i, npar;
207 npar = f1->GetNpar();
208 if (npar <= 0) {
209 Error("Fit", "function %s has illegal number of parameters = %d",
210 f1->GetName(), npar);
211 return -3;
212 }
213
214 // Check that function has same dimension as histogram
215 if (!fNumerator || !fDenominator) {
216 Error("Fit","No numerator or denominator histograms set");
217 return -5;
218 }
219 if (f1->GetNdim() != fNumerator->GetDimension()) {
220 Error("Fit","function %s dimension, %d, does not match histogram dimension, %d",
222 return -4;
223 }
224 // Check that the numbers of bins for the histograms match
226 (f1->GetNdim() > 1 && fNumerator->GetNbinsY() != fDenominator->GetNbinsY()) ||
227 (f1->GetNdim() > 2 && fNumerator->GetNbinsZ() != fDenominator->GetNbinsZ())) {
228 Error("Fit", "numerator and denominator histograms do not have identical numbers of bins");
229 return -6;
230 }
231
232 // initialize the fitter
233
234 if (!fFitter) {
236 }
237
238
239 std::vector<ROOT::Fit::ParameterSettings> & parameters = fFitter->Config().ParamsSettings();
240 parameters.reserve(npar);
241 for (i = 0; i < npar; i++) {
242
243 // assign an ARBITRARY starting error to ensure the parameter won't be fixed!
244 Double_t we = f1->GetParError(i);
245 if (we <= 0) we = 0.3*TMath::Abs(f1->GetParameter(i));
246 if (we == 0) we = 0.01;
247
248 parameters.push_back(ROOT::Fit::ParameterSettings(f1->GetParName(i), f1->GetParameter(i), we) );
249
250 Double_t plow, pup;
251 f1->GetParLimits(i,plow,pup);
252 // when a parameter is fixed is having plow and pup equal to the value (if this is not zero)
253 // we handle special case when fixed parameter has zero value (in that case plow=1 and pup =1 )
254 if (plow >= pup && (plow==f1->GetParameter(i) || pup==f1->GetParameter(i) ||
255 ( f1->GetParameter(i) == 0 && plow==1. && pup == 1.) ) ) {
256 parameters.back().Fix();
257 Info("Fit", "Fixing parameter %s to value %f", f1->GetParName(i), f1->GetParameter(i));
258 } else if (plow < pup) {
259 parameters.back().SetLimits(plow,pup);
260 Info("Fit", "Setting limits for parameter %s to [%f,%f]", f1->GetParName(i), plow,pup);
261 }
262 }
263
264 // fcn must be set after setting the parameters
266
267 // set also model function in fitter to have it in FitResult
268 // in this way one can compute for example the confidence intervals
270
271 // in case default value of 1.0 is used
272 if (fFitter->Config().MinimizerOptions().ErrorDef() == 1.0 ) {
274 }
275
276 if (verbose) {
278 }
279 else if (quiet) {
281 }
282
283 // perform the actual fit
284
285 fFitDone = kTRUE;
286 Bool_t status = fFitter->FitFCN();
287 if ( !status && !quiet)
288 Warning("Fit","Abnormal termination of minimization.");
289
290
291 //Store fit results in fitFunction
292 const ROOT::Fit::FitResult & fitResult = fFitter->Result();
293 if (!fitResult.IsEmpty() ) {
294 // set in f1 the result of the fit
295 f1->SetNDF(fitResult.Ndf() );
296
297 //f1->SetNumberFitPoints(...); // this is set in ComputeFCN
298
299 f1->SetParameters( &(fitResult.Parameters().front()) );
300 if ( int( fitResult.Errors().size()) >= f1->GetNpar() )
301 f1->SetParErrors( &(fitResult.Errors().front()) );
302
303 f1->SetChisquare(2.*fitResult.MinFcnValue()); // store goodness of fit (Baker&Cousins)
305 Info("result"," chi2 %f ndf %d ",2.*fitResult.MinFcnValue(), fitResult.Ndf() );
306
307 }
308 // create a new result class if needed
309 if (saveResult) {
310 TFitResult* fr = new TFitResult(fitResult);
311 TString name = TString::Format("TBinomialEfficiencyFitter_result_of_%s",f1->GetName() );
312 fr->SetName(name); fr->SetTitle(name);
313 return TFitResultPtr(fr);
314 }
315 else {
316 return TFitResultPtr(fitResult.Status() );
317 }
318
319}
320
321////////////////////////////////////////////////////////////////////////////////
322/// Compute the likelihood.
323
325{
326 int nDim = fDenominator->GetDimension();
327
328 int xlowbin = fDenominator->GetXaxis()->GetFirst();
329 int xhighbin = fDenominator->GetXaxis()->GetLast();
330 int ylowbin = 0, yhighbin = 0, zlowbin = 0, zhighbin = 0;
331 if (nDim > 1) {
332 ylowbin = fDenominator->GetYaxis()->GetFirst();
333 yhighbin = fDenominator->GetYaxis()->GetLast();
334 if (nDim > 2) {
335 zlowbin = fDenominator->GetZaxis()->GetFirst();
336 zhighbin = fDenominator->GetZaxis()->GetLast();
337 }
338 }
339
341
342 if (fRange) {
343 double xmin, xmax, ymin, ymax, zmin, zmax;
344
345 // This way to ensure that a minimum range chosen exactly at a
346 // bin boundary is far from elegant, but is hopefully adequate.
347
348 if (nDim == 1) {
350 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
351 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
352 } else if (nDim == 2) {
354 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
355 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
356 ylowbin = fDenominator->GetYaxis()->FindBin(ymin);
357 yhighbin = fDenominator->GetYaxis()->FindBin(ymax);
358 } else if (nDim == 3) {
359 fFunction->GetRange(xmin, ymin, zmin, xmax, ymax, zmax);
360 xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
361 xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
362 ylowbin = fDenominator->GetYaxis()->FindBin(ymin);
363 yhighbin = fDenominator->GetYaxis()->FindBin(ymax);
364 zlowbin = fDenominator->GetZaxis()->FindBin(zmin);
365 zhighbin = fDenominator->GetZaxis()->FindBin(zmax);
366 }
367 }
368
369 // The coding below is perhaps somewhat awkward -- but it is done
370 // so that 1D, 2D, and 3D cases can be covered in the same loops.
371
372 f = 0.;
373
374 Int_t npoints = 0;
375 Double_t nmax = 0;
376 for (int xbin = xlowbin; xbin <= xhighbin; ++xbin) {
377
378 // compute the bin edges
381
382 for (int ybin = ylowbin; ybin <= yhighbin; ++ybin) {
383
384 // compute the bin edges (if applicable)
385 Double_t ylow = (nDim > 1) ? fDenominator->GetYaxis()->GetBinLowEdge(ybin) : 0;
386 Double_t yup = (nDim > 1) ? fDenominator->GetYaxis()->GetBinLowEdge(ybin+1) : 0;
387
388 for (int zbin = zlowbin; zbin <= zhighbin; ++zbin) {
389
390 // compute the bin edges (if applicable)
391 Double_t zlow = (nDim > 2) ? fDenominator->GetZaxis()->GetBinLowEdge(zbin) : 0;
392 Double_t zup = (nDim > 2) ? fDenominator->GetZaxis()->GetBinLowEdge(zbin+1) : 0;
393
394 int bin = fDenominator->GetBin(xbin, ybin, zbin);
396 Double_t nNum = fNumerator->GetBinContent(bin);
397
398 // count maximum value to use in the likelihood for inf
399 // i.e. a number much larger than the other terms
400 if (nDen> nmax) nmax = nDen;
401 if (nDen <= 0.) continue;
402 npoints++;
403
404 // mu is the average of the function over the bin OR
405 // the function evaluated at the bin centre
406 // As yet, there is nothing to prevent mu from being
407 // outside the range <0,1> !!
408
409 Double_t mu = 0;
410 switch (nDim) {
411 case 1:
412 mu = (fAverage) ?
413 fFunction->Integral(xlow, xup, fEpsilon)
414 / (xup-xlow) :
416 break;
417 case 2:
418 {
419 mu = (fAverage) ?
420 ((TF2*)fFunction)->Integral(xlow, xup, ylow, yup, fEpsilon)
421 / ((xup-xlow)*(yup-ylow)) :
424 }
425 break;
426 case 3:
427 {
428 mu = (fAverage) ?
429 ((TF3*)fFunction)->Integral(xlow, xup, ylow, yup, zlow, zup, fEpsilon)
430 / ((xup-xlow)*(yup-ylow)*(zup-zlow)) :
434 }
435 }
436
437 // binomial formula (forgetting about the factorials)
438 if (nNum != 0.) {
439 if (mu > 0.)
440 f -= nNum * TMath::Log(mu*nDen/nNum);
441 else
442 f -= nmax * -1E30; // crossing our fingers
443 }
444 if (nDen - nNum != 0.) {
445 if (1. - mu > 0.)
446 f -= (nDen - nNum) * TMath::Log((1. - mu)*nDen/(nDen-nNum));
447 else
448 f -= nmax * -1E30; // crossing our fingers
449 }
450 }
451 }
452 }
453
455}
#define f(i)
Definition RSha256.hxx:104
const Bool_t kFALSE
Definition RtypesCore.h:101
double Double_t
Definition RtypesCore.h:59
const Bool_t kTRUE
Definition RtypesCore.h:100
const char Option_t
Definition RtypesCore.h:66
#define ClassImp(name)
Definition Rtypes.h:364
const Double_t kDefaultEpsilon
char name[80]
Definition TGX11.cxx:110
float xmin
float ymin
float xmax
float ymax
const std::vector< ROOT::Fit::ParameterSettings > & ParamsSettings() const
get the vector of parameter settings (const method)
Definition FitConfig.h:86
ROOT::Math::MinimizerOptions & MinimizerOptions()
access to the minimizer control parameter (non const method)
Definition FitConfig.h:167
class containg the result of the fit and all the related information (fitted parameter values,...
Definition FitResult.h:47
bool IsEmpty() const
True if a fit result does not exist (even invalid) with parameter values.
Definition FitResult.h:117
const std::vector< double > & Errors() const
parameter errors (return st::vector)
Definition FitResult.h:169
const std::vector< double > & Parameters() const
parameter values (return std::vector)
Definition FitResult.h:174
unsigned int Ndf() const
Number of degree of freedom.
Definition FitResult.h:163
double MinFcnValue() const
Return value of the objective function (chi2 or likelihood) used in the fit.
Definition FitResult.h:120
unsigned int NFreeParameters() const
get total number of free parameters
Definition FitResult.h:134
int Status() const
minimizer status code
Definition FitResult.h:137
Fitter class, entry point for performing all type of fits.
Definition Fitter.h:77
bool FitFCN(unsigned int npar, Function &fcn, const double *params=0, unsigned int dataSize=0, bool chi2fit=false)
Fit using the a generic FCN function as a C++ callable object implementing double () (const double *)...
Definition Fitter.h:610
bool SetFCN(unsigned int npar, Function &fcn, const double *params=0, unsigned int dataSize=0, bool chi2fit=false)
Set a generic FCN function as a C++ callable object implementing double () (const double *) Note that...
Definition Fitter.h:615
const FitResult & Result() const
get fit result
Definition Fitter.h:384
const FitConfig & Config() const
access to the fit configuration (const method)
Definition Fitter.h:412
Class, describing value, limits and step size of the parameters Provides functionality also to set/re...
Documentation for class Functor class.
Definition Functor.h:401
Documentation for the abstract class IBaseFunctionMultiDim.
Definition IFunction.h:62
double ErrorDef() const
error definition
void SetErrorDef(double err)
set error def
void SetPrintLevel(int level)
set print level
Class to Wrap a ROOT Function class (like TF1) in a IParamMultiFunction interface of multi-dimensions...
virtual Double_t GetBinCenter(Int_t bin) const
Return center of bin.
Definition TAxis.cxx:478
virtual Int_t FindBin(Double_t x)
Find bin number corresponding to abscissa x.
Definition TAxis.cxx:293
virtual Double_t GetBinLowEdge(Int_t bin) const
Return low edge of bin.
Definition TAxis.cxx:518
Int_t GetLast() const
Return last bin on the axis i.e.
Definition TAxis.cxx:469
Int_t GetFirst() const
Return first bin on the axis i.e.
Definition TAxis.cxx:458
Binomial fitter for the division of two histograms.
Double_t fEpsilon
Precision required for function integration (option "I")
void Set(const TH1 *numerator, const TH1 *denominator)
Initialize with a new set of inputs.
ROOT::Fit::Fitter * fFitter
pointer to the real fitter
TH1 * fDenominator
Denominator histogram.
Bool_t fRange
True if the fit range must be taken from the function range.
Bool_t fAverage
True if the fit function must be averaged over the bin.
TBinomialEfficiencyFitter()
default constructor
ROOT::Fit::Fitter * GetFitter()
Provide access to the underlying fitter object.
Double_t EvaluateFCN(const Double_t *par)
void SetPrecision(Double_t epsilon)
Set the required integration precision, see TF1::Integral()
Bool_t fFitDone
Set to kTRUE when the fit has been done.
void ComputeFCN(Double_t &f, const Double_t *par)
Compute the likelihood.
TFitResultPtr Fit(TF1 *f1, Option_t *option="")
Carry out the fit of the given function to the given histograms.
TH1 * fNumerator
Numerator histogram.
1-Dim function class
Definition TF1.h:213
virtual void GetParLimits(Int_t ipar, Double_t &parmin, Double_t &parmax) const
Return limits for parameter ipar.
Definition TF1.cxx:1936
virtual void SetNDF(Int_t ndf)
Set the number of degrees of freedom ndf should be the number of points used in a fit - the number of...
Definition TF1.cxx:3439
virtual Double_t GetParError(Int_t ipar) const
Return value of parameter number ipar.
Definition TF1.cxx:1926
virtual void SetChisquare(Double_t chi2)
Definition TF1.h:612
virtual Int_t GetNpar() const
Definition TF1.h:481
virtual void SetParErrors(const Double_t *errors)
Set errors for all active parameters when calling this function, the array errors must have at least ...
Definition TF1.cxx:3504
virtual Double_t Integral(Double_t a, Double_t b, Double_t epsrel=1.e-12)
IntegralOneDim or analytical integral.
Definition TF1.cxx:2521
virtual Int_t GetNumberFitPoints() const
Definition TF1.h:503
virtual void SetNumberFitPoints(Int_t npfits)
Definition TF1.h:624
virtual void GetRange(Double_t *xmin, Double_t *xmax) const
Return range of a generic N-D function.
Definition TF1.cxx:2275
virtual const char * GetParName(Int_t ipar) const
Definition TF1.h:529
virtual void SetParameters(const Double_t *params)
Definition TF1.h:644
virtual Double_t Eval(Double_t x, Double_t y=0, Double_t z=0, Double_t t=0) const
Evaluate this function.
Definition TF1.cxx:1440
virtual Int_t GetNdim() const
Definition TF1.h:485
virtual Double_t GetParameter(Int_t ipar) const
Definition TF1.h:512
A 2-Dim function with parameters.
Definition TF2.h:29
A 3-Dim function with parameters.
Definition TF3.h:28
Provides an indirection to the TFitResult class and with a semantics identical to a TFitResult pointe...
Extends the ROOT::Fit::Result class with a TNamed inheritance providing easy possibility for I/O.
Definition TFitResult.h:34
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:58
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition TH1.cxx:8971
TAxis * GetZaxis()
Definition TH1.h:322
virtual Int_t GetNbinsY() const
Definition TH1.h:297
virtual Int_t GetNbinsZ() const
Definition TH1.h:298
virtual Int_t GetDimension() const
Definition TH1.h:282
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition TH1.h:320
virtual Int_t GetBin(Int_t binx, Int_t biny=0, Int_t binz=0) const
Return Global bin number corresponding to binx,y,z.
Definition TH1.cxx:4894
virtual Int_t GetNbinsX() const
Definition TH1.h:296
TAxis * GetYaxis()
Definition TH1.h:321
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition TH1.cxx:4994
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition TNamed.cxx:164
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition TNamed.cxx:140
virtual const char * GetName() const
Returns name of object.
Definition TNamed.h:47
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
Definition TObject.cxx:949
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
Definition TObject.cxx:963
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
Definition TObject.cxx:937
Basic string class.
Definition TString.h:136
void ToUpper()
Change string to upper case.
Definition TString.cxx:1163
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2336
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition TString.h:624
TF1 * f1
Definition legend1.C:11
Double_t Log(Double_t x)
Definition TMath.h:710
Short_t Abs(Short_t d)
Definition TMathBase.h:120
REAL epsilon
Definition triangle.c:618