ROOT
master
Reference Guide
Loading...
Searching...
No Matches
rf403_weightedevts.C File Reference
Tutorials
»
RooFit Tutorials
Detailed Description
Data and categories: using weights in unbinned datasets
#include "
RooRealVar.h
"
#include "
RooDataSet.h
"
#include "
RooDataHist.h
"
#include "
RooGaussian.h
"
#include "
RooFormulaVar.h
"
#include "
RooGenericPdf.h
"
#include "
RooPolynomial.h
"
#include "
RooMinimizer.h
"
#include "
TCanvas.h
"
#include "
TAxis.h
"
#include "
RooPlot.h
"
#include "
RooFitResult.h
"
void
rf403_weightedevts
()
{
using namespace
RooFit
;
// C r e a t e o b s e r v a b l e a n d u n w e i g h t e d d a t a s e t
// -------------------------------------------------------------------------------
// Declare observable
RooRealVar
x
(
"x"
,
"x"
, -10, 10);
x
.setBins(40);
// Construction a uniform pdf
RooPolynomial
p0(
"px"
,
"px"
,
x
);
// Sample 1000 events from pdf
std::unique_ptr<RooDataSet>
data
{p0.generate(
x
, 1000)};
// C a l c u l a t e w e i g h t a n d m a k e d a t a s e t w e i g h t e d
// -----------------------------------------------------------------------------------
// Construct formula to calculate (fake) weight for events
RooFormulaVar
wFunc(
"w"
,
"event weight"
,
"(x*x+10)"
,
x
);
// Add column with variable w to previously generated dataset
data
->addColumn(wFunc);
// Dataset d is now a dataset with two observable (x,w) with 1000 entries
data
->Print();
// Create a new dataset wdata where w is interpreted as event weight rather than as observable
RooDataSet
wdata(
data
->GetName(),
data
->GetTitle(), *
data
->get(), Import(*
data
), WeightVar(
"w"
));
// Dataset d is now a dataset with one observable (x) with 1000 entries and a sum of weights of ~430K
wdata.Print();
// U n b i n n e d M L f i t t o w e i g h t e d d a t a
// ---------------------------------------------------------------
// Construction quadratic polynomial pdf for fitting
RooRealVar
a0(
"a0"
,
"a0"
, 1);
RooRealVar
a1(
"a1"
,
"a1"
, 0, -1, 1);
RooRealVar
a2(
"a2"
,
"a2"
, 1, 0, 10);
RooPolynomial
p2(
"p2"
,
"p2"
,
x
,
RooArgList
(a0, a1, a2), 0);
// Fit quadratic polynomial to weighted data
// NOTE: A plain Maximum likelihood fit to weighted data does in general
// NOT result in correct error estimates, unless individual
// event weights represent Poisson statistics themselves.
//
// Fit with 'wrong' errors
std::unique_ptr<RooFitResult> r_ml_wgt{p2.fitTo(wdata, Save(), PrintLevel(-1))};
// A first order correction to estimated parameter errors in an
// (unbinned) ML fit can be obtained by calculating the
// covariance matrix as
//
// V' = V C-1 V
//
// where V is the covariance matrix calculated from a fit
// to -logL = - sum [ w_i log f(x_i) ] and C is the covariance
// matrix calculated from -logL' = -sum [ w_i^2 log f(x_i) ]
// (i.e. the weights are applied squared)
//
// A fit in this mode can be performed as follows:
std::unique_ptr<RooFitResult> r_ml_wgt_corr{p2.fitTo(wdata,
Save
(),
SumW2Error
(
true
),
PrintLevel
(-1))};
// P l o t w e i g h e d d a t a a n d f i t r e s u l t
// ---------------------------------------------------------------
// Construct plot frame
RooPlot
*frame =
x
.
frame
(
Title
(
"Unbinned ML fit, binned chi^2 fit to weighted data"
));
// Plot data using sum-of-weights-squared error rather than Poisson errors
wdata.plotOn(frame,
DataError
(
RooAbsData::SumW2
));
// Overlay result of 2nd order polynomial fit to weighted data
p2.plotOn(frame);
// ML Fit of pdf to equivalent unweighted dataset
// -----------------------------------------------------------------------------------------
// Construct a pdf with the same shape as p0 after weighting
RooGenericPdf
genPdf(
"genPdf"
,
"x*x+10"
,
x
);
// Sample a dataset with the same number of events as data
std::unique_ptr<RooDataSet> data2{genPdf.generate(
x
, 1000)};
// Sample a dataset with the same number of weights as data
std::unique_ptr<RooDataSet> data3{genPdf.generate(
x
, 43000)};
// Fit the 2nd order polynomial to both unweighted datasets and save the results for comparison
std::unique_ptr<RooFitResult> r_ml_unw10{p2.fitTo(*data2,
Save
(),
PrintLevel
(-1))};
std::unique_ptr<RooFitResult> r_ml_unw43{p2.fitTo(*data3,
Save
(),
PrintLevel
(-1))};
// C h i 2 f i t o f p d f t o b i n n e d w e i g h t e d d a t a s e t
// ------------------------------------------------------------------------------------
// Construct binned clone of unbinned weighted dataset
std::unique_ptr<RooAbsData> binnedData{wdata.binnedClone()};
binnedData->Print(
"v"
);
// Perform chi2 fit to binned weighted dataset using sum-of-weights errors
//
// NB: Within the usual approximations of a chi2 fit, a chi2 fit to weighted
// data using sum-of-weights-squared errors does give correct error
// estimates
std::unique_ptr<RooAbsReal> chi2{
p2.createChi2(
static_cast<
RooDataHist
&
>
(*binnedData),
DataError
(
RooAbsData::SumW2
))};
RooMinimizer
m
(*chi2);
m
.migrad();
m
.hesse();
// Plot chi^2 fit result on frame as well
std::unique_ptr<RooFitResult> r_chi2_wgt{
m
.save()};
p2.plotOn(frame,
LineStyle
(
kDashed
),
LineColor
(
kRed
));
// C o m p a r e f i t r e s u l t s o f c h i 2 , M L f i t s t o ( u n ) w e i g h t e d d a t a
// ---------------------------------------------------------------------------------------------------------------
// Note that ML fit on 1Kevt of weighted data is closer to result of ML fit on 43Kevt of unweighted data
// than to 1Kevt of unweighted data, whereas the reference chi^2 fit with SumW2 error gives a result closer to
// that of an unbinned ML fit to 1Kevt of unweighted data.
cout <<
"==> ML Fit results on 1K unweighted events"
<< endl;
r_ml_unw10->
Print
();
cout <<
"==> ML Fit results on 43K unweighted events"
<< endl;
r_ml_unw43->Print();
cout <<
"==> ML Fit results on 1K weighted events with a summed weight of 43K"
<< endl;
r_ml_wgt->Print();
cout <<
"==> Corrected ML Fit results on 1K weighted events with a summed weight of 43K"
<< endl;
r_ml_wgt_corr->Print();
cout <<
"==> Chi2 Fit results on 1K weighted events with a summed weight of 43K"
<< endl;
r_chi2_wgt->Print();
new
TCanvas
(
"rf403_weightedevts"
,
"rf403_weightedevts"
, 600, 600);
gPad
->SetLeftMargin(0.15);
frame->
GetYaxis
()->
SetTitleOffset
(1.8);
frame->
Draw
();
}
RooDataHist.h
RooDataSet.h
RooFitResult.h
RooFormulaVar.h
RooGaussian.h
RooGenericPdf.h
RooMinimizer.h
RooPlot.h
RooPolynomial.h
RooRealVar.h
kRed
@ kRed
Definition
Rtypes.h:66
kDashed
@ kDashed
Definition
TAttLine.h:48
TAxis.h
TCanvas.h
data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Definition
TGWin32VirtualXProxy.cxx:104
gPad
#define gPad
Definition
TVirtualPad.h:308
RooAbsData::SumW2
@ SumW2
Definition
RooAbsData.h:108
RooArgList
RooArgList is a container object that can hold multiple RooAbsArg objects.
Definition
RooArgList.h:22
RooDataHist
Container class to hold N-dimensional binned data.
Definition
RooDataHist.h:40
RooDataSet
Container class to hold unbinned data.
Definition
RooDataSet.h:34
RooFormulaVar
A RooFormulaVar is a generic implementation of a real-valued object, which takes a RooArgList of serv...
Definition
RooFormulaVar.h:30
RooGenericPdf
Implementation of a probability density function that takes a RooArgList of servers and a C++ express...
Definition
RooGenericPdf.h:25
RooMinimizer
Wrapper class around ROOT::Math::Minimizer that provides a seamless interface between the minimizer f...
Definition
RooMinimizer.h:48
RooPlot
Plot frame and a container for graphics objects within that frame.
Definition
RooPlot.h:43
RooPlot::frame
static RooPlot * frame(const RooAbsRealLValue &var, double xmin, double xmax, Int_t nBins)
Create a new frame for a given variable in x.
Definition
RooPlot.cxx:185
RooPlot::GetYaxis
TAxis * GetYaxis() const
Definition
RooPlot.cxx:1224
RooPlot::Draw
void Draw(Option_t *options=nullptr) override
Draw this plot and all of the elements it contains.
Definition
RooPlot.cxx:597
RooPolynomial
RooPolynomial implements a polynomial p.d.f of the form.
Definition
RooPolynomial.h:25
RooRealVar
Variable that can be changed from the outside.
Definition
RooRealVar.h:37
TAttAxis::SetTitleOffset
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title.
Definition
TAttAxis.cxx:298
TCanvas
The Canvas class.
Definition
TCanvas.h:23
TMarker::Print
void Print(Option_t *option="") const override
Dump this marker with its attributes.
Definition
TMarker.cxx:339
RooFit::Save
RooCmdArg Save(bool flag=true)
Definition
RooGlobalFunc.cxx:654
RooFit::SumW2Error
RooCmdArg SumW2Error(bool flag)
Definition
RooGlobalFunc.cxx:738
RooFit::PrintLevel
RooCmdArg PrintLevel(Int_t code)
Definition
RooGlobalFunc.cxx:662
RooFit::DataError
RooCmdArg DataError(Int_t)
Definition
RooGlobalFunc.cxx:471
RooFit::LineColor
RooCmdArg LineColor(Color_t color)
Definition
RooGlobalFunc.cxx:216
RooFit::LineStyle
RooCmdArg LineStyle(Style_t style)
Definition
RooGlobalFunc.cxx:241
x
Double_t x[n]
Definition
legend1.C:17
RooFit
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition
CodegenImpl.h:64
rf403_weightedevts
Definition
rf403_weightedevts.py:1
xmlio::Title
const char * Title
Definition
TXMLSetup.cxx:68
m
TMarker m
Definition
textangle.C:8
RooDataSet::pxData[x,w] = 1000 entries
RooDataSet::pxData[x,weight:w] = 1000 entries (43238.9 weighted)
[#1] INFO:Fitting -- RooAbsPdf::fitTo(p2_over_p2_Int[x]) fixing normalization set for coefficient determination to observables in data
[#1] INFO:Fitting -- using CPU computation library compiled with -mavx2
[#0] WARNING:InputArguments -- RooAbsPdf::fitTo(p2): WARNING: a likelihood fit is requested of what appears to be weighted data.
While the estimated values of the parameters will always be calculated taking the weights into account,
there are multiple ways to estimate the errors of the parameters. You are advised to make an
explicit choice for the error calculation:
- Either provide SumW2Error(true), to calculate a sum-of-weights-corrected HESSE error matrix
(error will be proportional to the number of events in MC).
- Or provide SumW2Error(false), to return errors from original HESSE error matrix
(which will be proportional to the sum of the weights, i.e., a dataset with <sum of weights> events).
- Or provide AsymptoticError(true), to use the asymptotically correct expression
(for details see https://arxiv.org/abs/1911.01303)."
[#1] INFO:Fitting -- RooAddition::defaultErrorLevel(nll_p2_over_p2_Int[x]_pxData) Summation contains a RooNLLVar, using its error level
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: activating const optimization
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: deactivating const optimization
[#1] INFO:Fitting -- RooAbsPdf::fitTo(p2_over_p2_Int[x]) fixing normalization set for coefficient determination to observables in data
[#1] INFO:Fitting -- RooAddition::defaultErrorLevel(nll_p2_over_p2_Int[x]_pxData) Summation contains a RooNLLVar, using its error level
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: activating const optimization
[#1] INFO:Fitting -- RooAbsPdf::fitTo(p2) Calculating sum-of-weights-squared correction matrix for covariance matrix
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: deactivating const optimization
[#1] INFO:NumericIntegration -- RooRealIntegral::init(genPdf_Int[x]) using numeric integrator RooIntegrator1D to calculate Int(x)
[#1] INFO:NumericIntegration -- RooRealIntegral::init(genPdf_Int[x]) using numeric integrator RooIntegrator1D to calculate Int(x)
[#1] INFO:NumericIntegration -- RooRealIntegral::init(genPdf_Int[x]) using numeric integrator RooIntegrator1D to calculate Int(x)
[#1] INFO:NumericIntegration -- RooRealIntegral::init(genPdf_Int[x]) using numeric integrator RooIntegrator1D to calculate Int(x)
[#1] INFO:Fitting -- RooAbsPdf::fitTo(p2_over_p2_Int[x]) fixing normalization set for coefficient determination to observables in data
[#1] INFO:Fitting -- RooAddition::defaultErrorLevel(nll_p2_over_p2_Int[x]_genPdfData) Summation contains a RooNLLVar, using its error level
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: activating const optimization
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: deactivating const optimization
[#1] INFO:Fitting -- RooAbsPdf::fitTo(p2_over_p2_Int[x]) fixing normalization set for coefficient determination to observables in data
[#1] INFO:Fitting -- RooAddition::defaultErrorLevel(nll_p2_over_p2_Int[x]_genPdfData) Summation contains a RooNLLVar, using its error level
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: activating const optimization
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: deactivating const optimization
DataStore pxData_binned (Generated From px_binned)
Contains 40 entries
Observables:
1) x = 9.75 L(-10 - 10) B(40) "x"
Binned Dataset pxData_binned (Generated From px_binned)
Contains 40 bins with a total weight of 43238.9
Observables: 1) x = 9.75 L(-10 - 10) B(40) "x"
Minuit2Minimizer: Minimize with max-calls 1000 convergence for edm < 1 strategy 1
Minuit2Minimizer : Valid minimum - status = 0
FVAL = 31.3747451817532266
Edm = 4.13511525432537654e-08
Nfcn = 29
a1 = -0.009989 +/- 0.0262975 (limited)
a2 = 0.106373 +/- 0.0101849 (limited)
==> ML Fit results on 1K unweighted events
RooFitResult: minimized FCN value: 2766.49, estimated distance to minimum: 0.000399952
covariance matrix quality: Full, accurate covariance matrix
Status : MINIMIZE=0 HESSE=0
Floating Parameter FinalValue +/- Error
-------------------- --------------------------
a1 8.9483e-03 +/- 2.70e-02
a2 1.0177e-01 +/- 1.69e-02
==> ML Fit results on 43K unweighted events
RooFitResult: minimized FCN value: 118892, estimated distance to minimum: 0.000206627
covariance matrix quality: Full, accurate covariance matrix
Status : MINIMIZE=0 HESSE=0
Floating Parameter FinalValue +/- Error
-------------------- --------------------------
a1 -1.2106e-03 +/- 4.02e-03
a2 9.7573e-02 +/- 2.37e-03
==> ML Fit results on 1K weighted events with a summed weight of 43K
RooFitResult: minimized FCN value: 119682, estimated distance to minimum: 1.25398e-05
covariance matrix quality: Full, accurate covariance matrix
Status : MINIMIZE=0 HESSE=0
Floating Parameter FinalValue +/- Error
-------------------- --------------------------
a1 -4.8713e-03 +/- 4.03e-03
a2 9.8645e-02 +/- 2.41e-03
==> Corrected ML Fit results on 1K weighted events with a summed weight of 43K
RooFitResult: minimized FCN value: 119682, estimated distance to minimum: 79498.5
covariance matrix quality: Full, accurate covariance matrix
Status : MINIMIZE=0 HESSE=0 HESSE=0
Floating Parameter FinalValue +/- Error
-------------------- --------------------------
a1 -4.8565e-03 +/- 3.00e-02
a2 9.8652e-02 +/- 2.99e-02
==> Chi2 Fit results on 1K weighted events with a summed weight of 43K
RooFitResult: minimized FCN value: 31.3747, estimated distance to minimum: 4.135e-08
covariance matrix quality: Full, accurate covariance matrix
Status : MIGRAD=0 HESSE=0
Floating Parameter FinalValue +/- Error
-------------------- --------------------------
a1 -9.9890e-03 +/- 2.63e-02
a2 1.0637e-01 +/- 1.02e-02
Date
July 2008
Author
Wouter Verkerke
Definition in file
rf403_weightedevts.C
.
tutorials
roofit
rf403_weightedevts.C
ROOT master - Reference Guide Generated on Thu Dec 19 2024 09:47:45 (GVA Time) using Doxygen 1.9.8