A hypothesis testing example based on number counting with background uncertainty.
NOTE: This example must be run with the ACLIC (the + option ) due to the new class that is defined.
The basic setup here is that a main measurement has observed x events with an expectation of s+b. One can choose an ad hoc prior for the uncertainty on b, or try to base it on an auxiliary measurement. In this case, the auxiliary measurement (aka control measurement, sideband) is another counting experiment with measurement y and expectation tau*b. With an 'original prior' on b, called \(\eta(b)\) then one can obtain a posterior from the auxiliary measurement \(\pi(b) = \eta(b) * Pois(y|tau*b).\) This is a principled choice for a prior on b in the main measurement of x, which can then be treated in a hybrid Bayesian/Frequentist way. Additionally, one can try to treat the two measurements simultaneously, which is detailed in Part 6 of the tutorial.
This tutorial is related to the FourBin.C tutorial in the modeling, but focuses on hypothesis testing instead of interval estimation.
More background on this 'prototype problem' can be found in the following papers:
␛[1mRooFit v3.60 -- Developed by Wouter Verkerke and David Kirkby␛[0m
Copyright (C) 2000-2013 NIKHEF, University of California & Stanford University
All rights reserved, please read http://roofit.sourceforge.net/license.txt
-----------------------------------------
Part 2
Hybrid p-value from direct integration = 0.000774094
Z_Gamma Significance = 3.1655
-----------------------------------------
Part 3
Z_Bi p-value (analytic): 0.00094165
Z_Bi significance (analytic): 3.10804
Real time 0:00:04, CP time 4.280
[#0] WARNING:Eval -- RooStatsUtils::MakeNuisancePdf - no constraints found on nuisance parameters in the input model
[#0] WARNING:Eval -- RooStatsUtils::MakeNuisancePdf - no constraints found on nuisance parameters in the input model
-----------------------------------------
Part 4
Results HypoTestCalculator_result:
- Null p-value = 0.00115 +/- 0.000239654
- Significance = 3.04848 +/- 0.0626148 sigma
- Number of Alt toys: 1000
- Number of Null toys: 20000
- Test statistic evaluated on data: 150
- CL_b: 0.00115 +/- 0.000239654
- CL_s+b: 0.524 +/- 0.0157932
- CL_s: 455.652 +/- 95.9434
Real time 0:00:03, CP time 3.080
[#0] WARNING:Eval -- RooStatsUtils::MakeNuisancePdf - no constraints found on nuisance parameters in the input model
[#0] WARNING:Eval -- RooStatsUtils::MakeNuisancePdf - no constraints found on nuisance parameters in the input model
-----------------------------------------
Part 5
Results HypoTestCalculator_result:
- Null p-value = 0.0009 +/- 0.000212037
- Significance = 3.12139 +/- 0.0693716 sigma
- Number of Alt toys: 1000
- Number of Null toys: 20000
- Test statistic evaluated on data: 10.8198
- CL_b: 0.0009 +/- 0.000212037
- CL_s+b: 0.535 +/- 0.0157726
- CL_s: 594.444 +/- 141.141
Real time 0:00:03, CP time 3.580
[#0] WARNING:Eval -- RooStatsUtils::MakeNuisancePdf - no constraints found on nuisance parameters in the input model
[#0] WARNING:Eval -- RooStatsUtils::MakeNuisancePdf - no constraints found on nuisance parameters in the input model
-----------------------------------------
Part 6
Results HypoTestCalculator_result:
- Null p-value = 0.000666667 +/- 0.000149021
- Significance = 3.20871 +/- 0.0642752 sigma
- Number of Alt toys: 1000
- Number of Null toys: 30000
- Test statistic evaluated on data: 5.03388
- CL_b: 0.000666667 +/- 0.000149021
- CL_s+b: 0.489 +/- 0.0158076
- CL_s: 733.5 +/- 165.667
Real time 0:00:35, CP time 35.150
class BinCountTestStat : public TestStatistic {
public:
BinCountTestStat(void) : fColumnName("tmp") {}
BinCountTestStat(string columnName) : fColumnName(columnName) {}
for(
int i=0; i <
data.numEntries(); i++) {
value +=
data.get(i)->getRealValue(fColumnName.c_str());
}
return value;
}
virtual const TString GetVarName()
const {
return fColumnName; }
private:
string fColumnName;
protected:
};
void HybridInstructional() {
w->
factory(
"Poisson::px(x[150,0,500],sum::splusb(s[0,0,100],b[100,0,300]))");
w->
factory(
"Poisson::py(y[100,0,500],prod::taub(tau[1.],b))");
w->
factory(
"PROJ::averagedModel(PROD::foo(px|b,py,prior_b),b)") ;
cout << "-----------------------------------------"<<endl;
cout << "Part 2" << endl;
cout <<
"Hybrid p-value from direct integration = " << 1-cdf->
getVal() << endl;
cout << "Z_Gamma Significance = " <<
cout << "-----------------------------------------"<<endl;
cout << "Part 3" << endl;
std::cout << "Z_Bi p-value (analytic): " << p_Bi << std::endl;
std::cout << "Z_Bi significance (analytic): " << Z_Bi << std::endl;
ModelConfig b_model("B_model", w);
b_model.SetPdf(*w->
pdf(
"px"));
b_model.SetObservables(*w->
set(
"obs"));
b_model.SetParametersOfInterest(*w->
set(
"poi"));
b_model.SetSnapshot(*w->
set(
"poi"));
ModelConfig sb_model("S+B_model", w);
sb_model.SetPdf(*w->
pdf(
"px"));
sb_model.SetObservables(*w->
set(
"obs"));
sb_model.SetParametersOfInterest(*w->
set(
"poi"));
sb_model.SetSnapshot(*w->
set(
"poi"));
BinCountTestStat binCount("x");
w->
factory(
"Gaussian::gauss_prior(b,y, expr::sqrty('sqrt(y)',y))");
w->
factory(
"Lognormal::lognorm_prior(b,y, expr::kappa('1+1./sqrt(y)',y))");
HybridCalculator hc1(*
data, sb_model, b_model);
ToyMCSampler *toymcs1 = (ToyMCSampler*)hc1.GetTestStatSampler();
toymcs1->SetNEventsPerToy(1);
toymcs1->SetTestStatistic(&binCount);
hc1.SetToys(20000,1000);
hc1.ForcePriorNuisanceAlt(*w->
pdf(
"py"));
hc1.ForcePriorNuisanceNull(*w->
pdf(
"py"));
HypoTestResult *r1 = hc1.GetHypoTest();
cout << "-----------------------------------------"<<endl;
cout << "Part 4" << endl;
r1->Print();
HypoTestPlot *
p1 =
new HypoTestPlot(*r1,30);
SimpleLikelihoodRatioTestStat slrts(*b_model.GetPdf(),*sb_model.GetPdf());
slrts.SetNullParameters(*b_model.GetSnapshot());
slrts.SetAltParameters(*sb_model.GetSnapshot());
HybridCalculator hc2(*
data, sb_model, b_model);
ToyMCSampler *toymcs2 = (ToyMCSampler*)hc2.GetTestStatSampler();
toymcs2->SetNEventsPerToy(1);
toymcs2->SetTestStatistic(&slrts);
hc2.SetToys(20000,1000);
hc2.ForcePriorNuisanceAlt(*w->
pdf(
"py"));
hc2.ForcePriorNuisanceNull(*w->
pdf(
"py"));
if(
pc) toymcs2->SetProofConfig(
pc);
HypoTestResult *r2 = hc2.GetHypoTest();
cout << "-----------------------------------------"<<endl;
cout << "Part 5" << endl;
r2->Print();
HypoTestPlot *
p2 =
new HypoTestPlot(*r2,30);
dataXY->
add(*w->
set(
"obsXY"));
ModelConfig b_modelXY("B_modelXY", w);
b_modelXY.SetPdf(*w->
pdf(
"model"));
b_modelXY.SetObservables(*w->
set(
"obsXY"));
b_modelXY.SetParametersOfInterest(*w->
set(
"poi"));
b_modelXY.SetSnapshot(*w->
set(
"poi"));
ModelConfig sb_modelXY("S+B_modelXY", w);
sb_modelXY.SetPdf(*w->
pdf(
"model"));
sb_modelXY.SetObservables(*w->
set(
"obsXY"));
sb_modelXY.SetParametersOfInterest(*w->
set(
"poi"));
sb_modelXY.SetSnapshot(*w->
set(
"poi"));
RatioOfProfiledLikelihoodsTestStat
ropl(*b_modelXY.GetPdf(), *sb_modelXY.GetPdf(), sb_modelXY.GetSnapshot());
ropl.SetSubtractMLE(false);
ProfileLikelihoodTestStat profll(*b_modelXY.GetPdf());
MaxLikelihoodEstimateTestStat mlets(*sb_modelXY.GetPdf(), *w->
var(
"s"));
w->
factory(
"Gamma::gamma_y0(b,sum::temp0(y0,1),1,0)");
w->
factory(
"Gaussian::gauss_prior_y0(b,y0, expr::sqrty0('sqrt(y0)',y0))");
HybridCalculator hc3(*dataXY, sb_modelXY, b_modelXY);
ToyMCSampler *toymcs3 = (ToyMCSampler*)hc3.GetTestStatSampler();
toymcs3->SetNEventsPerToy(1);
toymcs3->SetTestStatistic(&slrts);
hc3.SetToys(30000,1000);
hc3.ForcePriorNuisanceAlt(*w->
pdf(
"gamma_y0"));
hc3.ForcePriorNuisanceNull(*w->
pdf(
"gamma_y0"));
toymcs3->SetTestStatistic(&profll);
if(
pc) toymcs3->SetProofConfig(
pc);
HypoTestResult *r3 = hc3.GetHypoTest();
cout << "-----------------------------------------"<<endl;
cout << "Part 6" << endl;
r3->Print();
HypoTestPlot *
p3 =
new HypoTestPlot(*r3,50);
}
static double p3(double t, double a, double b, double c, double d)
static double p1(double t, double a, double b)
static double p2(double t, double a, double b, double c)
#define ClassDef(name, id)
RooAbsData is the common abstract base class for binned and unbinned datasets.
RooAbsReal * createCdf(const RooArgSet &iset, const RooArgSet &nset=RooArgSet())
Create a cumulative distribution function of this p.d.f in terms of the observables listed in iset.
virtual RooPlot * plotOn(RooPlot *frame, const RooCmdArg &arg1=RooCmdArg::none(), const RooCmdArg &arg2=RooCmdArg::none(), const RooCmdArg &arg3=RooCmdArg::none(), const RooCmdArg &arg4=RooCmdArg::none(), const RooCmdArg &arg5=RooCmdArg::none(), const RooCmdArg &arg6=RooCmdArg::none(), const RooCmdArg &arg7=RooCmdArg::none(), const RooCmdArg &arg8=RooCmdArg::none(), const RooCmdArg &arg9=RooCmdArg::none(), const RooCmdArg &arg10=RooCmdArg::none()) const
Helper calling plotOn(RooPlot*, RooLinkedList&) const.
RooPlot * frame(const RooCmdArg &arg1, const RooCmdArg &arg2=RooCmdArg::none(), const RooCmdArg &arg3=RooCmdArg::none(), const RooCmdArg &arg4=RooCmdArg::none(), const RooCmdArg &arg5=RooCmdArg::none(), const RooCmdArg &arg6=RooCmdArg::none(), const RooCmdArg &arg7=RooCmdArg::none(), const RooCmdArg &arg8=RooCmdArg::none()) const
Create a new RooPlot on the heap with a drawing frame initialized for this object,...
RooAbsReal is the common abstract base class for objects that represent a real value and implements f...
Double_t getVal(const RooArgSet *set=0) const
Evaluate object. Returns either cached value or triggers a recalculation.
RooArgSet is a container object that can hold multiple RooAbsArg objects.
RooDataSet is a container class to hold unbinned data.
virtual void add(const RooArgSet &row, Double_t weight=1.0, Double_t weightError=0)
Add a data point, with its coordinates specified in the 'data' argset, to the data set.
static RooMsgService & instance()
Return reference to singleton instance.
void setGlobalKillBelow(RooFit::MsgLevel level)
RooFit::MsgLevel globalKillBelow() const
A RooPlot is a plot frame and a container for graphics objects within that frame.
virtual void Draw(Option_t *options=0)
Draw this plot and all of the elements it contains.
virtual void setVal(Double_t value)
Set value of variable to 'value'.
The RooWorkspace is a persistable container for RooFit projects.
Bool_t defineSet(const char *name, const RooArgSet &aset, Bool_t importMissing=kFALSE)
Define a named RooArgSet with given constituents.
RooRealVar * var(const char *name) const
Retrieve real-valued variable (RooRealVar) with given name. A null pointer is returned if not found.
RooFactoryWSTool & factory()
Return instance to factory tool.
const RooArgSet * set(const char *name)
Return pointer to previously defined named set with given nmame If no such set is found a null pointe...
RooAbsPdf * pdf(const char *name) const
Retrieve p.d.f (RooAbsPdf) with given name. A null pointer is returned if not found.
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
void Stop()
Stop the stopwatch.
void Print(Option_t *option="") const
Print the real and cpu time passed between the start and stop events.
RooCmdArg LineColor(Color_t color)
Double_t BinomialWithTauObsZ(Double_t nObs, Double_t bExp, Double_t tau)
Double_t BinomialWithTauObsP(Double_t nObs, Double_t bExp, Double_t tau)
@(#)root/roostats:$Id$ Author: George Lewis, Kyle Cranmer
Double_t PValueToSignificance(Double_t pvalue)
returns one-sided significance corresponding to a p-value
static constexpr double pc