Logo ROOT   6.16/01
Reference Guide
StandardTestStatDistributionDemo.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_roostats
3/// \notebook
4/// StandardTestStatDistributionDemo.C
5///
6/// This simple script plots the sampling distribution of the profile likelihood
7/// ratio test statistic based on the input Model File. To do this one needs to
8/// specify the value of the parameter of interest that will be used for evaluating
9/// the test statistic and the value of the parameters used for generating the toy data.
10/// In this case, it uses the upper-limit estimated from the ProfileLikleihoodCalculator,
11/// which assumes the asymptotic chi-square distribution for -2 log profile likelihood ratio.
12/// Thus, the script is handy for checking to see if the asymptotic approximations are valid.
13/// To aid, that comparison, the script overlays a chi-square distribution as well.
14/// The most common parameter of interest is a parameter proportional to the signal rate,
15/// and often that has a lower-limit of 0, which breaks the standard chi-square distribution.
16/// Thus the script allows the parameter to be negative so that the overlay chi-square is
17/// the correct asymptotic distribution.
18///
19/// \macro_image
20/// \macro_output
21/// \macro_code
22///
23/// \author Kyle Cranmer
24
25#include "TFile.h"
26#include "TROOT.h"
27#include "TH1F.h"
28#include "TCanvas.h"
29#include "TSystem.h"
30#include "TF1.h"
31#include "TSystem.h"
32
33#include "RooWorkspace.h"
34#include "RooAbsData.h"
35
41
47
48using namespace RooFit;
49using namespace RooStats;
50
51
52bool useProof = false; // flag to control whether to use Proof
53int nworkers = 0; // number of workers (default use all available cores)
54
55// -------------------------------------------------------
56// The actual macro
57
58void StandardTestStatDistributionDemo(const char* infile = "",
59 const char* workspaceName = "combined",
60 const char* modelConfigName = "ModelConfig",
61 const char* dataName = "obsData"){
62
63
64 // the number of toy MC used to generate the distribution
65 int nToyMC = 1000;
66 // The parameter below is needed for asymptotic distribution to be chi-square,
67 // but set to false if your model is not numerically stable if mu<0
68 bool allowNegativeMu=true;
69
70
71 // -------------------------------------------------------
72 // First part is just to access a user-defined file
73 // or create the standard example file if it doesn't exist
74 const char* filename = "";
75 if (!strcmp(infile,"")) {
76 filename = "results/example_combined_GaussExample_model.root";
77 bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code
78 // if file does not exists generate with histfactory
79 if (!fileExist) {
80#ifdef _WIN32
81 cout << "HistFactory file cannot be generated on Windows - exit" << endl;
82 return;
83#endif
84 // Normally this would be run on the command line
85 cout <<"will run standard hist2workspace example"<<endl;
86 gROOT->ProcessLine(".! prepareHistFactory .");
87 gROOT->ProcessLine(".! hist2workspace config/example.xml");
88 cout <<"\n\n---------------------"<<endl;
89 cout <<"Done creating example input"<<endl;
90 cout <<"---------------------\n\n"<<endl;
91 }
92
93 }
94 else
95 filename = infile;
96
97 // Try to open the file
98 TFile *file = TFile::Open(filename);
99
100 // if input file was specified byt not found, quit
101 if(!file ){
102 cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl;
103 return;
104 }
105
106
107 // -------------------------------------------------------
108 // Now get the data and workspace
109
110 // get the workspace out of the file
111 RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName);
112 if(!w){
113 cout <<"workspace not found" << endl;
114 return;
115 }
116
117 // get the modelConfig out of the file
118 ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName);
119
120 // get the modelConfig out of the file
121 RooAbsData* data = w->data(dataName);
122
123 // make sure ingredients are found
124 if(!data || !mc){
125 w->Print();
126 cout << "data or ModelConfig was not found" <<endl;
127 return;
128 }
129
130 mc->Print();
131 // -------------------------------------------------------
132 // Now find the upper limit based on the asymptotic results
133 RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first();
135 LikelihoodInterval* interval = plc.GetInterval();
136 double plcUpperLimit = interval->UpperLimit(*firstPOI);
137 delete interval;
138 cout << "\n\n--------------------------------------"<<endl;
139 cout <<"Will generate sampling distribution at " << firstPOI->GetName() << " = " << plcUpperLimit <<endl;
140 int nPOI = mc->GetParametersOfInterest()->getSize();
141 if(nPOI>1){
142 cout <<"not sure what to do with other parameters of interest, but here are their values"<<endl;
143 mc->GetParametersOfInterest()->Print("v");
144 }
145
146 // -------------------------------------------------------
147 // create the test stat sampler
149
150 // to avoid effects from boundary and simplify asymptotic comparison, set min=-max
151 if(allowNegativeMu)
152 firstPOI->setMin(-1*firstPOI->getMax());
153
154 // temporary RooArgSet
155 RooArgSet poi;
156 poi.add(*mc->GetParametersOfInterest());
157
158 // create and configure the ToyMCSampler
159 ToyMCSampler sampler(ts,nToyMC);
160 sampler.SetPdf(*mc->GetPdf());
161 sampler.SetObservables(*mc->GetObservables());
162 sampler.SetGlobalObservables(*mc->GetGlobalObservables());
163 if(!mc->GetPdf()->canBeExtended() && (data->numEntries()==1)){
164 cout << "tell it to use 1 event"<<endl;
165 sampler.SetNEventsPerToy(1);
166 }
167 firstPOI->setVal(plcUpperLimit); // set POI value for generation
168 sampler.SetParametersForTestStat(*mc->GetParametersOfInterest()); // set POI value for evaluation
169
170 if (useProof) {
171 ProofConfig pc(*w, nworkers, "",false);
172 sampler.SetProofConfig(&pc); // enable proof
173 }
174
175 firstPOI->setVal(plcUpperLimit);
176 RooArgSet allParameters;
177 allParameters.add(*mc->GetParametersOfInterest());
178 allParameters.add(*mc->GetNuisanceParameters());
179 allParameters.Print("v");
180
181 SamplingDistribution* sampDist = sampler.GetSamplingDistribution(allParameters);
182 SamplingDistPlot plot;
183 plot.AddSamplingDistribution(sampDist);
184 plot.GetTH1F(sampDist)->GetYaxis()->SetTitle(Form("f(-log #lambda(#mu=%.2f) | #mu=%.2f)",plcUpperLimit,plcUpperLimit));
185 plot.SetAxisTitle(Form("-log #lambda(#mu=%.2f)",plcUpperLimit));
186
187 TCanvas* c1 = new TCanvas("c1");
188 c1->SetLogy();
189 plot.Draw();
190 double min = plot.GetTH1F(sampDist)->GetXaxis()->GetXmin();
191 double max = plot.GetTH1F(sampDist)->GetXaxis()->GetXmax();
192
193 TF1* f = new TF1("f",Form("2*ROOT::Math::chisquared_pdf(2*x,%d,0)",nPOI),min,max);
194 f->Draw("same");
195 c1->SaveAs("standard_test_stat_distribution.pdf");
196
197}
#define f(i)
Definition: RSha256.hxx:104
#define gROOT
Definition: TROOT.h:410
char * Form(const char *fmt,...)
R__EXTERN TSystem * gSystem
Definition: TSystem.h:540
Int_t getSize() const
RooAbsArg * first() const
virtual void Print(Option_t *options=0) const
This method must be overridden when a class wants to print itself.
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:37
Bool_t canBeExtended() const
Definition: RooAbsPdf.h:230
virtual Double_t getMax(const char *name=0) const
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:28
virtual Bool_t add(const RooAbsCollection &col, Bool_t silent=kFALSE)
Add a collection of arguments to this collection by calling add() for each element in the source coll...
Definition: RooArgSet.h:88
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:36
void setMin(const char *name, Double_t value)
Set minimum of name range to given value.
Definition: RooRealVar.cxx:387
virtual void setVal(Double_t value)
Set value of variable to 'value'.
Definition: RooRealVar.cxx:204
LikelihoodInterval is a concrete implementation of the RooStats::ConfInterval interface.
Double_t UpperLimit(const RooRealVar &param)
return the upper bound of the interval on a given parameter
ModelConfig is a simple class that holds configuration information specifying how a model should be u...
Definition: ModelConfig.h:30
const RooArgSet * GetGlobalObservables() const
get RooArgSet for global observables (return NULL if not existing)
Definition: ModelConfig.h:249
const RooArgSet * GetParametersOfInterest() const
get RooArgSet containing the parameter of interest (return NULL if not existing)
Definition: ModelConfig.h:231
const RooArgSet * GetNuisanceParameters() const
get RooArgSet containing the nuisance parameters (return NULL if not existing)
Definition: ModelConfig.h:234
virtual void Print(Option_t *option="") const override
overload the print method
const RooArgSet * GetObservables() const
get RooArgSet for observables (return NULL if not existing)
Definition: ModelConfig.h:243
RooAbsPdf * GetPdf() const
get model PDF (return NULL if pdf has not been specified or does not exist)
Definition: ModelConfig.h:228
ProfileLikelihoodCalculator is a concrete implementation of CombinedCalculator (the interface class f...
ProfileLikelihoodTestStat is an implementation of the TestStatistic interface that calculates the pro...
Holds configuration options for proof and proof-lite.
Definition: ProofConfig.h:46
This class provides simple and straightforward utilities to plot SamplingDistribution objects.
Double_t AddSamplingDistribution(const SamplingDistribution *samplingDist, Option_t *drawOptions="NORMALIZE HIST")
adds the sampling distribution and returns the scale factor
void Draw(Option_t *options=0)
Draw this plot and all of the elements it contains.
void SetAxisTitle(char *varName)
TH1F * GetTH1F(const SamplingDistribution *samplDist=NULL)
Returns the TH1F associated with the give SamplingDistribution.
This class simply holds a sampling distribution of some test statistic.
const std::vector< Double_t > & GetSamplingDistribution() const
Get test statistics values.
ToyMCSampler is an implementation of the TestStatSampler interface.
Definition: ToyMCSampler.h:71
The RooWorkspace is a persistable container for RooFit projects.
Definition: RooWorkspace.h:43
RooAbsData * data(const char *name) const
Retrieve dataset (binned or unbinned) with given name. A null pointer is returned if not found.
void Print(Option_t *opts=0) const
Print contents of the workspace.
TObject * obj(const char *name) const
Return any type of object (RooAbsArg, RooAbsData or generic object) with given name)
Double_t GetXmax() const
Definition: TAxis.h:134
Double_t GetXmin() const
Definition: TAxis.h:133
The Canvas class.
Definition: TCanvas.h:31
1-Dim function class
Definition: TF1.h:211
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3975
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:316
TAxis * GetYaxis()
Definition: TH1.h:317
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition: TSystem.cxx:1286
return c1
Definition: legend1.C:41
@(#)root/roostats:$Id$ Author: George Lewis, Kyle Cranmer
Definition: Asimov.h:20
static constexpr double pc
Definition: file.py:1