Logo ROOT   6.07/09
Reference Guide
StandardBayesianNumericalDemo.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_roostats
3 /// \notebook -js
4 /// Standard demo of the numerical Bayesian calculator
5 ///
6 /// This is a standard demo that can be used with any ROOT file
7 /// prepared in the standard way. You specify:
8 /// - name for input ROOT file
9 /// - name of workspace inside ROOT file that holds model and data
10 /// - name of ModelConfig that specifies details for calculator tools
11 /// - name of dataset
12 ///
13 /// With default parameters the macro will attempt to run the
14 /// standard hist2workspace example and read the ROOT file
15 /// that it produces.
16 ///
17 /// The actual heart of the demo is only about 10 lines long.
18 ///
19 /// The BayesianCalculator is based on Bayes's theorem
20 /// and performs the integration using ROOT's numeric integration utilities
21 ///
22 /// \macro_image
23 /// \macro_output
24 /// \macro_code
25 ///
26 /// \author Kyle Cranmer
27 
28 #include "TFile.h"
29 #include "TROOT.h"
30 #include "RooWorkspace.h"
31 #include "RooAbsData.h"
32 #include "RooRealVar.h"
33 
34 #include "RooUniform.h"
35 #include "RooStats/ModelConfig.h"
38 #include "RooStats/RooStatsUtils.h"
39 #include "RooPlot.h"
40 #include "TSystem.h"
41 
42 using namespace RooFit;
43 using namespace RooStats;
44 
45 
46 struct BayesianNumericalOptions {
47 
48  double confLevel = 0.95 ; // interval CL
49  TString integrationType = ""; // integration Type (default is adaptive (numerical integration)
50  // possible values are "TOYMC" (toy MC integration, work when nuisances have a constraints pdf)
51  // "VEGAS" , "MISER", or "PLAIN" (these are all possible MC integration)
52  int nToys = 10000; // number of toys used for the MC integrations - for Vegas should be probably set to an higher value
53  bool scanPosterior = false; // flag to compute interval by scanning posterior (it is more robust but maybe less precise)
54  int nScanPoints = 20; // number of points for scanning the posterior (if scanPosterior = false it is used only for plotting). Use by default a low value to speed-up tutorial
55  int intervalType = 1; // type of interval (0 is shortest, 1 central, 2 upper limit)
56  double maxPOI = -999; // force a different value of POI for doing the scan (default is given value)
57  double nSigmaNuisance = -1; // force integration of nuisance parameters to be within nSigma of their error (do first a model fit to find nuisance error)
58 
59 };
60 
61 BayesianNumericalOptions optBayes;
62 
63 void StandardBayesianNumericalDemo(const char* infile = "",
64  const char* workspaceName = "combined",
65  const char* modelConfigName = "ModelConfig",
66  const char* dataName = "obsData") {
67 
68  // option definitions
69  double confLevel = optBayes.confLevel;
70  TString integrationType = optBayes.integrationType;
71  int nToys = optBayes.nToys;
72  bool scanPosterior = optBayes.scanPosterior;
73  int nScanPoints = optBayes.nScanPoints;
74  int intervalType = optBayes.intervalType;
75  int maxPOI = optBayes.maxPOI;
76  double nSigmaNuisance = optBayes.nSigmaNuisance;
77 
78 
79 
80  // -------------------------------------------------------
81  // First part is just to access a user-defined file
82  // or create the standard example file if it doesn't exist
83 
84  const char* filename = "";
85  if (!strcmp(infile,"")) {
86  filename = "results/example_combined_GaussExample_model.root";
87  bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code
88  // if file does not exists generate with histfactory
89  if (!fileExist) {
90 #ifdef _WIN32
91  cout << "HistFactory file cannot be generated on Windows - exit" << endl;
92  return;
93 #endif
94  // Normally this would be run on the command line
95  cout <<"will run standard hist2workspace example"<<endl;
96  gROOT->ProcessLine(".! prepareHistFactory .");
97  gROOT->ProcessLine(".! hist2workspace config/example.xml");
98  cout <<"\n\n---------------------"<<endl;
99  cout <<"Done creating example input"<<endl;
100  cout <<"---------------------\n\n"<<endl;
101  }
102 
103  }
104  else
105  filename = infile;
106 
107  // Try to open the file
108  TFile *file = TFile::Open(filename);
109 
110  // if input file was specified byt not found, quit
111  if(!file ){
112  cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl;
113  return;
114  }
115 
116 
117  // -------------------------------------------------------
118  // Tutorial starts here
119  // -------------------------------------------------------
120 
121  // get the workspace out of the file
122  RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName);
123  if(!w){
124  cout <<"workspace not found" << endl;
125  return;
126  }
127 
128  // get the modelConfig out of the file
129  ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName);
130 
131  // get the modelConfig out of the file
132  RooAbsData* data = w->data(dataName);
133 
134  // make sure ingredients are found
135  if(!data || !mc){
136  w->Print();
137  cout << "data or ModelConfig was not found" <<endl;
138  return;
139  }
140 
141  // ------------------------------------------
142  // create and use the BayesianCalculator
143  // to find and plot the 95% credible interval
144  // on the parameter of interest as specified
145  // in the model config
146 
147  // before we do that, we must specify our prior
148  // it belongs in the model config, but it may not have
149  // been specified
150  RooUniform prior("prior","",*mc->GetParametersOfInterest());
151  w->import(prior);
152  mc->SetPriorPdf(*w->pdf("prior"));
153 
154  // do without systematics
155  //mc->SetNuisanceParameters(RooArgSet() );
156  if (nSigmaNuisance > 0) {
157  RooAbsPdf * pdf = mc->GetPdf();
158  assert(pdf);
159  RooFitResult * res = pdf->fitTo(*data, Save(true), Minimizer(ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str()), Hesse(true),
161 
162  res->Print();
163  RooArgList nuisPar(*mc->GetNuisanceParameters());
164  for (int i = 0; i < nuisPar.getSize(); ++i) {
165  RooRealVar * v = dynamic_cast<RooRealVar*> (&nuisPar[i] );
166  assert( v);
167  v->setMin( TMath::Max( v->getMin(), v->getVal() - nSigmaNuisance * v->getError() ) );
168  v->setMax( TMath::Min( v->getMax(), v->getVal() + nSigmaNuisance * v->getError() ) );
169  std::cout << "setting interval for nuisance " << v->GetName() << " : [ " << v->getMin() << " , " << v->getMax() << " ]" << std::endl;
170  }
171  }
172 
173 
174  BayesianCalculator bayesianCalc(*data,*mc);
175  bayesianCalc.SetConfidenceLevel(confLevel); // 95% interval
176 
177  // default of the calculator is central interval. here use shortest , central or upper limit depending on input
178  // doing a shortest interval might require a longer time since it requires a scan of the posterior function
179  if (intervalType == 0) bayesianCalc.SetShortestInterval(); // for shortest interval
180  if (intervalType == 1) bayesianCalc.SetLeftSideTailFraction(0.5); // for central interval
181  if (intervalType == 2) bayesianCalc.SetLeftSideTailFraction(0.); // for upper limit
182 
183  if (!integrationType.IsNull() ) {
184  bayesianCalc.SetIntegrationType(integrationType); // set integrationType
185  bayesianCalc.SetNumIters(nToys); // set number of iterations (i.e. number of toys for MC integrations)
186  }
187 
188  // in case of toyMC make a nuisance pdf
189  if (integrationType.Contains("TOYMC") ) {
190  RooAbsPdf * nuisPdf = RooStats::MakeNuisancePdf(*mc, "nuisance_pdf");
191  cout << "using TOYMC integration: make nuisance pdf from the model " << std::endl;
192  nuisPdf->Print();
193  bayesianCalc.ForceNuisancePdf(*nuisPdf);
194  scanPosterior = true; // for ToyMC the posterior is scanned anyway so used given points
195  }
196 
197  // compute interval by scanning the posterior function
198  if (scanPosterior)
199  bayesianCalc.SetScanOfPosterior(nScanPoints);
200 
202  if (maxPOI != -999 && maxPOI > poi->getMin())
203  poi->setMax(maxPOI);
204 
205 
206  SimpleInterval* interval = bayesianCalc.GetInterval();
207 
208  // print out the interval on the first Parameter of Interest
209  cout << "\n>>>> RESULT : " << confLevel*100 << "% interval on " << poi->GetName()<<" is : ["<<
210  interval->LowerLimit() << ", "<<
211  interval->UpperLimit() <<"] "<<endl;
212 
213 
214  // make a plot
215  // since plotting may take a long time (it requires evaluating
216  // the posterior in many points) this command will speed up
217  // by reducing the number of points to plot - do 50
218 
219  // ignore errors of PDF if is zero
221 
222 
223  cout << "\nDrawing plot of posterior function....." << endl;
224 
225  // always plot using numer of scan points
226  bayesianCalc.SetScanOfPosterior(nScanPoints);
227 
228  RooPlot * plot = bayesianCalc.GetPosteriorPlot();
229  plot->Draw();
230 
231 }
virtual void Print(Option_t *options=0) const
Print TNamed name and title.
Definition: RooFitResult.h:65
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition: TSystem.cxx:1265
ModelConfig is a simple class that holds configuration information specifying how a model should be u...
Definition: ModelConfig.h:52
RooAbsPdf * GetPdf() const
get model PDF (return NULL if pdf has not been specified or does not exist)
Definition: ModelConfig.h:244
RooAbsPdf * MakeNuisancePdf(RooAbsPdf &pdf, const RooArgSet &observables, const char *name)
RooAbsData * data(const char *name) const
Retrieve dataset (binned or unbinned) with given name. A null pointer is returned if not found...
RooCmdArg PrintLevel(Int_t code)
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:50
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
#define gROOT
Definition: TROOT.h:364
Basic string class.
Definition: TString.h:137
virtual Double_t getMin(const char *name=0) const
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:170
const RooArgSet * GetNuisanceParameters() const
get RooArgSet containing the nuisance parameters (return NULL if not existing)
Definition: ModelConfig.h:250
void setMax(const char *name, Double_t value)
Set maximum of name range to given value.
Definition: RooRealVar.cxx:418
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3871
RooAbsArg * first() const
static void setEvalErrorLoggingMode(ErrorLoggingMode m)
Set evaluation error logging mode.
virtual void Print(Option_t *options=0) const
Print TNamed name and title.
Definition: RooAbsArg.h:227
RooAbsPdf * pdf(const char *name) const
Retrieve p.d.f (RooAbsPdf) with given name. A null pointer is returned if not found.
virtual Double_t LowerLimit()
Double_t getVal(const RooArgSet *set=0) const
Definition: RooAbsReal.h:64
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:37
R__EXTERN TSystem * gSystem
Definition: TSystem.h:549
static const std::string & DefaultMinimizerType()
SVector< double, 2 > v
Definition: Dict.h:5
RooCmdArg Minimizer(const char *type, const char *alg=0)
void setMin(const char *name, Double_t value)
Set minimum of name range to given value.
Definition: RooRealVar.cxx:388
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:37
Bool_t IsNull() const
Definition: TString.h:387
Flat p.d.f.
Definition: RooUniform.h:24
A RooPlot is a plot frame and a container for graphics objects within that frame. ...
Definition: RooPlot.h:41
Namespace for the RooStats classes.
Definition: Asimov.h:20
RooCmdArg Hesse(Bool_t flag=kTRUE)
virtual void SetPriorPdf(const RooAbsPdf &pdf)
Set the Prior Pdf, add to the the workspace if not already there.
Definition: ModelConfig.h:103
TObject * obj(const char *name) const
Return any type of object (RooAbsArg, RooAbsData or generic object) with given name) ...
virtual Double_t getMax(const char *name=0) const
RooCmdArg Save(Bool_t flag=kTRUE)
virtual Double_t UpperLimit()
RooAbsPdf is the abstract interface for all probability density functions The class provides hybrid a...
Definition: RooAbsPdf.h:41
Bool_t import(const RooAbsArg &arg, const RooCmdArg &arg1=RooCmdArg(), const RooCmdArg &arg2=RooCmdArg(), const RooCmdArg &arg3=RooCmdArg(), const RooCmdArg &arg4=RooCmdArg(), const RooCmdArg &arg5=RooCmdArg(), const RooCmdArg &arg6=RooCmdArg(), const RooCmdArg &arg7=RooCmdArg(), const RooCmdArg &arg8=RooCmdArg(), const RooCmdArg &arg9=RooCmdArg())
Import a RooAbsArg object, e.g.
Definition: file.py:1
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:202
void Print(Option_t *opts=0) const
Print contents of the workspace.
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:567
virtual RooFitResult * fitTo(RooAbsData &data, const RooCmdArg &arg1=RooCmdArg::none(), const RooCmdArg &arg2=RooCmdArg::none(), const RooCmdArg &arg3=RooCmdArg::none(), const RooCmdArg &arg4=RooCmdArg::none(), const RooCmdArg &arg5=RooCmdArg::none(), const RooCmdArg &arg6=RooCmdArg::none(), const RooCmdArg &arg7=RooCmdArg::none(), const RooCmdArg &arg8=RooCmdArg::none())
Fit PDF to given dataset.
Definition: RooAbsPdf.cxx:1056
const RooArgSet * GetParametersOfInterest() const
get RooArgSet containing the parameter of interest (return NULL if not existing)
Definition: ModelConfig.h:247
BayesianCalculator is a concrete implementation of IntervalCalculator, providing the computation of a...
The RooWorkspace is a persistable container for RooFit projects.
Definition: RooWorkspace.h:42
virtual void Draw(Option_t *options=0)
Draw this plot and all of the elements it contains.
Definition: RooPlot.cxx:559
Double_t getError() const
Definition: RooRealVar.h:54