Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
StandardHypoTestDemo.py
Go to the documentation of this file.
1# \file
2# \ingroup tutorial_roostats
3# \notebook
4# Standard tutorial macro for hypothesis test (for computing the discovery significance) using all
5# RooStats hypothesis tests calculators and test statistics.
6#
7# Usage:
8#
9# ~~~{.cpp}
10# root>.L StandardHypoTestDemo.C
11# root> StandardHypoTestDemo("fileName","workspace name","S+B modelconfig name","B model name","data set
12# name",calculator type, test statistic type, number of toys)
13#
14# type = 0 Freq calculator
15# type = 1 Hybrid calculator
16# type = 2 Asymptotic calculator
17# type = 3 Asymptotic calculator using nominal Asimov data sets (not using fitted parameter values but nominal ones)
18#
19# testStatType = 0 LEP
20# = 1 Tevatron
21# = 2 Profile Likelihood
22# = 3 Profile Likelihood one sided (i.e. = 0 if mu_hat < 0)
23# ~~~
24#
25# \macro_image
26# \macro_output
27# \macro_code
28#
29# \author Lorenzo Moneta (C++ version), and P. P. (Python translation)
30
31from warnings import warn
32from gc import collect
33import numpy as np
34
35import ROOT
36
37
38class Struct:
39 def __init__(self, **entries):
40 self.__dict__.update(entries)
41
42
43HypoTestOptions = Struct(
44 # force all systematics to be off (i.e. set all nuisance parameters as constant)
45 noSystematics=False, # ratio Ntoys Null/ntoys ALT
46 nToysRatio=4, # change poi snapshot value for S+B model (needed for expected p0 values)
47 poiValue=-1,
48 printLevel=0,
49 generateBinned=False, # for binned generation
50 enableDetailedOutput=True, # for detailed output
51)
52
53optHT = HypoTestOptions
54
55
57 infile="",
58 workspaceName="combined",
59 modelSBName="ModelConfig",
60 modelBName="", # 0 freq, 1 hybrid, 2 asymptotic \
61 dataName="obsData",
62 calcType=0, # 0 LEP, 1 TeV, 2 LHC, 3 LHC - one sided \
63 testStatType=3,
64 ntoys=5000,
65 useNC=False,
66 nuisPriorName=0,
67):
68
69 noSystematics = optHT.noSystematics
70 nToysRatio = optHT.nToysRatio # ratio Ntoys Null/ntoys ALT
71 poiValue = optHT.poiValue # change poi snapshot value for S+B model (needed for expected p0 values)
72 printLevel = optHT.printLevel
73 generateBinned = optHT.generateBinned # for binned generation
74 enableDetOutput = optHT.enableDetailedOutput
75
76 # Other Parameter to pass in tutorial
77 # apart from standard for filename, ws, modelconfig and data
78
79 # type = 0 Freq calculator
80 # type = 1 Hybrid calculator
81 # type = 2 Asymptotic calculator
82
83 # testStatType = 0 LEP
84 # = 1 Tevatron
85 # = 2 Profile Likelihood
86 # = 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat)
87
88 # ntoys: number of toys to use
89
90 # useNumberCounting: set to True when using number counting events
91
92 # nuisPriorName: name of prior for the nuisance. This is often expressed as constraint term in the global model
93 # It is needed only when using the HybridCalculator (type=1)
94 # If not given by default the prior pdf from ModelConfig is used.
95
96 # extra options are available as global parameters of the macro. They major ones are:
97
98 # generateBinned generate binned data sets for toys (default is false) - be careful not to activate with
99 # a too large (>=3) number of observables
100 # nToyRatio ratio of S+B/B toys (default is 2)
101 # printLevel
102
103 # disable - can cause some problems
104 # ToyMCSampler::SetAlwaysUseMultiGen(True);
105
106 ROOT.RooStats.SimpleLikelihoodRatioTestStat.SetAlwaysReuseNLL(True)
107 ROOT.RooStats.ProfileLikelihoodTestStat.SetAlwaysReuseNLL(True)
108 ROOT.RooStats.RatioOfProfiledLikelihoodsTestStat.SetAlwaysReuseNLL(True)
109
110 # RooRandom::randomGenerator()->SetSeed(0);
111
112 # to change minimizers
113 # ~~~{.bash}
114 # ROOT::Math::MinimizerOptions::SetDefaultStrategy(0);
115 # ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2");
116 # ROOT::Math::MinimizerOptions::SetDefaultTolerance(1);
117 # ~~~
118
119 # -------------------------------------------------------
120 # First part is just to access a user-defined file
121 # or create the standard example file if it doesn't exist
122 filename = ""
123 if infile == "":
124 filename = "results/example_combined_GaussExample_model.root"
125 fileExist = not ROOT.gSystem.AccessPathName(filename, ROOT.kFileExists) # reverse-convention 'not'
126 # if file does not exists generate with histfactory
127 if not fileExist:
128 # Normally this would be run on the command line
129 print("will run standard hist2workspace example")
130 ROOT.gROOT.ProcessLine(".! prepareHistFactory .")
131 ROOT.gROOT.ProcessLine(".! hist2workspace config/example.xml")
132 print("\n\n---------------------")
133 print("Done creating example input")
134 print("---------------------\n\n")
135
136 else:
137 filename = infile
138
139 # Try to open the file
140 file = ROOT.TFile.Open(filename)
141
142 # if input file was specified but not found, quit
143 if not file:
144 print(f"StandardRooStatsDemoMacro: Input file {filename} is not found")
145 return
146
147 # -------------------------------------------------------
148 # Tutorial starts here
149 # -------------------------------------------------------
150
151 # get the workspace out of the file
152 w = file.Get(workspaceName)
153 if not w:
154 print(f"workspace not found")
155 return
156
157 w.Print()
158
159 # get the modelConfig out of the file
160 sbModel = w.obj(modelSBName)
161
162 # get the modelConfig out of the file
163 data = w.data(dataName)
164
165 # make sure ingredients are found
166 if not data or not sbModel:
167 w.Print()
168 print(f"data or ModelConfig was not found")
169 return
170
171 # make b model
172 bModel = w.obj(modelBName)
173
174 # case of no systematics
175 # remove nuisance parameters from model
176 if noSystematics:
177 nuisPar = sbModel.GetNuisanceParameters()
178 if nuisPar and nuisPar.getSize() > 0:
179 print(f"StandardHypoTestInvDemo")
180 print(" - Switch off all systematics by setting them constant to their initial values")
182
183 if bModel:
184 bnuisPar = bModel.GetNuisanceParameters()
185 if bnuisPar:
187
188 if not bModel:
189 ROOT.Info("StandardHypoTestInvDemo", "The background model {} does not exist".format(modelBName))
190 ROOT.Info("StandardHypoTestInvDemo", "Copy it from ModelConfig {} and set POI to zero".format(modelSBName))
191 bModel = sbModel.Clone()
192 bModel.SetName(modelSBName + "B_only")
193 var = bModel.GetParametersOfInterest().first()
194 if not var:
195 return
196 oldval = var.getVal()
197 var.setVal(0)
198 # bModel->SetSnapshot( RooArgSet(*var, *w->var("lumi")) );
199 bModel.SetSnapshot(ROOT.RooArgSet(var))
200 var.setVal(oldval)
201
202 if not sbModel.GetSnapshot() or (poiValue > 0):
203 ROOT.Info("StandardHypoTestDemo", "Model {} has no snapshot - make one using model poi".format(modelSBName))
204 var = sbModel.GetParametersOfInterest().first()
205 if not var:
206 return
207 oldval = var.getVal()
208 if poiValue > 0:
209 var.setVal(poiValue)
210 # sbModel->SetSnapshot( RooArgSet(*var, *w->var("lumi") ) );
211 sbModel.SetSnapshot(ROOT.RooArgSet(var))
212 if poiValue > 0:
213 var.setVal(oldval)
214 # sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() );
215
216 # part 1, hypothesis testing
217 slrts = ROOT.RooStats.SimpleLikelihoodRatioTestStat(bModel.GetPdf(), sbModel.GetPdf())
218 # null parameters must include snapshot of poi plus the nuisance values
219 nullParams = ROOT.RooArgSet(bModel.GetSnapshot())
220 if bModel.GetNuisanceParameters():
221 nullParams.add(bModel.GetNuisanceParameters())
222
223 slrts.SetNullParameters(nullParams)
224 altParams = ROOT.RooArgSet(sbModel.GetSnapshot())
225 if sbModel.GetNuisanceParameters():
226 altParams.add(sbModel.GetNuisanceParameters())
227 slrts.SetAltParameters(altParams)
228
229 profll = ROOT.RooStats.ProfileLikelihoodTestStat(bModel.GetPdf())
230
231 ropl = ROOT.RooStats.RatioOfProfiledLikelihoodsTestStat(bModel.GetPdf(), sbModel.GetPdf(), sbModel.GetSnapshot())
232 ropl.SetSubtractMLE(False)
233
234 if testStatType == 3:
235 profll.SetOneSidedDiscovery(1)
236 profll.SetPrintLevel(printLevel)
237
238 if enableDetOutput:
239 slrts.EnableDetailedOutput()
240 profll.EnableDetailedOutput()
241 ropl.EnableDetailedOutput()
242
243 # profll.SetReuseNLL(mOptimize);
244 # slrts.SetReuseNLL(mOptimize);
245 # ropl.SetReuseNLL(mOptimize);
246
247 ROOT.RooStats.AsymptoticCalculator.SetPrintLevel(printLevel)
248
249 # hypoCalc = HypoTestCalculatorGeneric.__smartptr__ # unnecessary
250 # note here Null is B and Alt is S+B
251 if calcType == 0:
252 hypoCalc = ROOT.RooStats.FrequentistCalculator(data, sbModel, bModel)
253 elif calcType == 1:
254 hypoCalc = ROOT.RooStats.HybridCalculator(data, sbModel, bModel)
255 elif calcType == 2:
256 hypoCalc = ROOT.RooStats.AsymptoticCalculator(data, sbModel, bModel)
257
258 if calcType == 0:
259 hypoCalc.SetToys(ntoys, int(ntoys / nToysRatio))
260 if enableDetOutput:
261 (hypoCalc).StoreFitInfo(True)
262
263 elif calcType == 1:
264 hypoCalc.SetToys(ntoys, ntoys / nToysRatio)
265 # n. a. yetif (enableDetOutput) : ((HybridCalculator*) hypoCalc)->StoreFitInfo(True);
266
267 elif calcType == 2:
268 if testStatType == 3:
269 hypoCalc.SetOneSidedDiscovery(True)
270 elif testStatType != 2 and testStatType != 3:
271 warn(
272 "StandardHypoTestDemo",
273 "Only the PL test statistic can be used with AsymptoticCalculator - use by default a two-sided PL",
274 )
275
276 # check for nuisance prior pdf in case of nuisance parameters
277 if calcType == 1 and (bModel.GetNuisanceParameters() or sbModel.GetNuisanceParameters()):
278 # nuisPdf = 0
279 if nuisPriorName:
280 nuisPdf = w.pdf(nuisPriorName)
281 # use prior defined first in bModel (then in SbModel)
282 if not nuisPdf:
283 ROOT.Info(
284 "StandardHypoTestDemo",
285 "No nuisance pdf given for the HybridCalculator - try to deduce pdf from the model",
286 )
287 if bModel.GetPdf() and bModel.GetObservables():
288 nuisPdf = ROOT.RooStats.MakeNuisancePdf(bModel, "nuisancePdf_bmodel")
289 else:
290 nuisPdf = ROOT.RooStats.MakeNuisancePdf(sbModel, "nuisancePdf_sbmodel")
291
292 if not nuisPdf:
293 if bModel.GetPriorPdf():
294 nuisPdf = bModel.GetPriorPdf()
295 ROOT.Info(
296 "StandardHypoTestDemo",
297 "No nuisance pdf given - try to use %s that is defined as a prior pdf in the B model",
298 nuisPdf.GetName(),
299 )
300 else:
301 Error(
302 "StandardHypoTestDemo",
303 "Cannot run Hybrid calculator because no prior on the nuisance parameter is "
304 "specified or can be derived",
305 )
306 return
307
308 assert nuisPdf
309 ROOT.Info("StandardHypoTestDemo", "Using as nuisance Pdf ... ")
310 nuisPdf.Print()
311
312 nuisParams = (
313 bModel.GetNuisanceParameters() if bModel.GetNuisanceParameters() else sbModel.GetNuisanceParameters()
314 )
315 nuisParsInPdf = nuisPdf.getObservables(nuisParams)
316 if nuisParsInPdf.getSize() == 0:
317 warn(
318 "StandardHypoTestDemo",
319 "Prior nuisance does not depend on nuisance parameters. They will be smeared in their full range",
320 )
321
322 # HybridCalculator
323 hypoCalc.ForcePriorNuisanceAlt(nuisPdf)
324 hypoCalc.ForcePriorNuisanceNull(nuisPdf)
325
326 # hypoCalc->ForcePriorNuisanceAlt(*sbModel->GetPriorPdf());
327 # hypoCalc->ForcePriorNuisanceNull(*bModel->GetPriorPdf());
328
329 sampler = hypoCalc.GetTestStatSampler()
330
331 if sampler and (calcType == 0 or calcType == 1):
332
333 # look if pdf is number counting or extended
334 if sbModel.GetPdf().canBeExtended():
335 if useNC:
336 warn("StandardHypoTestDemo", "Pdf is extended: but number counting flag is set: ignore it ")
337 else:
338 # for not extended pdf
339 if not useNC:
340 nEvents = int(data.numEntries())
341 ROOT.Info(
342 "StandardHypoTestDemo",
343 "Pdf is not extended: number of events to generate taken from observed data set is {nEvents}",
344 )
345 sampler.SetNEventsPerToy(nEvents)
346 else:
347 ROOT.Info("StandardHypoTestDemo", "using a number counting pdf")
348 sampler.SetNEventsPerToy(1)
349
350 if data.isWeighted() and not generateBinned:
351 msgfmt = "Data set is weighted, nentries = {} and sum of weights = {:8.1f} but toy ".format(
352 data.numEntries(), data.sumEntries()
353 )
354 msgfmt += "generation is unbinned - it would be faster to set generateBinned to True\n"
355
356 ROOT.Info("StandardHypoTestDemo", msgfmt)
357
358 if generateBinned:
359 sampler.SetGenerateBinned(generateBinned)
360
361 # set the test statistic
362 if testStatType == 0:
363 sampler.SetTestStatistic(slrts)
364 if testStatType == 1:
365 sampler.SetTestStatistic(ropl)
366 if testStatType == 2 or testStatType == 3:
367 sampler.SetTestStatistic(profll)
368
369 htr = hypoCalc.GetHypoTest()
370 htr.SetPValueIsRightTail(True)
371 htr.SetBackgroundAsAlt(False)
372 htr.Print() # how to get meaningful CLs at this point?
373
374 del sampler
375 del slrts
376 del ropl
377 del profll
378 collect() # Trigger the garbage collector gc.collector()
379
380 if calcType != 2:
381 c1 = ROOT.TCanvas("myc1", "myc1")
382 plot = ROOT.RooStats.HypoTestPlot(htr, 100)
383 plot.SetLogYaxis(True)
384 plot.Draw()
385 c1.Update()
386 c1.Draw()
387 c1.SaveAs("StandardHypoTestDemo.png")
388 else:
389 print("Asymptotic results ")
390
391 # look at expected significances
392 # found median of S+B distribution
393 if calcType != 2:
394
395 altDist = htr.GetAltDistribution()
396 htExp = ROOT.RooStats.HypoTestResult("Expected Result")
397 htExp.Append(htr)
398 # find quantiles in alt (S+B) distribution
399 p = np.array([i for i in range(5)], dtype=float)
400 q = np.array([0.5 for i in range(5)], dtype=float)
401 for i in range(5):
402 sig = -2 + i
403 p[i] = ROOT.Math.normal_cdf(sig, 1)
404
405 values = altDist.GetSamplingDistribution()
406 ROOT.TMath.Quantiles(values.size(), 5, values.data(), q, p, False)
407
408 for i in range(5):
409 htExp.SetTestStatisticData(q[i])
410 sig = -2 + i
411 print(
412 " Expected p -value and significance at ",
413 sig,
414 " sigma = ",
415 htExp.NullPValue(),
416 " significance ",
417 htExp.Significance(),
418 "sigma ",
419 )
420
421 else:
422 # case of asymptotic calculator
423 for i in range(5):
424 sig = -2 + i
425 # sigma is inverted here
426 pval = ROOT.RooStats.AsymptoticCalculator.GetExpectedPValues(
427 htr.NullPValue(), htr.AlternatePValue(), -sig, False
428 )
429 print(
430 " Expected p -value and significance at ",
431 sig,
432 " sigma = ",
433 pval,
434 " significance ",
436 " sigma ",
437 )
438
439 # write result in a file in case of toys
440 writeResult = calcType != 2
441
442 if enableDetOutput:
443 writeResult = True
444 ROOT.Info("StandardHypoTestDemo", "Detailed output will be written in output result file")
445
446 if htr != ROOT.kNone and writeResult:
447
448 # write to a file the results
449 calcTypeName = "Freq" if (calcType == 0) else ("Hybr" if (calcType == 1) else ("Asym"))
450 resultFileName = ROOT.TString.Format("{}_HypoTest_ts{}_".format(calcTypeName, testStatType))
451 # strip the / from the filename
452
453 name = ROOT.TString(infile)
454 name.Replace(0, name.Last("/") + 1, "")
455 resultFileName += name
456
457 fileOut = ROOT.TFile(str(resultFileName), "RECREATE")
458
459 htr.Write()
460
461 ROOT.Info(
462 "StandardHypoTestDemo", "HypoTestResult has been written in the file {}".format(resultFileName.Data())
463 )
464
465 fileOut.Close()
466
467
468# Preparing Running ...
469infile = ""
470workspaceName = "combined"
471modelSBName = "ModelConfig"
472modelBName = ""
473# 0 freq, 1 hybrid, 2 asymptotic
474dataName = "obsData"
475calcType = 0
476# 0 LEP, 1 TeV 2 LHC, 3 LHC - one sided
477testStatType = 3
478ntoys = 5000
479useNC = False
480nuisPriorName = 0
481# Running ...
483 infile, workspaceName, modelSBName, modelBName, dataName, calcType, testStatType, ntoys, useNC, nuisPriorName
484)
static void update(gsl_integration_workspace *workspace, double a1, double b1, double area1, double error1, double a2, double b2, double area2, double error2)
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:185
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
double normal_cdf(double x, double sigma=1, double x0=0)
Cumulative distribution function of the normal (Gaussian) distribution (lower tail).
double normal_quantile_c(double z, double sigma)
Inverse ( ) of the cumulative distribution function of the upper tail of the normal (Gaussian) distri...
bool SetAllConstant(const RooAbsCollection &coll, bool constant=true)
utility function to set all variable constant in a collection (from G.