Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TLimit.cxx
Go to the documentation of this file.
1// @(#)root/hist:$Id$
2// Author: Christophe.Delaere@cern.ch 21/08/2002
3
4///////////////////////////////////////////////////////////////////////////
5//
6// TLimit
7//
8// Class to compute 95% CL limits
9//
10// adapted from the mclimit code from Tom Junk (CLs method)
11// see http://root.cern/root/doc/TomJunk.pdf
12// see http://cern.ch/thomasj/searchlimits/ecl.html
13// see: Tom Junk,NIM A434, p. 435-443, 1999
14//
15// see also the following interesting references:
16// Alex Read, "Presentation of search results: the CLs technique"
17// Journal of Physics G: Nucl. Part. Phys. 28 2693-2704 (2002).
18// http://www.iop.org/EJ/abstract/0954-3899/28/10/313/
19//
20// A nice article is also available in the CERN yellow report with the proceeding
21// of the 2000 CERN workshop on confidence intervals.
22//
23// Alex Read, "Modified Frequentist Analysis of Search Results (The CLs Method)"
24// CERN 2000-005 (30 May 2000)
25//
26///////////////////////////////////////////////////////////////////////////
27
28/** \class TLimit
29 \legacy{TLimit, Consider switching to RooStats.}
30 \ingroup Hist
31 Algorithm to compute 95% CL limits using the Likelihood ratio semi-bayesian method.
32
33 Implemented by C. Delaere from the mclimit code written by Tom Junk [HEP-EX/9902006].
34 See [http://cern.ch/thomasj/searchlimits/ecl.html](http://cern.ch/thomasj/searchlimits/ecl.html) for more details.
35
36 It takes signal, background and data histograms wrapped in a
37 TLimitDataSource as input and runs a set of Monte Carlo experiments in
38 order to compute the limits. If needed, inputs are fluctuated according
39 to systematics. The output is a TConfidenceLevel.
40
41 The class TLimitDataSource takes the signal, background and data histograms as well as different
42 systematics sources to form the TLimit input.
43
44 The class TConfidenceLevel represents the final result of the TLimit algorithm. It is created just after the
45 time-consuming part and can be stored in a TFile for further processing.
46 It contains light methods to return CLs, CLb and other interesting
47 quantities.
48
49 The actual algorithm...
50
51 From an input (TLimitDataSource) it produces an output TConfidenceLevel.
52 For this, nmc Monte Carlo experiments are performed.
53 As usual, the larger this number, the longer the compute time,
54 but the better the result.
55
56 Supposing that there is a plotfile.root file containing 3 histograms
57 (signal, background and data), you can imagine doing things like:
58
59~~~{.cpp}
60 TFile* infile=new TFile("plotfile.root","READ");
61 infile->cd();
62 TH1* sh=(TH1*)infile->Get("signal");
63 TH1* bh=(TH1*)infile->Get("background");
64 TH1* dh=(TH1*)infile->Get("data");
65 TLimitDataSource* mydatasource = new TLimitDataSource(sh,bh,dh);
66 TConfidenceLevel *myconfidence = TLimit::ComputeLimit(mydatasource,50000);
67 std::cout << " CLs : " << myconfidence->CLs() << std::endl;
68 std::cout << " CLsb : " << myconfidence->CLsb() << std::endl;
69 std::cout << " CLb : " << myconfidence->CLb() << std::endl;
70 std::cout << "< CLs > : " << myconfidence->GetExpectedCLs_b() << std::endl;
71 std::cout << "< CLsb > : " << myconfidence->GetExpectedCLsb_b() << std::endl;
72 std::cout << "< CLb > : " << myconfidence->GetExpectedCLb_b() << std::endl;
73 delete myconfidence;
74 delete mydatasource;
75 infile->Close();
76~~~
77 More information can still be found on [this page](http://cern.ch/aleph-proj-alphapp/doc/tlimit.html)
78 \see https://doi.org/10.1088/0954-3899/28/10/313, https://cds.cern.ch/record/451614/files/open-2000-205.pdf
79 \note see note about: "Should I use TRolke, TFeldmanCousins, TLimit?" in the TRolke class description.
80 */
81
82#include "TLimit.h"
83#include "TArrayD.h"
84#include "TVectorD.h"
85#include "TOrdCollection.h"
86#include "TConfidenceLevel.h"
87#include "TLimitDataSource.h"
88#include "TRandom3.h"
89#include "TH1.h"
90#include "TObjArray.h"
91#include "TMath.h"
92#include "TIterator.h"
93#include "TObjString.h"
94#include "TClassTable.h"
95#include "Riostream.h"
96
97
100
102 Int_t nmc, bool stat,
104{
105 // The final object returned...
107 // The random generator used...
109 // Compute some total quantities on all the channels
110 Int_t maxbins = 0;
111 Double_t nsig = 0;
112 Double_t nbg = 0;
113 Int_t ncand = 0;
114 Int_t i;
115 for (i = 0; i <= data->GetSignal()->GetLast(); i++) {
116 maxbins = (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) > maxbins ?
117 (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) : maxbins;
118 nsig += ((TH1 *) (data->GetSignal()->At(i)))->Integral();
119 nbg += ((TH1 *) (data->GetBackground()->At(i)))->Integral();
120 ncand += (Int_t) ((TH1 *) (data->GetCandidates()->At(i)))->Integral();
121 }
122 result->SetBtot(nbg);
123 result->SetStot(nsig);
124 result->SetDtot(ncand);
125 Double_t buffer = 0;
126 fgTable->Set(maxbins * (data->GetSignal()->GetLast() + 1));
127 for (Int_t channel = 0; channel <= data->GetSignal()->GetLast(); channel++)
128 for (Int_t bin = 0;
129 bin <= ((TH1 *) (data->GetSignal()->At(channel)))->GetNbinsX()+1;
130 bin++) {
131 Double_t s = (Double_t) ((TH1 *) (data->GetSignal()->At(channel)))->GetBinContent(bin);
132 Double_t b = (Double_t) ((TH1 *) (data->GetBackground()->At(channel)))->GetBinContent(bin);
133 Double_t d = (Double_t) ((TH1 *) (data->GetCandidates()->At(channel)))->GetBinContent(bin);
134 // Compute the value of the "-2lnQ" for the actual data
135 if ((b == 0) && (s > 0)) {
136 std::cout << "WARNING: Ignoring bin " << bin << " of channel "
137 << channel << " which has s=" << s << " but b=" << b << std::endl;
138 std::cout << " Maybe the MC statistic has to be improved..." << std::endl;
139 }
140 if ((s > 0) && (b > 0))
141 buffer += LogLikelihood(s, b, b, d);
142 // precompute the log(1+s/b)'s in an array to speed up computation
143 // background-free bins are set to have a maximum t.s. value
144 // for protection (corresponding to s/b of about 5E8)
145 if ((s > 0) && (b > 0))
146 fgTable->AddAt(LogLikelihood(s, b, b, 1), (channel * maxbins) + bin);
147 else if ((s > 0) && (b == 0))
148 fgTable->AddAt(20, (channel * maxbins) + bin);
149 }
150 result->SetTSD(buffer);
151 // accumulate MC experiments. Hold the test statistic function fixed, but
152 // fluctuate s and b within syst. errors for computing probabilities of
153 // having that outcome. (Alex Read's prescription -- errors are on the ensemble,
154 // not on the observed test statistic. This technique does not split outcomes.)
155 // keep the tstats as sum log(1+s/b). convert to -2lnQ when preparing the results
156 // (reason -- like to keep the < signs right)
157 Double_t *tss = new Double_t[nmc];
158 Double_t *tsb = new Double_t[nmc];
159 Double_t *lrs = new Double_t[nmc];
160 Double_t *lrb = new Double_t[nmc];
163 for (i = 0; i < nmc; i++) {
164 tss[i] = 0;
165 tsb[i] = 0;
166 lrs[i] = 0;
167 lrb[i] = 0;
168 // fluctuate signal and background
170 // make an independent set of fluctuations for reweighting pseudoexperiments
171 // it turns out using the same fluctuated ones for numerator and denominator
172 // is biased.
174
175 for (Int_t channel = 0;
176 channel <= fluctuated->GetSignal()->GetLast(); channel++) {
177 for (Int_t bin = 0;
178 bin <=((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetNbinsX()+1;
179 bin++) {
180 if ((Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) != 0) {
181 // s+b hypothesis
182 Double_t rate = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) +
183 (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
184 Double_t rand = myrandom->Poisson(rate);
185 tss[i] += rand * fgTable->At((channel * maxbins) + bin);
186 Double_t s = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin);
187 Double_t s2= (Double_t) ((TH1 *) (fluctuated2->GetSignal()->At(channel)))->GetBinContent(bin);
188 Double_t b = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
189 Double_t b2= (Double_t) ((TH1 *) (fluctuated2->GetBackground()->At(channel)))->GetBinContent(bin);
190 if ((s > 0) && (b2 > 0))
191 lrs[i] += LogLikelihood(s, b, b2, rand) - s - b + b2;
192 else if ((s > 0) && (b2 == 0))
193 lrs[i] += 20 * rand - s;
194 // b hypothesis
195 rate = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
196 rand = myrandom->Poisson(rate);
197 tsb[i] += rand * fgTable->At((channel * maxbins) + bin);
198 if ((s2 > 0) && (b > 0))
199 lrb[i] += LogLikelihood(s2, b2, b, rand) - s2 - b2 + b;
200 else if ((s > 0) && (b == 0))
201 lrb[i] += 20 * rand - s;
202 }
203 }
204 }
205 lrs[i] = TMath::Exp(lrs[i] < 710 ? lrs[i] : 710);
206 lrb[i] = TMath::Exp(lrb[i] < 710 ? lrb[i] : 710);
207 }
208 delete tmp_src;
209 delete tmp_src2;
210 // lrs and lrb are the LR's (no logs) = prob(s+b)/prob(b) for
211 // that choice of s and b within syst. errors in the ensemble. These are
212 // the MC experiment weights for relating the s+b and b PDF's of the unsmeared
213 // test statistic (in which cas one can use another test statistic if one likes).
214
215 // Now produce the output object.
216 // The final quantities are computed on-demand form the arrays tss, tsb, lrs and lrb.
217 result->SetTSS(tss);
218 result->SetTSB(tsb);
219 result->SetLRS(lrs);
220 result->SetLRB(lrb);
221 if (!generator)
222 delete myrandom;
223 return result;
224}
225
227 bool init, TRandom * generator, bool stat)
228{
229 // initialisation: create a sorted list of all the names of systematics
230 if (init) {
231 // create a "map" with the systematics names
232 TIter errornames = input->GetErrorNames()->MakeIterator();
233 TObjArray *listofnames = nullptr;
234 delete fgSystNames;
236 while ((listofnames = ((TObjArray *) errornames.Next()))) {
237 TObjString *name = nullptr;
238 TIter loniter = listofnames->MakeIterator();
239 while ((name = (TObjString *) loniter.Next()))
240 if ((fgSystNames->IndexOf(name)) < 0)
241 fgSystNames->AddLast(name);
242 }
243 fgSystNames->Sort();
244 }
245 // if the output is not given, create it from the input
246 if (!output)
247 output = (TLimitDataSource*)(input->Clone());
248 // if there are no systematics, just returns the input as "fluctuated" output
249 if ((fgSystNames->GetSize() <= 0)&&(!stat)) {
250 return false;
251 }
252 // if there are only stat, just fluctuate stats.
253 if (fgSystNames->GetSize() <= 0) {
254 output->SetOwner();
255 for (Int_t channel = 0; channel <= input->GetSignal()->GetLast(); channel++) {
256 TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
257 TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
258 if(stat)
259 for(int i=1; i<=newsignal->GetNbinsX(); i++) {
260 newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
261 }
262 newsignal->SetDirectory(nullptr);
263 TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
264 TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
265 if(stat)
266 for(int i=1; i<=newbackground->GetNbinsX(); i++)
267 newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
268 newbackground->SetDirectory(nullptr);
269 }
270 return true;
271 }
272 // Find a choice for the random variation and
273 // re-toss all random numbers if any background or signal
274 // goes negative. (background = 0 is bad too, so put a little protection
275 // around it -- must have at least 10% of the bg estimate).
277 Double_t *serrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
278 Double_t *berrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
279 do {
280 Double_t *toss = new Double_t[fgSystNames->GetSize()];
281 for (Int_t i = 0; i < fgSystNames->GetSize(); i++)
282 toss[i] = generator->Gaus(0, 1);
283 retoss = kFALSE;
284 for (Int_t channel = 0;
285 channel <= input->GetSignal()->GetLast();
286 channel++) {
287 serrf[channel] = 0;
288 berrf[channel] = 0;
289 for (Int_t bin = 0;
290 bin <((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->GetNrows();
291 bin++) {
292 serrf[channel] += ((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->operator[](bin) *
293 toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
294 berrf[channel] += ((TVectorD *) (input->GetErrorOnBackground()->At(channel)))->operator[](bin) *
295 toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
296 }
297 if ((serrf[channel] < -1.0) || (berrf[channel] < -0.9)) {
298 retoss = kTRUE;
299 continue;
300 }
301 }
302 delete[]toss;
303 } while (retoss);
304 // adjust the fluctuated signal and background counts with a legal set
305 // of random fluctuations above.
306 output->SetOwner();
307 for (Int_t channel = 0; channel <= input->GetSignal()->GetLast();
308 channel++) {
309 TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
310 TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
311 if(stat)
312 for(int i=1; i<=newsignal->GetNbinsX(); i++)
313 newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
314 else
315 for(int i=1; i<=newsignal->GetNbinsX(); i++)
316 newsignal->SetBinContent(i,oldsignal->GetBinContent(i));
317 newsignal->Scale(1 + serrf[channel]);
318 newsignal->SetDirectory(nullptr);
319 TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
320 TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
321 if(stat)
322 for(int i=1; i<=newbackground->GetNbinsX(); i++)
323 newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
324 else
325 for(int i=1; i<=newbackground->GetNbinsX(); i++)
326 newbackground->SetBinContent(i,oldbackground->GetBinContent(i));
327 newbackground->Scale(1 + berrf[channel]);
328 newbackground->SetDirectory(nullptr);
329 }
330 delete[] serrf;
331 delete[] berrf;
332 return true;
333}
334
336 Int_t nmc, bool stat,
338{
339 // Compute limit.
340
343 delete lds;
344 return out;
345}
346
349 Int_t nmc, bool stat,
351{
352 // Compute limit.
353
356 delete lds;
357 return out;
358}
359
361 Int_t nmc,
362 bool stat,
364{
365 // Compute limit.
366
367 TH1D* sh = new TH1D("__sh","__sh",1,0,2);
368 sh->Fill(1,s);
369 TH1D* bh = new TH1D("__bh","__bh",1,0,2);
370 bh->Fill(1,b);
371 TH1D* dh = new TH1D("__dh","__dh",1,0,2);
372 dh->Fill(1,d);
375 delete lds;
376 delete sh;
377 delete bh;
378 delete dh;
379 return out;
380}
381
384 Int_t nmc,
385 bool stat,
387{
388 // Compute limit.
389
390 TH1D* sh = new TH1D("__sh","__sh",1,0,2);
391 sh->Fill(1,s);
392 TH1D* bh = new TH1D("__bh","__bh",1,0,2);
393 bh->Fill(1,b);
394 TH1D* dh = new TH1D("__dh","__dh",1,0,2);
395 dh->Fill(1,d);
398 delete lds;
399 delete sh;
400 delete bh;
401 delete dh;
402 return out;
403}
404
406{
407 // Compute LogLikelihood (static function)
408
409 return d*TMath::Log((s+b)/b2);
410}
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
int Int_t
Signed integer 4 bytes (int)
Definition RtypesCore.h:59
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
char name[80]
Definition TGX11.cxx:110
Array of doubles (64 bits per element).
Definition TArrayD.h:27
Class to compute 95% CL limits.
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:927
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
This class serves as input for the TLimit::ComputeLimit method.
static Double_t LogLikelihood(Double_t s, Double_t b, Double_t b2, Double_t d)
Definition TLimit.cxx:405
static TOrdCollection * fgSystNames
Collection of systematics names.
Definition TLimit.h:51
static TArrayD * fgTable
A log table... just to speed up calculation.
Definition TLimit.h:50
static bool Fluctuate(TLimitDataSource *input, TLimitDataSource *output, bool init, TRandom *, bool stat=false)
Definition TLimit.cxx:226
static TConfidenceLevel * ComputeLimit(TLimitDataSource *data, Int_t nmc=50000, bool stat=false, TRandom *generator=nullptr)
Definition TLimit.cxx:101
An array of TObjects.
Definition TObjArray.h:31
Collectable string class.
Definition TObjString.h:28
Ordered collection.
Random number generator class based on M.
Definition TRandom3.h:27
This is the base class for the ROOT Random number generators.
Definition TRandom.h:27
Double_t Exp(Double_t x)
Returns the base-e exponential function of x, which is e raised to the power x.
Definition TMath.h:720
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Definition TMath.h:767
TLine l
Definition textangle.C:4
static void output()