ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
TLimit.cxx
Go to the documentation of this file.
1 // @(#)root/hist:$Id$
2 // Author: Christophe.Delaere@cern.ch 21/08/2002
3 
4 ///////////////////////////////////////////////////////////////////////////
5 //
6 // TLimit
7 //
8 // Class to compute 95% CL limits
9 //
10 // adapted from the mclimit code from Tom Junk (CLs method)
11 // see http://root.cern.ch/root/doc/TomJunk.pdf
12 // see http://cern.ch/thomasj/searchlimits/ecl.html
13 // see: Tom Junk,NIM A434, p. 435-443, 1999
14 //
15 // see also the following interesting references:
16 // Alex Read, "Presentation of search results: the CLs technique"
17 // Journal of Physics G: Nucl. Part. Phys. 28 2693-2704 (2002).
18 // http://www.iop.org/EJ/abstract/0954-3899/28/10/313/
19 //
20 // A nice article is also available in the CERN yellow report with the proceeding
21 // of the 2000 CERN workshop on confidence intervals.
22 //
23 // Alex Read, "Modified Frequentist Analysis of Search Results (The CLs Method)"
24 // CERN 2000-005 (30 May 2000)
25 //
26 // see note about: "Should I use TRolke, TFeldmanCousins, TLimit?"
27 // in the TRolke class description.
28 //
29 ///////////////////////////////////////////////////////////////////////////
30 
31 /** \class TLimit
32  \ingroup Hist
33  Algorithm to compute 95% C.L. limits using the Likelihood ratio
34  semi-bayesian method.
35 
36  Implemented by C. Delaere from the mclimit code written by Tom Junk [HEP-EX/9902006].
37  See [http://cern.ch/thomasj/searchlimits/ecl.html](http://cern.ch/thomasj/searchlimits/ecl.html) for more details.
38 
39  It takes signal, background and data histograms wrapped in a
40  TLimitDataSource as input and runs a set of Monte Carlo experiments in
41  order to compute the limits. If needed, inputs are fluctuated according
42  to systematics. The output is a TConfidenceLevel.
43 
44  The class TLimitDataSource takes the signal, background and data histograms as well as different
45  systematics sources to form the TLimit input.
46 
47  The class TConfidenceLevel represents the final result of the TLimit algorithm. It is created just after the
48  time-consuming part and can be stored in a TFile for further processing.
49  It contains light methods to return CLs, CLb and other interesting
50  quantities.
51 
52  The actual algorithm...
53 
54  From an input (TLimitDataSource) it produces an output TConfidenceLevel.
55  For this, nmc Monte Carlo experiments are performed.
56  As usual, the larger this number, the longer the compute time,
57  but the better the result.
58 
59  Supposing that there is a plotfile.root file containing 3 histograms
60  (signal, background and data), you can imagine doing things like:
61 
62 ~~~{.cpp}
63  TFile* infile=new TFile("plotfile.root","READ");
64  infile->cd();
65  TH1* sh=(TH1*)infile->Get("signal");
66  TH1* bh=(TH1*)infile->Get("background");
67  TH1* dh=(TH1*)infile->Get("data");
68  TLimitDataSource* mydatasource = new TLimitDataSource(sh,bh,dh);
69  TConfidenceLevel *myconfidence = TLimit::ComputeLimit(mydatasource,50000);
70  std::cout << " CLs : " << myconfidence->CLs() << std::endl;
71  std::cout << " CLsb : " << myconfidence->CLsb() << std::endl;
72  std::cout << " CLb : " << myconfidence->CLb() << std::endl;
73  std::cout << "< CLs > : " << myconfidence->GetExpectedCLs_b() << std::endl;
74  std::cout << "< CLsb > : " << myconfidence->GetExpectedCLsb_b() << std::endl;
75  std::cout << "< CLb > : " << myconfidence->GetExpectedCLb_b() << std::endl;
76  delete myconfidence;
77  delete mydatasource;
78  infile->Close();
79 ~~~
80  More information can still be found on [this page](http://cern.ch/aleph-proj-alphapp/doc/tlimit.html)
81  */
82 
83 #include "TLimit.h"
84 #include "TArrayD.h"
85 #include "TVectorD.h"
86 #include "TOrdCollection.h"
87 #include "TConfidenceLevel.h"
88 #include "TLimitDataSource.h"
89 #include "TRandom3.h"
90 #include "TH1.h"
91 #include "TObjArray.h"
92 #include "TMath.h"
93 #include "TIterator.h"
94 #include "TObjString.h"
95 #include "TClassTable.h"
96 #include "Riostream.h"
97 
99 
100 TArrayD *TLimit::fgTable = new TArrayD(0);
101 TOrdCollection *TLimit::fgSystNames = new TOrdCollection();
102 
104  Int_t nmc, bool stat,
105  TRandom * generator)
106 {
107  // The final object returned...
109  // The random generator used...
110  TRandom *myrandom = generator ? generator : new TRandom3;
111  // Compute some total quantities on all the channels
112  Int_t maxbins = 0;
113  Double_t nsig = 0;
114  Double_t nbg = 0;
115  Int_t ncand = 0;
116  Int_t i;
117  for (i = 0; i <= data->GetSignal()->GetLast(); i++) {
118  maxbins = (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) > maxbins ?
119  (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) : maxbins;
120  nsig += ((TH1 *) (data->GetSignal()->At(i)))->Integral();
121  nbg += ((TH1 *) (data->GetBackground()->At(i)))->Integral();
122  ncand += (Int_t) ((TH1 *) (data->GetCandidates()->At(i)))->Integral();
123  }
124  result->SetBtot(nbg);
125  result->SetStot(nsig);
126  result->SetDtot(ncand);
127  Double_t buffer = 0;
128  fgTable->Set(maxbins * (data->GetSignal()->GetLast() + 1));
129  for (Int_t channel = 0; channel <= data->GetSignal()->GetLast(); channel++)
130  for (Int_t bin = 0;
131  bin <= ((TH1 *) (data->GetSignal()->At(channel)))->GetNbinsX()+1;
132  bin++) {
133  Double_t s = (Double_t) ((TH1 *) (data->GetSignal()->At(channel)))->GetBinContent(bin);
134  Double_t b = (Double_t) ((TH1 *) (data->GetBackground()->At(channel)))->GetBinContent(bin);
135  Double_t d = (Double_t) ((TH1 *) (data->GetCandidates()->At(channel)))->GetBinContent(bin);
136  // Compute the value of the "-2lnQ" for the actual data
137  if ((b == 0) && (s > 0)) {
138  std::cout << "WARNING: Ignoring bin " << bin << " of channel "
139  << channel << " which has s=" << s << " but b=" << b << std::endl;
140  std::cout << " Maybe the MC statistic has to be improved..." << std::endl;
141  }
142  if ((s > 0) && (b > 0))
143  buffer += LogLikelihood(s, b, b, d);
144  // precompute the log(1+s/b)'s in an array to speed up computation
145  // background-free bins are set to have a maximum t.s. value
146  // for protection (corresponding to s/b of about 5E8)
147  if ((s > 0) && (b > 0))
148  fgTable->AddAt(LogLikelihood(s, b, b, 1), (channel * maxbins) + bin);
149  else if ((s > 0) && (b == 0))
150  fgTable->AddAt(20, (channel * maxbins) + bin);
151  }
152  result->SetTSD(buffer);
153  // accumulate MC experiments. Hold the test statistic function fixed, but
154  // fluctuate s and b within syst. errors for computing probabilities of
155  // having that outcome. (Alex Read's prescription -- errors are on the ensemble,
156  // not on the observed test statistic. This technique does not split outcomes.)
157  // keep the tstats as sum log(1+s/b). convert to -2lnQ when preparing the results
158  // (reason -- like to keep the < signs right)
159  Double_t *tss = new Double_t[nmc];
160  Double_t *tsb = new Double_t[nmc];
161  Double_t *lrs = new Double_t[nmc];
162  Double_t *lrb = new Double_t[nmc];
163  TLimitDataSource* tmp_src = (TLimitDataSource*)(data->Clone());
164  TLimitDataSource* tmp_src2 = (TLimitDataSource*)(data->Clone());
165  for (i = 0; i < nmc; i++) {
166  tss[i] = 0;
167  tsb[i] = 0;
168  lrs[i] = 0;
169  lrb[i] = 0;
170  // fluctuate signal and background
171  TLimitDataSource* fluctuated = Fluctuate(data, tmp_src, !i, myrandom, stat) ? tmp_src : data;
172  // make an independent set of fluctuations for reweighting pseudoexperiments
173  // it turns out using the same fluctuated ones for numerator and denominator
174  // is biased.
175  TLimitDataSource* fluctuated2 = Fluctuate(data, tmp_src2, false, myrandom, stat) ? tmp_src2 : data;
176 
177  for (Int_t channel = 0;
178  channel <= fluctuated->GetSignal()->GetLast(); channel++) {
179  for (Int_t bin = 0;
180  bin <=((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetNbinsX()+1;
181  bin++) {
182  if ((Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) != 0) {
183  // s+b hypothesis
184  Double_t rate = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) +
185  (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
186  Double_t rand = myrandom->Poisson(rate);
187  tss[i] += rand * fgTable->At((channel * maxbins) + bin);
188  Double_t s = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin);
189  Double_t s2= (Double_t) ((TH1 *) (fluctuated2->GetSignal()->At(channel)))->GetBinContent(bin);
190  Double_t b = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
191  Double_t b2= (Double_t) ((TH1 *) (fluctuated2->GetBackground()->At(channel)))->GetBinContent(bin);
192  if ((s > 0) && (b2 > 0))
193  lrs[i] += LogLikelihood(s, b, b2, rand) - s - b + b2;
194  else if ((s > 0) && (b2 == 0))
195  lrs[i] += 20 * rand - s;
196  // b hypothesis
197  rate = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
198  rand = myrandom->Poisson(rate);
199  tsb[i] += rand * fgTable->At((channel * maxbins) + bin);
200  if ((s2 > 0) && (b > 0))
201  lrb[i] += LogLikelihood(s2, b2, b, rand) - s2 - b2 + b;
202  else if ((s > 0) && (b == 0))
203  lrb[i] += 20 * rand - s;
204  }
205  }
206  }
207  lrs[i] = TMath::Exp(lrs[i] < 710 ? lrs[i] : 710);
208  lrb[i] = TMath::Exp(lrb[i] < 710 ? lrb[i] : 710);
209  }
210  delete tmp_src;
211  delete tmp_src2;
212  // lrs and lrb are the LR's (no logs) = prob(s+b)/prob(b) for
213  // that choice of s and b within syst. errors in the ensemble. These are
214  // the MC experiment weights for relating the s+b and b PDF's of the unsmeared
215  // test statistic (in which cas one can use another test statistic if one likes).
216 
217  // Now produce the output object.
218  // The final quantities are computed on-demand form the arrays tss, tsb, lrs and lrb.
219  result->SetTSS(tss);
220  result->SetTSB(tsb);
221  result->SetLRS(lrs);
222  result->SetLRB(lrb);
223  if (!generator)
224  delete myrandom;
225  return result;
226 }
227 
229  bool init, TRandom * generator, bool stat)
230 {
231  // initialisation: create a sorted list of all the names of systematics
232  if (init) {
233  // create a "map" with the systematics names
234  TIterator *errornames = input->GetErrorNames()->MakeIterator();
235  TObjArray *listofnames = 0;
236  delete fgSystNames;
237  fgSystNames = new TOrdCollection();
238  while ((listofnames = ((TObjArray *) errornames->Next()))) {
239  TObjString *name = 0;
240  TIterator *loniter = listofnames->MakeIterator();
241  while ((name = (TObjString *) (loniter->Next())))
242  if ((fgSystNames->IndexOf(name)) < 0)
243  fgSystNames->AddLast(name);
244  }
245  fgSystNames->Sort();
246  }
247  // if the output is not given, create it from the input
248  if (!output)
249  output = (TLimitDataSource*)(input->Clone());
250  // if there are no systematics, just returns the input as "fluctuated" output
251  if ((fgSystNames->GetSize() <= 0)&&(!stat)) {
252  return 0;
253  }
254  // if there are only stat, just fluctuate stats.
255  if (fgSystNames->GetSize() <= 0) {
256  output->SetOwner();
257  for (Int_t channel = 0; channel <= input->GetSignal()->GetLast(); channel++) {
258  TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
259  TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
260  if(stat)
261  for(int i=1; i<=newsignal->GetNbinsX(); i++) {
262  newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
263  }
264  newsignal->SetDirectory(0);
265  TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
266  TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
267  if(stat)
268  for(int i=1; i<=newbackground->GetNbinsX(); i++)
269  newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
270  newbackground->SetDirectory(0);
271  }
272  return 1;
273  }
274  // Find a choice for the random variation and
275  // re-toss all random numbers if any background or signal
276  // goes negative. (background = 0 is bad too, so put a little protection
277  // around it -- must have at least 10% of the bg estimate).
278  Bool_t retoss = kTRUE;
279  Double_t *serrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
280  Double_t *berrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
281  do {
282  Double_t *toss = new Double_t[fgSystNames->GetSize()];
283  for (Int_t i = 0; i < fgSystNames->GetSize(); i++)
284  toss[i] = generator->Gaus(0, 1);
285  retoss = kFALSE;
286  for (Int_t channel = 0;
287  channel <= input->GetSignal()->GetLast();
288  channel++) {
289  serrf[channel] = 0;
290  berrf[channel] = 0;
291  for (Int_t bin = 0;
292  bin <((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->GetNrows();
293  bin++) {
294  serrf[channel] += ((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->operator[](bin) *
295  toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
296  berrf[channel] += ((TVectorD *) (input->GetErrorOnBackground()->At(channel)))->operator[](bin) *
297  toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
298  }
299  if ((serrf[channel] < -1.0) || (berrf[channel] < -0.9)) {
300  retoss = kTRUE;
301  continue;
302  }
303  }
304  delete[]toss;
305  } while (retoss);
306  // adjust the fluctuated signal and background counts with a legal set
307  // of random fluctuations above.
308  output->SetOwner();
309  for (Int_t channel = 0; channel <= input->GetSignal()->GetLast();
310  channel++) {
311  TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
312  TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
313  if(stat)
314  for(int i=1; i<=newsignal->GetNbinsX(); i++)
315  newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
316  else
317  for(int i=1; i<=newsignal->GetNbinsX(); i++)
318  newsignal->SetBinContent(i,oldsignal->GetBinContent(i));
319  newsignal->Scale(1 + serrf[channel]);
320  newsignal->SetDirectory(0);
321  TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
322  TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
323  if(stat)
324  for(int i=1; i<=newbackground->GetNbinsX(); i++)
325  newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
326  else
327  for(int i=1; i<=newbackground->GetNbinsX(); i++)
328  newbackground->SetBinContent(i,oldbackground->GetBinContent(i));
329  newbackground->Scale(1 + berrf[channel]);
330  newbackground->SetDirectory(0);
331  }
332  delete[] serrf;
333  delete[] berrf;
334  return 1;
335 }
336 
338  Int_t nmc, bool stat,
339  TRandom * generator)
340 {
341  // Compute limit.
342 
343  TLimitDataSource* lds = new TLimitDataSource(s,b,d);
344  TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
345  delete lds;
346  return out;
347 }
348 
350  TVectorD* se, TVectorD* be, TObjArray* l,
351  Int_t nmc, bool stat,
352  TRandom * generator)
353 {
354  // Compute limit.
355 
356  TLimitDataSource* lds = new TLimitDataSource(s,b,d,se,be,l);
357  TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
358  delete lds;
359  return out;
360 }
361 
363  Int_t nmc,
364  bool stat,
365  TRandom * generator)
366 {
367  // Compute limit.
368 
369  TH1D* sh = new TH1D("__sh","__sh",1,0,2);
370  sh->Fill(1,s);
371  TH1D* bh = new TH1D("__bh","__bh",1,0,2);
372  bh->Fill(1,b);
373  TH1D* dh = new TH1D("__dh","__dh",1,0,2);
374  dh->Fill(1,d);
375  TLimitDataSource* lds = new TLimitDataSource(sh,bh,dh);
376  TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
377  delete lds;
378  delete sh;
379  delete bh;
380  delete dh;
381  return out;
382 }
383 
385  TVectorD* se, TVectorD* be, TObjArray* l,
386  Int_t nmc,
387  bool stat,
388  TRandom * generator)
389 {
390  // Compute limit.
391 
392  TH1D* sh = new TH1D("__sh","__sh",1,0,2);
393  sh->Fill(1,s);
394  TH1D* bh = new TH1D("__bh","__bh",1,0,2);
395  bh->Fill(1,b);
396  TH1D* dh = new TH1D("__dh","__dh",1,0,2);
397  dh->Fill(1,d);
398  TLimitDataSource* lds = new TLimitDataSource(sh,bh,dh,se,be,l);
399  TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
400  delete lds;
401  delete sh;
402  delete bh;
403  delete dh;
404  return out;
405 }
406 
408 {
409  // Compute LogLikelihood (static function)
410 
411  return d*TMath::Log((s+b)/b2);
412 }
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:6174
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3159
An array of TObjects.
Definition: TObjArray.h:39
Random number generator class based on M.
Definition: TRandom3.h:29
tuple buffer
Definition: tree.py:99
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4629
Double_t Log(Double_t x)
Definition: TMath.h:526
Collectable string class.
Definition: TObjString.h:32
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8266
Int_t GetLast() const
Return index of last object in array.
Definition: TObjArray.cxx:528
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition: TRandom.cxx:235
void AddLast(TObject *obj)
Add object at the end of the collection.
void SetLRB(Double_t *in)
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
static Double_t LogLikelihood(Double_t s, Double_t b, Double_t b2, Double_t d)
Definition: TLimit.cxx:407
static bool Fluctuate(TLimitDataSource *input, TLimitDataSource *output, bool init, TRandom *, bool stat=false)
Definition: TLimit.cxx:228
static TOrdCollection * fgSystNames
Definition: TLimit.h:52
virtual Int_t GetNbinsX() const
Definition: TH1.h:296
This class serves as interface to feed data into the TLimit routines.
TIterator * MakeIterator(Bool_t dir=kIterForward) const
Returns an array iterator.
Definition: TObjArray.cxx:591
void SetDtot(Int_t in)
Iterator abstract base class.
Definition: TIterator.h:32
Int_t IndexOf(const TObject *obj) const
Return index of object in collection.
void SetStot(Double_t in)
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TObject.cxx:203
void SetLRS(Double_t *in)
virtual TObjArray * GetBackground()
This is the base class for the ROOT Random number generators.
Definition: TRandom.h:29
int d
Definition: tornado.py:11
char * out
Definition: TBase64.cxx:29
void SetTSD(Double_t in)
void SetBtot(Double_t in)
Algorithm to compute 95% C.L.
Definition: TLimit.h:20
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8543
TLine * l
Definition: textangle.C:4
static TConfidenceLevel * ComputeLimit(TLimitDataSource *data, Int_t nmc=50000, bool stat=false, TRandom *generator=0)
Definition: TLimit.cxx:103
Class to compute 95% CL limits.
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:613
void SetTSS(Double_t *in)
Set the TSS.
void SetTSB(Double_t *in)
Set the TSB.
virtual void SetOwner(bool swtch=kTRUE)
void Sort()
If objects in collection are sortable (i.e.
Double_t Exp(Double_t x)
Definition: TMath.h:495
virtual Int_t GetSize() const
Definition: TCollection.h:95
TH1F * s2
Definition: threadsh2.C:15
#define ClassImp(name)
Definition: Rtypes.h:279
static Int_t init()
double Double_t
Definition: RtypesCore.h:55
The TH1 histogram class.
Definition: TH1.h:80
Array of doubles (64 bits per element).
Definition: TArrayD.h:29
Int_t BinarySearch(TObject *obj)
Find object using a binary search.
#define name(a, b)
Definition: linkTestLib0.cpp:5
virtual TObjArray * GetErrorNames()
virtual TObjArray * GetErrorOnSignal()
virtual TObjArray * GetSignal()
virtual TObject * Next()=0
virtual TObjArray * GetErrorOnBackground()
double result[121]
virtual Int_t Poisson(Double_t mean)
Generates a random integer N according to a Poisson law.
Definition: TRandom.cxx:362
TObject * At(Int_t idx) const
Definition: TObjArray.h:167
static void output(int code)
Definition: gifencode.c:226
virtual Double_t GetBinError(Int_t bin) const
Return value of error associated to bin number bin.
Definition: TH1.cxx:8395
const Bool_t kTRUE
Definition: Rtypes.h:91
Ordered collection.