Logo ROOT   6.14/05
Reference Guide
RooNLLVar.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitCore *
4  * @(#)root/roofitcore:$Id$
5  * Authors: *
6  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * *
9  * Copyright (c) 2000-2005, Regents of the University of California *
10  * and Stanford University. All rights reserved. *
11  * *
12  * Redistribution and use in source and binary forms, *
13  * with or without modification, are permitted according to the terms *
14  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15  *****************************************************************************/
16 
17 /**
18 \file RooNLLVar.cxx
19 \class RooNLLVar
20 \ingroup Roofitcore
21 
22 Class RooNLLVar implements a a -log(likelihood) calculation from a dataset
23 and a PDF. The NLL is calculated as
24 <pre>
25  Sum[data] -log( pdf(x_data) )
26 </pre>
27 In extended mode, a (Nexpect - Nobserved*log(NExpected) term is added
28 **/
29 
30 #include <algorithm>
31 
32 #include "RooFit.h"
33 #include "Riostream.h"
34 #include "TMath.h"
35 
36 #include "RooNLLVar.h"
37 #include "RooAbsData.h"
38 #include "RooAbsPdf.h"
39 #include "RooCmdConfig.h"
40 #include "RooMsgService.h"
41 #include "RooAbsDataStore.h"
42 #include "RooRealMPFE.h"
43 #include "RooRealSumPdf.h"
44 #include "RooRealVar.h"
45 #include "RooProdPdf.h"
46 
48 ;
49 
51 
52 
53 ////////////////////////////////////////////////////////////////////////////////
54 /// Construct likelihood from given p.d.f and (binned or unbinned dataset)
55 ///
56 /// Argument | Description
57 /// -------------------------|------------
58 /// Extended() | Include extended term in calculation
59 /// NumCPU() | Activate parallel processing feature
60 /// Range() | Fit only selected region
61 /// SumCoefRange() | Set the range in which to interpret the coefficients of RooAddPdf components
62 /// SplitRange() | Fit range is split by index catory of simultaneous PDF
63 /// ConditionalObservables() | Define conditional observables
64 /// Verbose() | Verbose output of GOF framework classes
65 /// CloneData() | Clone input dataset for internal use (default is kTRUE)
66 
67 RooNLLVar::RooNLLVar(const char *name, const char* title, RooAbsPdf& pdf, RooAbsData& indata,
68  const RooCmdArg& arg1, const RooCmdArg& arg2,const RooCmdArg& arg3,
69  const RooCmdArg& arg4, const RooCmdArg& arg5,const RooCmdArg& arg6,
70  const RooCmdArg& arg7, const RooCmdArg& arg8,const RooCmdArg& arg9) :
71  RooAbsOptTestStatistic(name,title,pdf,indata,
72  *(const RooArgSet*)RooCmdConfig::decodeObjOnTheFly("RooNLLVar::RooNLLVar","ProjectedObservables",0,&_emptySet
73  ,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
74  RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","RangeWithName",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
75  RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","AddCoefRange",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
76  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","NumCPU",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
78  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","Verbose",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
79  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","SplitRange",0,0,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
80  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","CloneData",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
81 {
82  RooCmdConfig pc("RooNLLVar::RooNLLVar") ;
83  pc.allowUndefined() ;
84  pc.defineInt("extended","Extended",0,kFALSE) ;
85 
86  pc.process(arg1) ; pc.process(arg2) ; pc.process(arg3) ;
87  pc.process(arg4) ; pc.process(arg5) ; pc.process(arg6) ;
88  pc.process(arg7) ; pc.process(arg8) ; pc.process(arg9) ;
89 
90  _extended = pc.getInt("extended") ;
91  _weightSq = kFALSE ;
92  _first = kTRUE ;
93  _offset = 0.;
94  _offsetCarry = 0.;
95  _offsetSaveW2 = 0.;
96  _offsetCarrySaveW2 = 0.;
97 
98  _binnedPdf = 0 ;
99 }
100 
101 
102 
103 ////////////////////////////////////////////////////////////////////////////////
104 /// Construct likelihood from given p.d.f and (binned or unbinned dataset)
105 /// For internal use.
106 
107 RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
108  Bool_t extended, const char* rangeName, const char* addCoefRangeName,
109  Int_t nCPU, RooFit::MPSplit interleave, Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
110  RooAbsOptTestStatistic(name,title,pdf,indata,RooArgSet(),rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
111  _extended(extended),
112  _weightSq(kFALSE),
114 {
115  // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
116  // for a binned likelihood calculation
117  _binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
118 
119  // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
120  if (_binnedPdf) {
121 
122  // The Active label will disable pdf integral calculations
123  _binnedPdf->setAttribute("BinnedLikelihoodActive") ;
124 
126  if (obs->getSize()!=1) {
127  _binnedPdf = 0 ;
128  } else {
129  RooRealVar* var = (RooRealVar*) obs->first() ;
130  std::list<Double_t>* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
131  std::list<Double_t>::iterator biter = boundaries->begin() ;
132  _binw.resize(boundaries->size()-1) ;
133  Double_t lastBound = (*biter) ;
134  ++biter ;
135  int ibin=0 ;
136  while (biter!=boundaries->end()) {
137  _binw[ibin] = (*biter) - lastBound ;
138  lastBound = (*biter) ;
139  ibin++ ;
140  ++biter ;
141  }
142  }
143  }
144 }
145 
146 
147 
148 ////////////////////////////////////////////////////////////////////////////////
149 /// Construct likelihood from given p.d.f and (binned or unbinned dataset)
150 /// For internal use.
151 
152 RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
153  const RooArgSet& projDeps, Bool_t extended, const char* rangeName,const char* addCoefRangeName,
154  Int_t nCPU,RooFit::MPSplit interleave,Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
155  RooAbsOptTestStatistic(name,title,pdf,indata,projDeps,rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
156  _extended(extended),
157  _weightSq(kFALSE),
159 {
160  // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
161  // for a binned likelihood calculation
162  _binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
163 
164  // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
165  if (_binnedPdf) {
166 
168  if (obs->getSize()!=1) {
169  _binnedPdf = 0 ;
170  } else {
171  RooRealVar* var = (RooRealVar*) obs->first() ;
172  std::list<Double_t>* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
173  std::list<Double_t>::iterator biter = boundaries->begin() ;
174  _binw.resize(boundaries->size()-1) ;
175  Double_t lastBound = (*biter) ;
176  ++biter ;
177  int ibin=0 ;
178  while (biter!=boundaries->end()) {
179  _binw[ibin] = (*biter) - lastBound ;
180  lastBound = (*biter) ;
181  ibin++ ;
182  ++biter ;
183  }
184  }
185  }
186 }
187 
188 
189 
190 ////////////////////////////////////////////////////////////////////////////////
191 /// Copy constructor
192 
193 RooNLLVar::RooNLLVar(const RooNLLVar& other, const char* name) :
194  RooAbsOptTestStatistic(other,name),
195  _extended(other._extended),
196  _weightSq(other._weightSq),
199  _binw(other._binw) {
201 }
202 
203 
204 
205 
206 ////////////////////////////////////////////////////////////////////////////////
207 /// Destructor
208 
210 {
211 }
212 
213 
214 
215 
216 ////////////////////////////////////////////////////////////////////////////////
217 
219 {
220  if (_gofOpMode==Slave) {
221  if (flag != _weightSq) {
222  _weightSq = flag;
225  }
226  setValueDirty();
227  } else if ( _gofOpMode==MPMaster) {
228  for (Int_t i=0 ; i<_nCPU ; i++)
229  _mpfeArray[i]->applyNLLWeightSquared(flag);
230  } else if ( _gofOpMode==SimMaster) {
231  for (Int_t i=0 ; i<_nGof ; i++)
232  ((RooNLLVar*)_gofArray[i])->applyWeightSquared(flag);
233  }
234 }
235 
236 
237 
238 ////////////////////////////////////////////////////////////////////////////////
239 /// Calculate and return likelihood on subset of data from firstEvent to lastEvent
240 /// processed with a step size of 'stepSize'. If this an extended likelihood and
241 /// and the zero event is processed the extended term is added to the return
242 /// likelihood.
243 
244 Double_t RooNLLVar::evaluatePartition(Int_t firstEvent, Int_t lastEvent, Int_t stepSize) const
245 {
246  // Throughout the calculation, we use Kahan's algorithm for summing to
247  // prevent loss of precision - this is a factor four more expensive than
248  // straight addition, but since evaluating the PDF is usually much more
249  // expensive than that, we tolerate the additional cost...
250  Int_t i ;
251  Double_t result(0), carry(0);
252 
253  RooAbsPdf* pdfClone = (RooAbsPdf*) _funcClone ;
254 
255  // cout << "RooNLLVar::evaluatePartition(" << GetName() << ") projDeps = " << (_projDeps?*_projDeps:RooArgSet()) << endl ;
256 
257  _dataClone->store()->recalculateCache( _projDeps, firstEvent, lastEvent, stepSize,(_binnedPdf?kFALSE:kTRUE) ) ;
258 
259  Double_t sumWeight(0), sumWeightCarry(0);
260 
261  // If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin)
262  if (_binnedPdf) {
263 
264  for (i=firstEvent ; i<lastEvent ; i+=stepSize) {
265 
266  _dataClone->get(i) ;
267 
268  if (!_dataClone->valid()) continue;
269 
270  Double_t eventWeight = _dataClone->weight();
271 
272 
273  // Calculate log(Poisson(N|mu) for this bin
274  Double_t N = eventWeight ;
275  Double_t mu = _binnedPdf->getVal()*_binw[i] ;
276  //cout << "RooNLLVar::binnedL(" << GetName() << ") N=" << N << " mu = " << mu << endl ;
277 
278  if (mu<=0 && N>0) {
279 
280  // Catch error condition: data present where zero events are predicted
281  logEvalError(Form("Observed %f events in bin %d with zero event yield",N,i)) ;
282 
283  } else if (fabs(mu)<1e-10 && fabs(N)<1e-10) {
284 
285  // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
286  // since log(mu)=0. No update of result is required since term=0.
287 
288  } else {
289 
290  Double_t term = -1*(-mu + N*log(mu) - TMath::LnGamma(N+1)) ;
291 
292  // Kahan summation of sumWeight
293  Double_t y = eventWeight - sumWeightCarry;
294  Double_t t = sumWeight + y;
295  sumWeightCarry = (t - sumWeight) - y;
296  sumWeight = t;
297 
298  // Kahan summation of result
299  y = term - carry;
300  t = result + y;
301  carry = (t - result) - y;
302  result = t;
303  }
304  }
305 
306 
307  } else {
308 
309  for (i=firstEvent ; i<lastEvent ; i+=stepSize) {
310 
311  _dataClone->get(i) ;
312 
313  if (!_dataClone->valid()) continue;
314 
315  Double_t eventWeight = _dataClone->weight();
316  if (0. == eventWeight * eventWeight) continue ;
317  if (_weightSq) eventWeight = _dataClone->weightSquared() ;
318 
319  Double_t term = -eventWeight * pdfClone->getLogVal(_normSet);
320 
321 
322  Double_t y = eventWeight - sumWeightCarry;
323  Double_t t = sumWeight + y;
324  sumWeightCarry = (t - sumWeight) - y;
325  sumWeight = t;
326 
327  y = term - carry;
328  t = result + y;
329  carry = (t - result) - y;
330  result = t;
331  }
332 
333  // include the extended maximum likelihood term, if requested
334  if(_extended && _setNum==_extSet) {
335  if (_weightSq) {
336 
337  // Calculate sum of weights-squared here for extended term
338  Double_t sumW2(0), sumW2carry(0);
339  for (i=0 ; i<_dataClone->numEntries() ; i++) {
340  _dataClone->get(i);
341  Double_t y = _dataClone->weightSquared() - sumW2carry;
342  Double_t t = sumW2 + y;
343  sumW2carry = (t - sumW2) - y;
344  sumW2 = t;
345  }
346 
347  Double_t expected= pdfClone->expectedEvents(_dataClone->get());
348 
349  // Adjust calculation of extended term with W^2 weighting: adjust poisson such that
350  // estimate of Nexpected stays at the same value, but has a different variance, rescale
351  // both the observed and expected count of the Poisson with a factor sum[w] / sum[w^2] which is
352  // the effective weight of the Poisson term.
353  // i.e. change Poisson(Nobs = sum[w]| Nexp ) --> Poisson( sum[w] * sum[w] / sum[w^2] | Nexp * sum[w] / sum[w^2] )
354  // weighted by the effective weight sum[w^2]/ sum[w] in the likelihood.
355  // Since here we compute the likelihood with the weight square we need to multiply by the
356  // square of the effective weight
357  // expectedW = expected * sum[w] / sum[w^2] : effective expected entries
358  // observedW = sum[w] * sum[w] / sum[w^2] : effective observed entries
359  // The extended term for the likelihood weighted by the square of the weight will be then:
360  // (sum[w^2]/ sum[w] )^2 * expectedW - (sum[w^2]/ sum[w] )^2 * observedW * log (expectedW) and this is
361  // using the previous expressions for expectedW and observedW
362  // sum[w^2] / sum[w] * expected - sum[w^2] * log (expectedW)
363  // and since the weights are constants in the likelihood we can use log(expected) instead of log(expectedW)
364 
365  Double_t expectedW2 = expected * sumW2 / _dataClone->sumEntries() ;
366  Double_t extra= expectedW2 - sumW2*log(expected );
367 
368  // Double_t y = pdfClone->extendedTerm(sumW2, _dataClone->get()) - carry;
369 
370  Double_t y = extra - carry ;
371 
372  Double_t t = result + y;
373  carry = (t - result) - y;
374  result = t;
375  } else {
376  Double_t y = pdfClone->extendedTerm(_dataClone->sumEntries(), _dataClone->get()) - carry;
377  Double_t t = result + y;
378  carry = (t - result) - y;
379  result = t;
380  }
381  }
382  }
383 
384 
385  // If part of simultaneous PDF normalize probability over
386  // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
387  if (_simCount>1) {
388  Double_t y = sumWeight*log(1.0*_simCount) - carry;
389  Double_t t = result + y;
390  carry = (t - result) - y;
391  result = t;
392  }
393 
394  //timer.Stop() ;
395  //cout << "RooNLLVar::evalPart(" << GetName() << ") SET=" << _setNum << " first=" << firstEvent << ", last=" << lastEvent << ", step=" << stepSize << ") result = " << result << " CPU = " << timer.CpuTime() << endl ;
396 
397  // At the end of the first full calculation, wire the caches
398  if (_first) {
399  _first = kFALSE ;
401  }
402 
403 
404  // Check if value offset flag is set.
405  if (_doOffset) {
406 
407  // If no offset is stored enable this feature now
408  if (_offset==0 && result !=0 ) {
409  coutI(Minimization) << "RooNLLVar::evaluatePartition(" << GetName() << ") first = "<< firstEvent << " last = " << lastEvent << " Likelihood offset now set to " << result << std::endl ;
410  _offset = result ;
411  _offsetCarry = carry;
412  }
413 
414  // Substract offset
415  Double_t y = -_offset - (carry + _offsetCarry);
416  Double_t t = result + y;
417  carry = (t - result) - y;
418  result = t;
419  }
420 
421 
422  _evalCarry = carry;
423  return result ;
424 }
425 
426 
427 
428 
virtual Double_t sumEntries() const =0
virtual Double_t getMin(const char *name=0) const
void setAttribute(const Text_t *name, Bool_t value=kTRUE)
Set (default) or clear a named boolean attribute of this object.
Definition: RooAbsArg.cxx:240
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Double_t _offsetCarry
Offset.
virtual Double_t getMax(const char *name=0) const
virtual Double_t evaluatePartition(Int_t firstEvent, Int_t lastEvent, Int_t stepSize) const
Calculate and return likelihood on subset of data from firstEvent to lastEvent processed with a step ...
Definition: RooNLLVar.cxx:244
GOFOpMode _gofOpMode
Is object initialized.
RooArgSet * getObservables(const RooArgSet &set, Bool_t valueOnly=kTRUE) const
Definition: RooAbsArg.h:194
virtual const RooArgSet * get() const
Definition: RooAbsData.h:79
void swap(TDirectoryEntry &e1, TDirectoryEntry &e2) noexcept
#define coutI(a)
Definition: RooMsgService.h:31
Double_t getVal(const RooArgSet *set=0) const
Definition: RooAbsReal.h:64
Double_t _offsetSaveW2
Definition: RooNLLVar.h:71
RooRealSumPdf * _binnedPdf
Definition: RooNLLVar.h:75
#define N
virtual Double_t getLogVal(const RooArgSet *set=0) const
Return the log of the current value with given normalization An error message is printed if the argum...
Definition: RooAbsPdf.cxx:607
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
void allowUndefined(Bool_t flag=kTRUE)
Definition: RooCmdConfig.h:39
Bool_t _weightSq
Definition: RooNLLVar.h:69
Class RooRealSumPdf implements a PDF constructed from a sum of functions:
Definition: RooRealSumPdf.h:24
void applyWeightSquared(Bool_t flag)
Definition: RooNLLVar.cxx:218
Bool_t _first
Definition: RooNLLVar.h:70
virtual Double_t weightSquared() const =0
void setValueDirty() const
Definition: RooAbsArg.h:441
Bool_t process(const RooCmdArg &arg)
Process given RooCmdArg.
pRooAbsTestStatistic * _gofArray
Double_t _evalCarry
avoids loss of precision
std::vector< Double_t > _binw
Definition: RooNLLVar.h:74
void wireAllCaches()
Definition: RooAbsArg.cxx:2363
virtual Double_t expectedEvents(const RooArgSet *nset) const
Return expected number of events from this p.d.f for use in extended likelihood calculations.
Definition: RooAbsPdf.cxx:2953
Bool_t defineInt(const char *name, const char *argName, Int_t intNum, Int_t defValue=0)
Define integer property name &#39;name&#39; mapped to integer in slot &#39;intNum&#39; in RooCmdArg with name argName...
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:36
Int_t getSize() const
Class RooNLLVar implements a a -log(likelihood) calculation from a dataset and a PDF.
Definition: RooNLLVar.h:26
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
Int_t getInt(const char *name, Int_t defaultValue=0)
Return integer property registered with name &#39;name&#39;.
virtual std::list< Double_t > * binBoundaries(RooAbsRealLValue &, Double_t, Double_t) const
RooAbsArg * first() const
void logEvalError(const char *message, const char *serverValueString=0) const
Log evaluation error message.
char * Form(const char *fmt,...)
Class RooCmdConfig is a configurable parser for RooCmdArg named arguments.
Definition: RooCmdConfig.h:27
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:37
virtual ~RooNLLVar()
Destructor.
Definition: RooNLLVar.cxx:209
RooAbsDataStore * store()
Definition: RooAbsData.h:55
const Bool_t kFALSE
Definition: RtypesCore.h:88
static RooArgSet _emptySet
Definition: RooNLLVar.h:65
Bool_t _extended
Definition: RooNLLVar.h:67
#define ClassImp(name)
Definition: Rtypes.h:359
virtual Bool_t valid() const
Definition: RooAbsData.h:85
double Double_t
Definition: RtypesCore.h:55
Int_t _nGof
Number of designated set to calculated extended term.
Double_t y[n]
Definition: legend1.C:17
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
RooNLLVar()
Definition: RooNLLVar.h:30
RooAbsPdf is the abstract interface for all probability density functions The class provides hybrid a...
Definition: RooAbsPdf.h:41
virtual Double_t weight() const =0
Double_t LnGamma(Double_t z)
Computation of ln[gamma(z)] for all z.
Definition: TMath.cxx:486
static constexpr double pc
Double_t _offsetCarrySaveW2
Definition: RooNLLVar.h:72
virtual void recalculateCache(const RooArgSet *, Int_t, Int_t, Int_t, Bool_t)
const Bool_t kTRUE
Definition: RtypesCore.h:87
virtual Double_t extendedTerm(Double_t observedEvents, const RooArgSet *nset=0) const
Returned the extended likelihood term (Nexpect - Nobserved*log(NExpected) of this PDF for the given n...
Definition: RooAbsPdf.cxx:647
char name[80]
Definition: TGX11.cxx:109
double log(double)
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:285
RooAbsOptTestStatistic is the abstract base class for test statistics objects that evaluate a functio...
RooCmdArg is a named container for two doubles, two integers two object points and three string point...
Definition: RooCmdArg.h:27
Int_t _nCPU
GOF MP Split mode specified by component (when Auto is active)