ROOT  6.06/09
Reference Guide
RooNLLVar.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitCore *
4  * @(#)root/roofitcore:$Id$
5  * Authors: *
6  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * *
9  * Copyright (c) 2000-2005, Regents of the University of California *
10  * and Stanford University. All rights reserved. *
11  * *
12  * Redistribution and use in source and binary forms, *
13  * with or without modification, are permitted according to the terms *
14  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15  *****************************************************************************/
16 
17 //////////////////////////////////////////////////////////////////////////////
18 //
19 // BEGIN_HTML
20 // Class RooNLLVar implements a a -log(likelihood) calculation from a dataset
21 // and a PDF. The NLL is calculated as
22 // <pre>
23 // Sum[data] -log( pdf(x_data) )
24 // </pre>
25 // In extended mode, a (Nexpect - Nobserved*log(NExpected) term is added
26 // END_HTML
27 //
28 
29 #include <algorithm>
30 
31 #include "RooFit.h"
32 #include "Riostream.h"
33 #include "TMath.h"
34 
35 #include "RooNLLVar.h"
36 #include "RooAbsData.h"
37 #include "RooAbsPdf.h"
38 #include "RooCmdConfig.h"
39 #include "RooMsgService.h"
40 #include "RooAbsDataStore.h"
41 #include "RooRealMPFE.h"
42 #include "RooRealSumPdf.h"
43 #include "RooRealVar.h"
44 #include "RooProdPdf.h"
45 
47 ;
48 
50 
51 
52 ////////////////////////////////////////////////////////////////////////////////
53 /// Construct likelihood from given p.d.f and (binned or unbinned dataset)
54 ///
55 /// Extended() -- Include extended term in calculation
56 /// NumCPU() -- Activate parallel processing feature
57 /// Range() -- Fit only selected region
58 /// SumCoefRange() -- Set the range in which to interpret the coefficients of RooAddPdf components
59 /// SplitRange() -- Fit range is split by index catory of simultaneous PDF
60 /// ConditionalObservables() -- Define conditional observables
61 /// Verbose() -- Verbose output of GOF framework classes
62 /// CloneData() -- Clone input dataset for internal use (default is kTRUE)
63 
64 RooNLLVar::RooNLLVar(const char *name, const char* title, RooAbsPdf& pdf, RooAbsData& indata,
65  const RooCmdArg& arg1, const RooCmdArg& arg2,const RooCmdArg& arg3,
66  const RooCmdArg& arg4, const RooCmdArg& arg5,const RooCmdArg& arg6,
67  const RooCmdArg& arg7, const RooCmdArg& arg8,const RooCmdArg& arg9) :
68  RooAbsOptTestStatistic(name,title,pdf,indata,
69  *(const RooArgSet*)RooCmdConfig::decodeObjOnTheFly("RooNLLVar::RooNLLVar","ProjectedObservables",0,&_emptySet
70  ,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
71  RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","RangeWithName",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
72  RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","AddCoefRange",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
73  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","NumCPU",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
75  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","Verbose",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
76  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","SplitRange",0,0,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
77  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","CloneData",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
78 {
79  RooCmdConfig pc("RooNLLVar::RooNLLVar") ;
80  pc.allowUndefined() ;
81  pc.defineInt("extended","Extended",0,kFALSE) ;
82 
83  pc.process(arg1) ; pc.process(arg2) ; pc.process(arg3) ;
84  pc.process(arg4) ; pc.process(arg5) ; pc.process(arg6) ;
85  pc.process(arg7) ; pc.process(arg8) ; pc.process(arg9) ;
86 
87  _extended = pc.getInt("extended") ;
88  _weightSq = kFALSE ;
89  _first = kTRUE ;
90  _offset = 0.;
91  _offsetCarry = 0.;
92  _offsetSaveW2 = 0.;
93  _offsetCarrySaveW2 = 0.;
94 
95  _binnedPdf = 0 ;
96 }
97 
98 
99 
100 ////////////////////////////////////////////////////////////////////////////////
101 /// Construct likelihood from given p.d.f and (binned or unbinned dataset)
102 /// For internal use.
103 
104 RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
105  Bool_t extended, const char* rangeName, const char* addCoefRangeName,
106  Int_t nCPU, RooFit::MPSplit interleave, Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
107  RooAbsOptTestStatistic(name,title,pdf,indata,RooArgSet(),rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
108  _extended(extended),
109  _weightSq(kFALSE),
110  _first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.)
111 {
112  // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
113  // for a binned likelihood calculation
114  _binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
115 
116  // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
117  if (_binnedPdf) {
118 
119  // The Active label will disable pdf integral calculations
120  _binnedPdf->setAttribute("BinnedLikelihoodActive") ;
121 
123  if (obs->getSize()!=1) {
124  _binnedPdf = 0 ;
125  } else {
126  RooRealVar* var = (RooRealVar*) obs->first() ;
127  std::list<Double_t>* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
128  std::list<Double_t>::iterator biter = boundaries->begin() ;
129  _binw.resize(boundaries->size()-1) ;
130  Double_t lastBound = (*biter) ;
131  biter++ ;
132  int ibin=0 ;
133  while (biter!=boundaries->end()) {
134  _binw[ibin] = (*biter) - lastBound ;
135  lastBound = (*biter) ;
136  ibin++ ;
137  biter++ ;
138  }
139  }
140  }
141 }
142 
143 
144 
145 ////////////////////////////////////////////////////////////////////////////////
146 /// Construct likelihood from given p.d.f and (binned or unbinned dataset)
147 /// For internal use.
148 
149 RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
150  const RooArgSet& projDeps, Bool_t extended, const char* rangeName,const char* addCoefRangeName,
151  Int_t nCPU,RooFit::MPSplit interleave,Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
152  RooAbsOptTestStatistic(name,title,pdf,indata,projDeps,rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
153  _extended(extended),
154  _weightSq(kFALSE),
155  _first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.)
156 {
157  // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
158  // for a binned likelihood calculation
159  _binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
160 
161  // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
162  if (_binnedPdf) {
163 
165  if (obs->getSize()!=1) {
166  _binnedPdf = 0 ;
167  } else {
168  RooRealVar* var = (RooRealVar*) obs->first() ;
169  std::list<Double_t>* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
170  std::list<Double_t>::iterator biter = boundaries->begin() ;
171  _binw.resize(boundaries->size()-1) ;
172  Double_t lastBound = (*biter) ;
173  biter++ ;
174  int ibin=0 ;
175  while (biter!=boundaries->end()) {
176  _binw[ibin] = (*biter) - lastBound ;
177  lastBound = (*biter) ;
178  ibin++ ;
179  biter++ ;
180  }
181  }
182  }
183 }
184 
185 
186 
187 ////////////////////////////////////////////////////////////////////////////////
188 /// Copy constructor
189 
190 RooNLLVar::RooNLLVar(const RooNLLVar& other, const char* name) :
191  RooAbsOptTestStatistic(other,name),
192  _extended(other._extended),
193  _weightSq(other._weightSq),
194  _first(kTRUE), _offsetSaveW2(other._offsetSaveW2),
195  _offsetCarrySaveW2(other._offsetCarrySaveW2),
196  _binw(other._binw) {
198 }
199 
200 
201 
202 
203 ////////////////////////////////////////////////////////////////////////////////
204 /// Destructor
205 
207 {
208 }
209 
210 
211 
212 
213 ////////////////////////////////////////////////////////////////////////////////
214 
216 {
217  if (_gofOpMode==Slave) {
218  if (flag != _weightSq) {
219  _weightSq = flag;
222  }
223  setValueDirty();
224  } else if ( _gofOpMode==MPMaster) {
225  for (Int_t i=0 ; i<_nCPU ; i++)
226  _mpfeArray[i]->applyNLLWeightSquared(flag);
227  } else if ( _gofOpMode==SimMaster) {
228  for (Int_t i=0 ; i<_nGof ; i++)
229  ((RooNLLVar*)_gofArray[i])->applyWeightSquared(flag);
230  }
231 }
232 
233 
234 
235 ////////////////////////////////////////////////////////////////////////////////
236 /// Calculate and return likelihood on subset of data from firstEvent to lastEvent
237 /// processed with a step size of 'stepSize'. If this an extended likelihood and
238 /// and the zero event is processed the extended term is added to the return
239 /// likelihood.
240 
241 Double_t RooNLLVar::evaluatePartition(Int_t firstEvent, Int_t lastEvent, Int_t stepSize) const
242 {
243  // Throughout the calculation, we use Kahan's algorithm for summing to
244  // prevent loss of precision - this is a factor four more expensive than
245  // straight addition, but since evaluating the PDF is usually much more
246  // expensive than that, we tolerate the additional cost...
247  Int_t i ;
248  Double_t result(0), carry(0);
249 
250  RooAbsPdf* pdfClone = (RooAbsPdf*) _funcClone ;
251 
252  // cout << "RooNLLVar::evaluatePartition(" << GetName() << ") projDeps = " << (_projDeps?*_projDeps:RooArgSet()) << endl ;
253 
254  _dataClone->store()->recalculateCache( _projDeps, firstEvent, lastEvent, stepSize,(_binnedPdf?kFALSE:kTRUE) ) ;
255 
256  Double_t sumWeight(0), sumWeightCarry(0);
257 
258  // If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin)
259  if (_binnedPdf) {
260 
261  for (i=firstEvent ; i<lastEvent ; i+=stepSize) {
262 
263  _dataClone->get(i) ;
264 
265  if (!_dataClone->valid()) continue;
266 
267  Double_t eventWeight = _dataClone->weight();
268 
269 
270  // Calculate log(Poisson(N|mu) for this bin
271  Double_t N = eventWeight ;
272  Double_t mu = _binnedPdf->getVal()*_binw[i] ;
273  //cout << "RooNLLVar::binnedL(" << GetName() << ") N=" << N << " mu = " << mu << endl ;
274 
275  if (mu<=0 && N>0) {
276 
277  // Catch error condition: data present where zero events are predicted
278  logEvalError(Form("Observed %f events in bin %d with zero event yield",N,i)) ;
279 
280  } else if (fabs(mu)<1e-10 && fabs(N)<1e-10) {
281 
282  // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
283  // since log(mu)=0. No update of result is required since term=0.
284 
285  } else {
286 
287  Double_t term = -1*(-mu + N*log(mu) - TMath::LnGamma(N+1)) ;
288 
289  // Kahan summation of sumWeight
290  Double_t y = eventWeight - sumWeightCarry;
291  Double_t t = sumWeight + y;
292  sumWeightCarry = (t - sumWeight) - y;
293  sumWeight = t;
294 
295  // Kahan summation of result
296  y = term - carry;
297  t = result + y;
298  carry = (t - result) - y;
299  result = t;
300  }
301  }
302 
303 
304  } else {
305 
306  for (i=firstEvent ; i<lastEvent ; i+=stepSize) {
307 
308  _dataClone->get(i) ;
309 
310  if (!_dataClone->valid()) continue;
311 
312  Double_t eventWeight = _dataClone->weight();
313  if (0. == eventWeight * eventWeight) continue ;
314  if (_weightSq) eventWeight = _dataClone->weightSquared() ;
315 
316  Double_t term = -eventWeight * pdfClone->getLogVal(_normSet);
317 
318 
319  Double_t y = eventWeight - sumWeightCarry;
320  Double_t t = sumWeight + y;
321  sumWeightCarry = (t - sumWeight) - y;
322  sumWeight = t;
323 
324  y = term - carry;
325  t = result + y;
326  carry = (t - result) - y;
327  result = t;
328  }
329 
330  // include the extended maximum likelihood term, if requested
331  if(_extended && _setNum==_extSet) {
332  if (_weightSq) {
333 
334  // Calculate sum of weights-squared here for extended term
335  Double_t sumW2(0), sumW2carry(0);
336  for (i=0 ; i<_dataClone->numEntries() ; i++) {
337  _dataClone->get(i);
338  Double_t y = _dataClone->weightSquared() - sumW2carry;
339  Double_t t = sumW2 + y;
340  sumW2carry = (t - sumW2) - y;
341  sumW2 = t;
342  }
343 
344  Double_t expected= pdfClone->expectedEvents(_dataClone->get());
345 
346  // Adjust calculation of extended term with W^2 weighting: adjust poisson such that
347  // estimate of Nexpected stays at the same value, but has a different variance, rescale
348  // both the observed and expected count of the Poisson with a factor sum[w] / sum[w^2] which is
349  // the effective weight of the Poisson term.
350  // i.e. change Poisson(Nobs = sum[w]| Nexp ) --> Poisson( sum[w] * sum[w] / sum[w^2] | Nexp * sum[w] / sum[w^2] )
351  // weighted by the effective weight sum[w^2]/ sum[w] in the likelihood.
352  // Since here we compute the likelihood with the weight square we need to multiply by the
353  // square of the effective weight
354  // expectedW = expected * sum[w] / sum[w^2] : effective expected entries
355  // observedW = sum[w] * sum[w] / sum[w^2] : effective observed entries
356  // The extended term for the likelihood weighted by the square of the weight will be then:
357  // (sum[w^2]/ sum[w] )^2 * expectedW - (sum[w^2]/ sum[w] )^2 * observedW * log (expectedW) and this is
358  // using the previous expressions for expectedW and observedW
359  // sum[w^2] / sum[w] * expected - sum[w^2] * log (expectedW)
360  // and since the weights are constants in the likelihood we can use log(expected) instead of log(expectedW)
361 
362  Double_t expectedW2 = expected * sumW2 / _dataClone->sumEntries() ;
363  Double_t extra= expectedW2 - sumW2*log(expected );
364 
365  // Double_t y = pdfClone->extendedTerm(sumW2, _dataClone->get()) - carry;
366 
367  Double_t y = extra - carry ;
368 
369  Double_t t = result + y;
370  carry = (t - result) - y;
371  result = t;
372  } else {
373  Double_t y = pdfClone->extendedTerm(_dataClone->sumEntries(), _dataClone->get()) - carry;
374  Double_t t = result + y;
375  carry = (t - result) - y;
376  result = t;
377  }
378  }
379  }
380 
381 
382  // If part of simultaneous PDF normalize probability over
383  // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
384  if (_simCount>1) {
385  Double_t y = sumWeight*log(1.0*_simCount) - carry;
386  Double_t t = result + y;
387  carry = (t - result) - y;
388  result = t;
389  }
390 
391  //timer.Stop() ;
392  //cout << "RooNLLVar::evalPart(" << GetName() << ") SET=" << _setNum << " first=" << firstEvent << ", last=" << lastEvent << ", step=" << stepSize << ") result = " << result << " CPU = " << timer.CpuTime() << endl ;
393 
394  // At the end of the first full calculation, wire the caches
395  if (_first) {
396  _first = kFALSE ;
398  }
399 
400 
401  // Check if value offset flag is set.
402  if (_doOffset) {
403 
404  // If no offset is stored enable this feature now
405  if (_offset==0 && result !=0 ) {
406  coutI(Minimization) << "RooNLLVar::evaluatePartition(" << GetName() << ") first = "<< firstEvent << " last = " << lastEvent << " Likelihood offset now set to " << result << std::endl ;
407  _offset = result ;
408  _offsetCarry = carry;
409  }
410 
411  // Substract offset
412  Double_t y = -_offset - (carry + _offsetCarry);
413  Double_t t = result + y;
414  carry = (t - result) - y;
415  result = t;
416  }
417 
418 
419  _evalCarry = carry;
420  return result ;
421 }
422 
423 
424 
425 
virtual Double_t sumEntries() const =0
void setAttribute(const Text_t *name, Bool_t value=kTRUE)
Set (default) or clear a named boolean attribute of this object.
Definition: RooAbsArg.cxx:265
Double_t _offsetCarry
Offset.
GOFOpMode _gofOpMode
Is object initialized.
#define coutI(a)
Definition: RooMsgService.h:32
virtual Bool_t valid() const
Definition: RooAbsData.h:83
RooArgSet * getObservables(const RooArgSet &set, Bool_t valueOnly=kTRUE) const
Definition: RooAbsArg.h:194
Double_t _offsetSaveW2
Definition: RooNLLVar.h:71
void swap(ROOT::THist< DIMENSIONS, PRECISION > &a, ROOT::THist< DIMENSIONS, PRECISION > &b) noexcept
Swap two histograms.
Definition: THist.h:196
RooRealSumPdf * _binnedPdf
Definition: RooNLLVar.h:75
#define N
virtual Double_t getMin(const char *name=0) const
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
void allowUndefined(Bool_t flag=kTRUE)
Definition: RooCmdConfig.h:39
Bool_t _weightSq
Definition: RooNLLVar.h:69
virtual Double_t expectedEvents(const RooArgSet *nset) const
Return expected number of events from this p.d.f for use in extended likelihood calculations.
Definition: RooAbsPdf.cxx:2930
virtual Double_t evaluatePartition(Int_t firstEvent, Int_t lastEvent, Int_t stepSize) const
Calculate and return likelihood on subset of data from firstEvent to lastEvent processed with a step ...
Definition: RooNLLVar.cxx:241
void applyWeightSquared(Bool_t flag)
Definition: RooNLLVar.cxx:215
Bool_t _first
Definition: RooNLLVar.h:70
virtual Double_t weightSquared() const =0
RooAbsArg * first() const
Bool_t process(const RooCmdArg &arg)
Process given RooCmdArg.
pRooAbsTestStatistic * _gofArray
Double_t _evalCarry
avoids loss of precision
std::vector< Double_t > _binw
Definition: RooNLLVar.h:74
if on multiple lines(like in C++).**The" * configuration fragment. * * The "import myobject continue
Parses the configuration file.
Definition: HLFactory.cxx:368
virtual Double_t getLogVal(const RooArgSet *set=0) const
Return the log of the current value with given normalization An error message is printed if the argum...
Definition: RooAbsPdf.cxx:606
Double_t getVal(const RooArgSet *set=0) const
Definition: RooAbsReal.h:64
void wireAllCaches()
Definition: RooAbsArg.cxx:2371
Bool_t defineInt(const char *name, const char *argName, Int_t intNum, Int_t defValue=0)
Define integer property name 'name' mapped to integer in slot 'intNum' in RooCmdArg with name argName...
virtual const RooArgSet * get() const
Definition: RooAbsData.h:77
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
Int_t getInt(const char *name, Int_t defaultValue=0)
Return integer property registered with name 'name'.
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:291
bool verbose
char * Form(const char *fmt,...)
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
virtual ~RooNLLVar()
Destructor.
Definition: RooNLLVar.cxx:206
RooAbsDataStore * store()
Definition: RooAbsData.h:55
static RooArgSet _emptySet
Definition: RooNLLVar.h:65
Bool_t _extended
Definition: RooNLLVar.h:67
virtual Double_t extendedTerm(Double_t observedEvents, const RooArgSet *nset=0) const
Returned the extended likelihood term (Nexpect - Nobserved*log(NExpected) of this PDF for the given n...
Definition: RooAbsPdf.cxx:646
double Double_t
Definition: RtypesCore.h:55
Int_t _nGof
Number of designated set to calculated extended term.
Double_t y[n]
Definition: legend1.C:17
void setValueDirty() const
Definition: RooAbsArg.h:439
RooNLLVar()
Definition: RooNLLVar.h:30
#define name(a, b)
Definition: linkTestLib0.cpp:5
virtual Double_t getMax(const char *name=0) const
RooAbsPdf is the abstract interface for all probability density functions The class provides hybrid a...
Definition: RooAbsPdf.h:41
virtual Double_t weight() const =0
Double_t LnGamma(Double_t z)
Computation of ln[gamma(z)] for all z.
Definition: TMath.cxx:490
double result[121]
virtual std::list< Double_t > * binBoundaries(RooAbsRealLValue &, Double_t, Double_t) const
Int_t getSize() const
Double_t _offsetCarrySaveW2
Definition: RooNLLVar.h:72
virtual void recalculateCache(const RooArgSet *, Int_t, Int_t, Int_t, Bool_t)
const Bool_t kTRUE
Definition: Rtypes.h:91
void logEvalError(const char *message, const char *serverValueString=0) const
Log evaluation error message.
ClassImp(RooNLLVar)
double log(double)
Int_t _nCPU
GOF MP Split mode specified by component (when Auto is active)