Logo ROOT   6.16/01
Reference Guide
RooNLLVar.cxx
Go to the documentation of this file.
1/*****************************************************************************
2 * Project: RooFit *
3 * Package: RooFitCore *
4 * @(#)root/roofitcore:$Id$
5 * Authors: *
6 * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7 * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8 * *
9 * Copyright (c) 2000-2005, Regents of the University of California *
10 * and Stanford University. All rights reserved. *
11 * *
12 * Redistribution and use in source and binary forms, *
13 * with or without modification, are permitted according to the terms *
14 * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15 *****************************************************************************/
16
17/**
18\file RooNLLVar.cxx
19\class RooNLLVar
20\ingroup Roofitcore
21
22Class RooNLLVar implements a a -log(likelihood) calculation from a dataset
23and a PDF. The NLL is calculated as
24<pre>
25 Sum[data] -log( pdf(x_data) )
26</pre>
27In extended mode, a (Nexpect - Nobserved*log(NExpected) term is added
28**/
29
30#include <algorithm>
31
32#include "RooFit.h"
33#include "Riostream.h"
34#include "TMath.h"
35
36#include "RooNLLVar.h"
37#include "RooAbsData.h"
38#include "RooAbsPdf.h"
39#include "RooCmdConfig.h"
40#include "RooMsgService.h"
41#include "RooAbsDataStore.h"
42#include "RooRealMPFE.h"
43#include "RooRealSumPdf.h"
44#include "RooRealVar.h"
45#include "RooProdPdf.h"
46
48;
49
51
52
53////////////////////////////////////////////////////////////////////////////////
54/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
55///
56/// Argument | Description
57/// -------------------------|------------
58/// Extended() | Include extended term in calculation
59/// NumCPU() | Activate parallel processing feature
60/// Range() | Fit only selected region
61/// SumCoefRange() | Set the range in which to interpret the coefficients of RooAddPdf components
62/// SplitRange() | Fit range is split by index catory of simultaneous PDF
63/// ConditionalObservables() | Define conditional observables
64/// Verbose() | Verbose output of GOF framework classes
65/// CloneData() | Clone input dataset for internal use (default is kTRUE)
66
67RooNLLVar::RooNLLVar(const char *name, const char* title, RooAbsPdf& pdf, RooAbsData& indata,
68 const RooCmdArg& arg1, const RooCmdArg& arg2,const RooCmdArg& arg3,
69 const RooCmdArg& arg4, const RooCmdArg& arg5,const RooCmdArg& arg6,
70 const RooCmdArg& arg7, const RooCmdArg& arg8,const RooCmdArg& arg9) :
71 RooAbsOptTestStatistic(name,title,pdf,indata,
72 *(const RooArgSet*)RooCmdConfig::decodeObjOnTheFly("RooNLLVar::RooNLLVar","ProjectedObservables",0,&_emptySet
73 ,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
74 RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","RangeWithName",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
75 RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","AddCoefRange",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
76 RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","NumCPU",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
78 RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","Verbose",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
79 RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","SplitRange",0,0,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
80 RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","CloneData",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
81{
82 RooCmdConfig pc("RooNLLVar::RooNLLVar") ;
83 pc.allowUndefined() ;
84 pc.defineInt("extended","Extended",0,kFALSE) ;
85
86 pc.process(arg1) ; pc.process(arg2) ; pc.process(arg3) ;
87 pc.process(arg4) ; pc.process(arg5) ; pc.process(arg6) ;
88 pc.process(arg7) ; pc.process(arg8) ; pc.process(arg9) ;
89
90 _extended = pc.getInt("extended") ;
92 _first = kTRUE ;
93 _offset = 0.;
94 _offsetCarry = 0.;
95 _offsetSaveW2 = 0.;
97
98 _binnedPdf = 0 ;
99}
100
101
102
103////////////////////////////////////////////////////////////////////////////////
104/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
105/// For internal use.
106
107RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
108 Bool_t extended, const char* rangeName, const char* addCoefRangeName,
109 Int_t nCPU, RooFit::MPSplit interleave, Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
110 RooAbsOptTestStatistic(name,title,pdf,indata,RooArgSet(),rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
111 _extended(extended),
112 _weightSq(kFALSE),
113 _first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.)
114{
115 // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
116 // for a binned likelihood calculation
117 _binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
118
119 // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
120 if (_binnedPdf) {
121
122 // The Active label will disable pdf integral calculations
123 _binnedPdf->setAttribute("BinnedLikelihoodActive") ;
124
126 if (obs->getSize()!=1) {
127 _binnedPdf = 0 ;
128 } else {
129 RooRealVar* var = (RooRealVar*) obs->first() ;
130 std::list<Double_t>* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
131 std::list<Double_t>::iterator biter = boundaries->begin() ;
132 _binw.resize(boundaries->size()-1) ;
133 Double_t lastBound = (*biter) ;
134 ++biter ;
135 int ibin=0 ;
136 while (biter!=boundaries->end()) {
137 _binw[ibin] = (*biter) - lastBound ;
138 lastBound = (*biter) ;
139 ibin++ ;
140 ++biter ;
141 }
142 }
143 }
144}
145
146
147
148////////////////////////////////////////////////////////////////////////////////
149/// Construct likelihood from given p.d.f and (binned or unbinned dataset)
150/// For internal use.
151
152RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
153 const RooArgSet& projDeps, Bool_t extended, const char* rangeName,const char* addCoefRangeName,
154 Int_t nCPU,RooFit::MPSplit interleave,Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
155 RooAbsOptTestStatistic(name,title,pdf,indata,projDeps,rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
156 _extended(extended),
157 _weightSq(kFALSE),
158 _first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.)
159{
160 // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
161 // for a binned likelihood calculation
162 _binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
163
164 // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
165 if (_binnedPdf) {
166
168 if (obs->getSize()!=1) {
169 _binnedPdf = 0 ;
170 } else {
171 RooRealVar* var = (RooRealVar*) obs->first() ;
172 std::list<Double_t>* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
173 std::list<Double_t>::iterator biter = boundaries->begin() ;
174 _binw.resize(boundaries->size()-1) ;
175 Double_t lastBound = (*biter) ;
176 ++biter ;
177 int ibin=0 ;
178 while (biter!=boundaries->end()) {
179 _binw[ibin] = (*biter) - lastBound ;
180 lastBound = (*biter) ;
181 ibin++ ;
182 ++biter ;
183 }
184 }
185 }
186}
187
188
189
190////////////////////////////////////////////////////////////////////////////////
191/// Copy constructor
192
193RooNLLVar::RooNLLVar(const RooNLLVar& other, const char* name) :
195 _extended(other._extended),
196 _weightSq(other._weightSq),
197 _first(kTRUE), _offsetSaveW2(other._offsetSaveW2),
198 _offsetCarrySaveW2(other._offsetCarrySaveW2),
199 _binw(other._binw) {
201}
202
203
204
205
206////////////////////////////////////////////////////////////////////////////////
207/// Destructor
208
210{
211}
212
213
214
215
216////////////////////////////////////////////////////////////////////////////////
217
219{
220 if (_gofOpMode==Slave) {
221 if (flag != _weightSq) {
222 _weightSq = flag;
225 }
227 } else if ( _gofOpMode==MPMaster) {
228 for (Int_t i=0 ; i<_nCPU ; i++)
229 _mpfeArray[i]->applyNLLWeightSquared(flag);
230 } else if ( _gofOpMode==SimMaster) {
231 for (Int_t i=0 ; i<_nGof ; i++)
232 ((RooNLLVar*)_gofArray[i])->applyWeightSquared(flag);
233 }
234}
235
236
237
238////////////////////////////////////////////////////////////////////////////////
239/// Calculate and return likelihood on subset of data from firstEvent to lastEvent
240/// processed with a step size of 'stepSize'. If this an extended likelihood and
241/// and the zero event is processed the extended term is added to the return
242/// likelihood.
243
244Double_t RooNLLVar::evaluatePartition(Int_t firstEvent, Int_t lastEvent, Int_t stepSize) const
245{
246 // Throughout the calculation, we use Kahan's algorithm for summing to
247 // prevent loss of precision - this is a factor four more expensive than
248 // straight addition, but since evaluating the PDF is usually much more
249 // expensive than that, we tolerate the additional cost...
250 Int_t i ;
251 Double_t result(0), carry(0);
252
253 RooAbsPdf* pdfClone = (RooAbsPdf*) _funcClone ;
254
255 // cout << "RooNLLVar::evaluatePartition(" << GetName() << ") projDeps = " << (_projDeps?*_projDeps:RooArgSet()) << endl ;
256
257 _dataClone->store()->recalculateCache( _projDeps, firstEvent, lastEvent, stepSize,(_binnedPdf?kFALSE:kTRUE) ) ;
258
259 Double_t sumWeight(0), sumWeightCarry(0);
260
261 // If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin)
262 if (_binnedPdf) {
263
264 for (i=firstEvent ; i<lastEvent ; i+=stepSize) {
265
266 _dataClone->get(i) ;
267
268 if (!_dataClone->valid()) continue;
269
270 Double_t eventWeight = _dataClone->weight();
271
272
273 // Calculate log(Poisson(N|mu) for this bin
274 Double_t N = eventWeight ;
275 Double_t mu = _binnedPdf->getVal()*_binw[i] ;
276 //cout << "RooNLLVar::binnedL(" << GetName() << ") N=" << N << " mu = " << mu << endl ;
277
278 if (mu<=0 && N>0) {
279
280 // Catch error condition: data present where zero events are predicted
281 logEvalError(Form("Observed %f events in bin %d with zero event yield",N,i)) ;
282
283 } else if (fabs(mu)<1e-10 && fabs(N)<1e-10) {
284
285 // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
286 // since log(mu)=0. No update of result is required since term=0.
287
288 } else {
289
290 Double_t term = -1*(-mu + N*log(mu) - TMath::LnGamma(N+1)) ;
291
292 // Kahan summation of sumWeight
293 Double_t y = eventWeight - sumWeightCarry;
294 Double_t t = sumWeight + y;
295 sumWeightCarry = (t - sumWeight) - y;
296 sumWeight = t;
297
298 // Kahan summation of result
299 y = term - carry;
300 t = result + y;
301 carry = (t - result) - y;
302 result = t;
303 }
304 }
305
306
307 } else {
308
309 for (i=firstEvent ; i<lastEvent ; i+=stepSize) {
310
311 _dataClone->get(i) ;
312
313 if (!_dataClone->valid()) continue;
314
315 Double_t eventWeight = _dataClone->weight();
316 if (0. == eventWeight * eventWeight) continue ;
317 if (_weightSq) eventWeight = _dataClone->weightSquared() ;
318
319 Double_t term = -eventWeight * pdfClone->getLogVal(_normSet);
320
321
322 Double_t y = eventWeight - sumWeightCarry;
323 Double_t t = sumWeight + y;
324 sumWeightCarry = (t - sumWeight) - y;
325 sumWeight = t;
326
327 y = term - carry;
328 t = result + y;
329 carry = (t - result) - y;
330 result = t;
331 }
332
333 // include the extended maximum likelihood term, if requested
334 if(_extended && _setNum==_extSet) {
335 if (_weightSq) {
336
337 // Calculate sum of weights-squared here for extended term
338 Double_t sumW2(0), sumW2carry(0);
339 for (i=0 ; i<_dataClone->numEntries() ; i++) {
340 _dataClone->get(i);
341 Double_t y = _dataClone->weightSquared() - sumW2carry;
342 Double_t t = sumW2 + y;
343 sumW2carry = (t - sumW2) - y;
344 sumW2 = t;
345 }
346
347 Double_t expected= pdfClone->expectedEvents(_dataClone->get());
348
349 // Adjust calculation of extended term with W^2 weighting: adjust poisson such that
350 // estimate of Nexpected stays at the same value, but has a different variance, rescale
351 // both the observed and expected count of the Poisson with a factor sum[w] / sum[w^2] which is
352 // the effective weight of the Poisson term.
353 // i.e. change Poisson(Nobs = sum[w]| Nexp ) --> Poisson( sum[w] * sum[w] / sum[w^2] | Nexp * sum[w] / sum[w^2] )
354 // weighted by the effective weight sum[w^2]/ sum[w] in the likelihood.
355 // Since here we compute the likelihood with the weight square we need to multiply by the
356 // square of the effective weight
357 // expectedW = expected * sum[w] / sum[w^2] : effective expected entries
358 // observedW = sum[w] * sum[w] / sum[w^2] : effective observed entries
359 // The extended term for the likelihood weighted by the square of the weight will be then:
360 // (sum[w^2]/ sum[w] )^2 * expectedW - (sum[w^2]/ sum[w] )^2 * observedW * log (expectedW) and this is
361 // using the previous expressions for expectedW and observedW
362 // sum[w^2] / sum[w] * expected - sum[w^2] * log (expectedW)
363 // and since the weights are constants in the likelihood we can use log(expected) instead of log(expectedW)
364
365 Double_t expectedW2 = expected * sumW2 / _dataClone->sumEntries() ;
366 Double_t extra= expectedW2 - sumW2*log(expected );
367
368 // Double_t y = pdfClone->extendedTerm(sumW2, _dataClone->get()) - carry;
369
370 Double_t y = extra - carry ;
371
372 Double_t t = result + y;
373 carry = (t - result) - y;
374 result = t;
375 } else {
376 Double_t y = pdfClone->extendedTerm(_dataClone->sumEntries(), _dataClone->get()) - carry;
377 Double_t t = result + y;
378 carry = (t - result) - y;
379 result = t;
380 }
381 }
382 }
383
384
385 // If part of simultaneous PDF normalize probability over
386 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
387 if (_simCount>1) {
388 Double_t y = sumWeight*log(1.0*_simCount) - carry;
389 Double_t t = result + y;
390 carry = (t - result) - y;
391 result = t;
392 }
393
394 //timer.Stop() ;
395 //cout << "RooNLLVar::evalPart(" << GetName() << ") SET=" << _setNum << " first=" << firstEvent << ", last=" << lastEvent << ", step=" << stepSize << ") result = " << result << " CPU = " << timer.CpuTime() << endl ;
396
397 // At the end of the first full calculation, wire the caches
398 if (_first) {
399 _first = kFALSE ;
401 }
402
403
404 // Check if value offset flag is set.
405 if (_doOffset) {
406
407 // If no offset is stored enable this feature now
408 if (_offset==0 && result !=0 ) {
409 coutI(Minimization) << "RooNLLVar::evaluatePartition(" << GetName() << ") first = "<< firstEvent << " last = " << lastEvent << " Likelihood offset now set to " << result << std::endl ;
410 _offset = result ;
411 _offsetCarry = carry;
412 }
413
414 // Substract offset
415 Double_t y = -_offset - (carry + _offsetCarry);
416 Double_t t = result + y;
417 carry = (t - result) - y;
418 result = t;
419 }
420
421
422 _evalCarry = carry;
423 return result ;
424}
425
426
427
428
#define e(i)
Definition: RSha256.hxx:103
#define coutI(a)
Definition: RooMsgService.h:31
int Int_t
Definition: RtypesCore.h:41
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:363
#define N
double log(double)
char * Form(const char *fmt,...)
RooArgSet * getObservables(const RooArgSet &set, Bool_t valueOnly=kTRUE) const
Definition: RooAbsArg.h:194
void setAttribute(const Text_t *name, Bool_t value=kTRUE)
Set (default) or clear a named boolean attribute of this object.
Definition: RooAbsArg.cxx:241
void wireAllCaches()
Definition: RooAbsArg.cxx:2364
void setValueDirty() const
Definition: RooAbsArg.h:441
Int_t getSize() const
RooAbsArg * first() const
virtual void recalculateCache(const RooArgSet *, Int_t, Int_t, Int_t, Bool_t)
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition: RooAbsData.h:37
virtual const RooArgSet * get() const
Definition: RooAbsData.h:79
RooAbsDataStore * store()
Definition: RooAbsData.h:55
virtual Double_t sumEntries() const =0
virtual Bool_t valid() const
Definition: RooAbsData.h:85
virtual Double_t weight() const =0
virtual Double_t weightSquared() const =0
virtual Int_t numEntries() const
Definition: RooAbsData.cxx:285
RooAbsOptTestStatistic is the abstract base class for test statistics objects that evaluate a functio...
RooAbsPdf is the abstract interface for all probability density functions The class provides hybrid a...
Definition: RooAbsPdf.h:41
virtual Double_t extendedTerm(Double_t observedEvents, const RooArgSet *nset=0) const
Returned the extended likelihood term (Nexpect - Nobserved*log(NExpected) of this PDF for the given n...
Definition: RooAbsPdf.cxx:647
virtual Double_t getLogVal(const RooArgSet *set=0) const
Return the log of the current value with given normalization An error message is printed if the argum...
Definition: RooAbsPdf.cxx:607
virtual Double_t expectedEvents(const RooArgSet *nset) const
Return expected number of events from this p.d.f for use in extended likelihood calculations.
Definition: RooAbsPdf.cxx:2855
virtual Double_t getMax(const char *name=0) const
virtual Double_t getMin(const char *name=0) const
Double_t getVal(const RooArgSet *set=0) const
Evaluate object. Returns either cached value or triggers a recalculation.
Definition: RooAbsReal.h:64
void logEvalError(const char *message, const char *serverValueString=0) const
Log evaluation error message.
Int_t _nGof
Number of designated set to calculated extended term.
Int_t _nCPU
GOF MP Split mode specified by component (when Auto is active)
pRooAbsTestStatistic * _gofArray
GOFOpMode _gofOpMode
Is object initialized
Double_t _evalCarry
avoids loss of precision
Double_t _offsetCarry
Offset.
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition: RooArgSet.h:28
RooCmdArg is a named container for two doubles, two integers two object points and three string point...
Definition: RooCmdArg.h:27
Class RooCmdConfig is a configurable parser for RooCmdArg named arguments.
Definition: RooCmdConfig.h:27
Class RooNLLVar implements a a -log(likelihood) calculation from a dataset and a PDF.
Definition: RooNLLVar.h:26
void applyWeightSquared(Bool_t flag)
Definition: RooNLLVar.cxx:218
RooNLLVar()
Definition: RooNLLVar.h:30
RooRealSumPdf * _binnedPdf
Definition: RooNLLVar.h:75
Bool_t _extended
Definition: RooNLLVar.h:67
virtual Double_t evaluatePartition(Int_t firstEvent, Int_t lastEvent, Int_t stepSize) const
Calculate and return likelihood on subset of data from firstEvent to lastEvent processed with a step ...
Definition: RooNLLVar.cxx:244
static RooArgSet _emptySet
Definition: RooNLLVar.h:65
virtual ~RooNLLVar()
Destructor.
Definition: RooNLLVar.cxx:209
Bool_t _first
Definition: RooNLLVar.h:70
std::vector< Double_t > _binw
Definition: RooNLLVar.h:74
Double_t _offsetCarrySaveW2
Definition: RooNLLVar.h:72
Double_t _offsetSaveW2
Definition: RooNLLVar.h:71
Bool_t _weightSq
Definition: RooNLLVar.h:69
Class RooRealSumPdf implements a PDF constructed from a sum of functions:
Definition: RooRealSumPdf.h:24
virtual std::list< Double_t > * binBoundaries(RooAbsRealLValue &, Double_t, Double_t) const
RooRealVar represents a fundamental (non-derived) real valued object.
Definition: RooRealVar.h:36
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Double_t y[n]
Definition: legend1.C:17
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
@ BulkPartition
Definition: RooGlobalFunc.h:60
@ Minimization
Definition: RooGlobalFunc.h:57
static constexpr double pc
Double_t LnGamma(Double_t z)
Computation of ln[gamma(z)] for all z.
Definition: TMath.cxx:486
void swap(nlohmann::json &j1, nlohmann::json &j2) noexcept(is_nothrow_move_constructible< nlohmann::json >::value and is_nothrow_move_assignable< nlohmann::json >::value)
exchanges the values of two JSON objects
Definition: json.hpp:12929