Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooNLLVarNew.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Jonas Rembser, CERN 2021
5 * Emmanouil Michalainas, CERN 2021
6 *
7 * Copyright (c) 2021, CERN
8 *
9 * Redistribution and use in source and binary forms,
10 * with or without modification, are permitted according to the terms
11 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
12 */
13
14/**
15\file RooNLLVarNew.cxx
16\class RooNLLVarNew
17\ingroup Roofitcore
18
19This is a simple class designed to produce the nll values needed by the fitter.
20This class calls functions from `RooBatchCompute` library to provide faster
21computation times.
22**/
23
24#include "RooNLLVarNew.h"
25
26#include <RooHistPdf.h>
27#include <RooBatchCompute.h>
28#include <RooDataHist.h>
29#include <RooNaNPacker.h>
30#include <RooConstVar.h>
31#include <RooRealVar.h>
32#include <RooSetProxy.h>
34
35#include "RooFitImplHelpers.h"
36
37#include <ROOT/StringUtils.hxx>
38
39#include <TClass.h>
40#include <TMath.h>
41#include <Math/Util.h>
42
43#include <numeric>
44#include <stdexcept>
45#include <vector>
46
47// Declare constexpr static members to make them available if odr-used in C++14.
48constexpr const char *RooNLLVarNew::weightVarName;
49constexpr const char *RooNLLVarNew::weightVarNameSumW2;
50
51namespace {
52
53// Use RooConstVar for dummies such that they don't get included in getParameters().
54RooConstVar *dummyVar(const char *name)
55{
56 return new RooConstVar(name, name, 1.0);
57}
58
59// Helper class to represent a template pdf based on the fit dataset.
60class RooOffsetPdf : public RooAbsPdf {
61public:
62 RooOffsetPdf(const char *name, const char *title, RooArgSet const &observables, RooAbsReal &weightVar)
63 : RooAbsPdf(name, title),
64 _observables("!observables", "List of observables", this),
65 _weightVar{"!weightVar", "weightVar", this, weightVar, true, false}
66 {
67 for (RooAbsArg *obs : observables) {
68 _observables.add(*obs);
69 }
70 }
71 RooOffsetPdf(const RooOffsetPdf &other, const char *name = nullptr)
72 : RooAbsPdf(other, name),
73 _observables("!servers", this, other._observables),
74 _weightVar{"!weightVar", this, other._weightVar}
75 {
76 }
77 TObject *clone(const char *newname) const override { return new RooOffsetPdf(*this, newname); }
78
79 void computeBatch(double *output, size_t nEvents, RooFit::Detail::DataMap const &dataMap) const override
80 {
81 std::span<const double> weights = dataMap.at(_weightVar);
82
83 // Create the template histogram from the data. This operation is very
84 // expensive, but since the offset only depends on the observables it
85 // only has to be done once.
86
87 RooDataHist dataHist{"data", "data", _observables};
88 // Loop over events to fill the histogram
89 for (std::size_t i = 0; i < nEvents; ++i) {
90 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
91 var->setVal(dataMap.at(var)[i]);
92 }
93 dataHist.add(_observables, weights[weights.size() == 1 ? 0 : i]);
94 }
95
96 // Lookup bin weights via RooHistPdf
97 RooHistPdf pdf{"offsetPdf", "offsetPdf", _observables, dataHist};
98 for (std::size_t i = 0; i < nEvents; ++i) {
99 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
100 var->setVal(dataMap.at(var)[i]);
101 }
102 output[i] = pdf.getVal(_observables);
103 }
104 }
105
106private:
107 double evaluate() const override { return 0.0; } // should never be called
108
109 RooSetProxy _observables;
111};
112
113} // namespace
114
115/** Construct a RooNLLVarNew
116\param name the name
117\param title the title
118\param pdf The pdf for which the nll is computed for
119\param observables The observabes of the pdf
120\param isExtended Set to true if this is an extended fit
121**/
122RooNLLVarNew::RooNLLVarNew(const char *name, const char *title, RooAbsPdf &pdf, RooArgSet const &observables,
123 bool isExtended, RooFit::OffsetMode offsetMode)
124 : RooAbsReal(name, title),
125 _pdf{"pdf", "pdf", this, pdf},
126 _weightVar{"weightVar", "weightVar", this, *dummyVar(weightVarName), true, false, true},
127 _weightSquaredVar{weightVarNameSumW2, weightVarNameSumW2, this, *dummyVar("weightSquardVar"), true, false, true},
128 _binnedL{pdf.getAttribute("BinnedLikelihoodActive")}
129{
130 RooArgSet obs;
131 pdf.getObservables(&observables, obs);
132
133 // In the "BinnedLikelihoodActiveYields" mode, the pdf values can directly
134 // be interpreted as yields and don't need to be multiplied by the bin
135 // widths. That's why we don't need to even fill them in this case.
136 if (_binnedL && !pdf.getAttribute("BinnedLikelihoodActiveYields")) {
138 }
139
140 if (isExtended && !_binnedL) {
141 std::unique_ptr<RooAbsReal> expectedEvents = pdf.createExpectedEventsFunc(&obs);
142 if (expectedEvents) {
144 std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", "expectedEvents", this, *expectedEvents);
145 addOwnedComponents(std::move(expectedEvents));
146 }
147 }
148
152
153 // In the binned likelihood code path, we directly use that data weights for
154 // the offsetting.
155 if (!_binnedL && _doBinOffset) {
156 auto offsetPdf = std::make_unique<RooOffsetPdf>("_offset_pdf", "_offset_pdf", obs, *_weightVar);
157 _offsetPdf = std::make_unique<RooTemplateProxy<RooAbsPdf>>("offsetPdf", "offsetPdf", this, *offsetPdf);
158 addOwnedComponents(std::move(offsetPdf));
159 }
160}
161
163 : RooAbsReal(other, name),
164 _pdf{"pdf", this, other._pdf},
165 _weightVar{"weightVar", this, other._weightVar},
166 _weightSquaredVar{"weightSquaredVar", this, other._weightSquaredVar},
167 _weightSquared{other._weightSquared},
168 _binnedL{other._binnedL},
169 _doOffset{other._doOffset},
170 _simCount{other._simCount},
171 _prefix{other._prefix},
172 _binw{other._binw}
173{
174 if (other._expectedEvents) {
175 _expectedEvents = std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", this, *other._expectedEvents);
176 }
177}
178
180{
181 // Check if the bin widths were already filled
182 if (!_binw.empty()) {
183 return;
184 }
185
186 if (observables.size() != 1) {
187 throw std::runtime_error("BinnedPdf optimization only works with a 1D pdf.");
188 } else {
189 auto *var = static_cast<RooRealVar *>(observables.first());
190 std::list<double> *boundaries = pdf.binBoundaries(*var, var->getMin(), var->getMax());
191 std::list<double>::iterator biter = boundaries->begin();
192 _binw.resize(boundaries->size() - 1);
193 double lastBound = (*biter);
194 ++biter;
195 int ibin = 0;
196 while (biter != boundaries->end()) {
197 _binw[ibin] = (*biter) - lastBound;
198 lastBound = (*biter);
199 ibin++;
200 ++biter;
201 }
202 }
203}
204
205double RooNLLVarNew::computeBatchBinnedL(std::span<const double> preds, std::span<const double> weights) const
206{
208 ROOT::Math::KahanSum<double> sumWeightKahanSum{0.0};
209
210 const bool predsAreYields = _binw.empty();
211
212 for (std::size_t i = 0; i < preds.size(); ++i) {
213
214 double eventWeight = weights[i];
215
216 // Calculate log(Poisson(N|mu) for this bin
217 double N = eventWeight;
218 double mu = preds[i];
219 if (!predsAreYields) {
220 mu *= _binw[i];
221 }
222
223 if (mu <= 0 && N > 0) {
224
225 // Catch error condition: data present where zero events are predicted
226 logEvalError(Form("Observed %f events in bin %lu with zero event yield", N, (unsigned long)i));
227
228 } else if (std::abs(mu) < 1e-10 && std::abs(N) < 1e-10) {
229
230 // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
231 // since log(mu)=0. No update of result is required since term=0.
232
233 } else {
234
235 double term = 0.0;
236 if (_doBinOffset) {
237 term -= -mu + N + N * (std::log(mu) - std::log(N));
238 } else {
239 term -= -mu + N * std::log(mu) - TMath::LnGamma(N + 1);
240 }
241 result += term;
242 sumWeightKahanSum += eventWeight;
243 }
244 }
245
246 return finalizeResult(result, sumWeightKahanSum.Sum());
247}
248
249/** Compute multiple negative logs of probabilities.
250
251\param output An array of doubles where the computation results will be stored
252\param nOut not used
253\note nEvents is the number of events to be processed (the dataMap size)
254\param dataMap A map containing spans with the input data for the computation
255**/
256void RooNLLVarNew::computeBatch(double *output, size_t /*nOut*/, RooFit::Detail::DataMap const &dataMap) const
257{
258 std::span<const double> weights = dataMap.at(_weightVar);
259 std::span<const double> weightsSumW2 = dataMap.at(_weightSquaredVar);
260
261 if (_binnedL) {
262 output[0] = computeBatchBinnedL(dataMap.at(&*_pdf), _weightSquared ? weightsSumW2 : weights);
263 return;
264 }
265
266 auto config = dataMap.config(this);
267
268 auto probas = dataMap.at(_pdf);
269
270 _sumWeight = weights.size() == 1 ? weights[0] * probas.size()
271 : RooBatchCompute::reduceSum(config, weights.data(), weights.size());
272 if (_expectedEvents && _weightSquared && _sumWeight2 == 0.0) {
273 _sumWeight2 = weights.size() == 1 ? weightsSumW2[0] * probas.size()
274 : RooBatchCompute::reduceSum(config, weightsSumW2.data(), weightsSumW2.size());
275 }
276
277 auto nllOut = RooBatchCompute::reduceNLL(config, probas, _weightSquared ? weightsSumW2 : weights,
278 _doBinOffset ? dataMap.at(*_offsetPdf) : std::span<const double>{});
279
280 if (nllOut.nLargeValues > 0) {
281 oocoutW(&*_pdf, Eval) << "RooAbsPdf::getLogVal(" << _pdf->GetName()
282 << ") WARNING: top-level pdf has unexpectedly large values" << std::endl;
283 }
284 for (std::size_t i = 0; i < nllOut.nNonPositiveValues; ++i) {
285 _pdf->logEvalError("getLogVal() top-level p.d.f not greater than zero");
286 }
287 for (std::size_t i = 0; i < nllOut.nNaNValues; ++i) {
288 _pdf->logEvalError("getLogVal() top-level p.d.f evaluates to NaN");
289 }
290
291 if (_expectedEvents) {
292 std::span<const double> expected = dataMap.at(*_expectedEvents);
293 nllOut.nllSum += _pdf->extendedTerm(_sumWeight, expected[0], _weightSquared ? _sumWeight2 : 0.0, _doBinOffset);
294 }
295
296 output[0] = finalizeResult({nllOut.nllSum, nllOut.nllSumCarry}, _sumWeight);
297}
298
299void RooNLLVarNew::getParametersHook(const RooArgSet * /*nset*/, RooArgSet *params, bool /*stripDisconnected*/) const
300{
301 // strip away the special variables
302 params->remove(RooArgList{*_weightVar, *_weightSquaredVar}, true, true);
303}
304
305////////////////////////////////////////////////////////////////////////////////
306/// Sets the prefix for the special variables of this NLL, like weights or bin
307/// volumes.
308/// \param[in] prefix The prefix to add to the observables and weight names.
309void RooNLLVarNew::setPrefix(std::string const &prefix)
310{
311 _prefix = prefix;
312
314}
315
317{
320 if (_offsetPdf) {
321 (*_offsetPdf)->SetName((_prefix + "_offset_pdf").c_str());
322 }
323}
324
325////////////////////////////////////////////////////////////////////////////////
326/// Toggles the weight square correction.
328{
329 _weightSquared = flag;
330}
331
333{
334 _doOffset = flag;
336}
337
339{
340 // If part of simultaneous PDF normalize probability over
341 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
342 // If we do bin-by bin offsetting, we don't do this because it cancels out
343 if (!_doBinOffset && _simCount > 1) {
344 result += weightSum * std::log(static_cast<double>(_simCount));
345 }
346
347 // Check if value offset flag is set.
348 if (_doOffset) {
349
350 // If no offset is stored enable this feature now
351 if (_offset.Sum() == 0 && _offset.Carry() == 0 && (result.Sum() != 0 || result.Carry() != 0)) {
352 _offset = result;
353 }
354
355 // Subtract offset
356 if (!RooAbsReal::hideOffset()) {
357 result -= _offset;
358 }
359 }
360 return result.Sum();
361}
362
364{
365 std::string weightSumName = RooFit::Detail::makeValidVarName(GetName()) + "WeightSum";
366 std::string resName = RooFit::Detail::makeValidVarName(GetName()) + "Result";
367 ctx.addResult(this, resName);
368 ctx.addToGlobalScope("double " + weightSumName + " = 0.0;\n");
369 ctx.addToGlobalScope("double " + resName + " = 0.0;\n");
370
371 const bool needWeightSum = _expectedEvents || _simCount > 1;
372
373 if (needWeightSum) {
374 auto scope = ctx.beginLoop(this);
375 ctx.addToCodeBody(weightSumName + " += " + ctx.getResult(*_weightVar) + ";\n");
376 }
377 if (_simCount > 1) {
378 std::string simCountStr = std::to_string(static_cast<double>(_simCount));
379 ctx.addToCodeBody(resName + " += " + weightSumName + " * std::log(" + simCountStr + ");\n");
380 }
381
382 // Begin loop scope for the observables and weight variable. If the weight
383 // is a scalar, the context will ignore it for the loop scope. The closing
384 // brackets of the loop is written at the end of the scopes lifetime.
385 {
386 auto scope = ctx.beginLoop(this);
387 std::string const &weight = ctx.getResult(_weightVar.arg());
388 std::string const &pdfName = ctx.getResult(_pdf.arg());
389
390 if (_binnedL) {
391 // Since we only support uniform binning, bin width is the same for all.
392 if (!_pdf->getAttribute("BinnedLikelihoodActiveYields")) {
393 std::stringstream errorMsg;
394 errorMsg << "RooNLLVarNew::translate(): binned likelihood optimization is only supported when raw pdf "
395 "values can be interpreted as yields."
396 << " This is not the case for HistFactory models written with ROOT versions before 6.26.00";
397 coutE(InputArguments) << errorMsg.str() << std::endl;
398 throw std::runtime_error(errorMsg.str());
399 }
400 std::string muName = pdfName;
401 ctx.addToCodeBody(this, resName + " += -1 * (-" + muName + " + " + weight + " * std::log(" + muName +
402 ") - TMath::LnGamma(" + weight + "+ 1));\n");
403 } else {
404 ctx.addToCodeBody(this, resName + " -= " + weight + " * std::log(" + pdfName + ");\n");
405 }
406 }
407 if (_expectedEvents) {
408 std::string expected = ctx.getResult(**_expectedEvents);
409 ctx.addToCodeBody(resName + " += " + expected + " - " + weightSumName + " * std::log(" + expected + ");\n");
410 }
411}
#define e(i)
Definition RSha256.hxx:103
ROOT::RRangeCast< T, false, Range_t > static_range_cast(Range_t &&coll)
TObject * clone(const char *newname) const override
#define oocoutW(o, a)
#define coutE(a)
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
char name[80]
Definition TGX11.cxx:110
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2467
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
T Sum() const
Definition Util.h:240
T Carry() const
Definition Util.h:250
Common abstract base class for objects that represent a value and a "shape" in RooFit.
Definition RooAbsArg.h:79
RooFit::OwningPtr< RooArgSet > getObservables(const RooArgSet &set, bool valueOnly=true) const
Given a set of possible observables, return the observables that this PDF depends on.
void SetName(const char *name) override
Set the name of the TNamed.
bool addOwnedComponents(const RooAbsCollection &comps)
Take ownership of the contents of 'comps'.
bool getAttribute(const Text_t *name) const
Check if a named attribute is set. By default, all attributes are unset.
virtual bool remove(const RooAbsArg &var, bool silent=false, bool matchByNameOnly=false)
Remove the specified argument from our list.
Storage_t::size_type size() const
RooAbsArg * first() const
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
virtual std::unique_ptr< RooAbsReal > createExpectedEventsFunc(const RooArgSet *nset) const
Returns an object that represents the expected number of events for a given normalization set,...
double extendedTerm(double sumEntries, double expected, double sumEntriesW2=0.0, bool doOffset=false) const
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
virtual std::list< double > * binBoundaries(RooAbsRealLValue &obs, double xlo, double xhi) const
Retrieve bin boundaries if this distribution is binned in obs.
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition RooAbsReal.h:103
static bool hideOffset()
void logEvalError(const char *message, const char *serverValueString=nullptr) const
Log evaluation error message.
RooArgList is a container object that can hold multiple RooAbsArg objects.
Definition RooArgList.h:22
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
RooConstVar represent a constant real-valued object.
Definition RooConstVar.h:23
The RooDataHist is a container class to hold N-dimensional binned data.
Definition RooDataHist.h:39
void add(const RooArgSet &row, double wgt=1.0) override
Add wgt to the bin content enclosed by the coordinates passed in row.
Definition RooDataHist.h:66
A class to maintain the context for squashing of RooFit models into code.
void addResult(RooAbsArg const *key, std::string const &value)
A function to save an expression that includes/depends on the result of the input node.
void addToCodeBody(RooAbsArg const *klass, std::string const &in)
Adds the input string to the squashed code body.
std::string const & getResult(RooAbsArg const &arg)
Gets the result for the given node using the node name.
void addToGlobalScope(std::string const &str)
Adds the given string to the string block that will be emitted at the top of the squashed function.
std::unique_ptr< LoopScope > beginLoop(RooAbsArg const *in)
Create a RAII scope for iterating over vector observables.
RooBatchCompute::Config config(RooAbsArg const *arg) const
Definition DataMap.cxx:40
std::span< const double > at(RooAbsArg const *arg, RooAbsArg const *caller=nullptr)
Definition DataMap.cxx:22
RooHistPdf implements a propability density function sampled from a multidimensional histogram.
Definition RooHistPdf.h:30
This is a simple class designed to produce the nll values needed by the fitter.
ROOT::Math::KahanSum< double > _offset
! Offset as KahanSum to avoid loss of precision
void enableOffsetting(bool) override
std::unique_ptr< RooTemplateProxy< RooAbsReal > > _expectedEvents
RooNLLVarNew(const char *name, const char *title, RooAbsPdf &pdf, RooArgSet const &observables, bool isExtended, RooFit::OffsetMode offsetMode)
Construct a RooNLLVarNew.
bool _weightSquared
void translate(RooFit::Detail::CodeSquashContext &ctx) const override
This function defines a translation for each RooAbsReal based object that can be used to express the ...
void applyWeightSquared(bool flag) override
Toggles the weight square correction.
void getParametersHook(const RooArgSet *nset, RooArgSet *list, bool stripDisconnected) const override
void enableBinOffsetting(bool on=true)
std::unique_ptr< RooTemplateProxy< RooAbsPdf > > _offsetPdf
void computeBatch(double *output, size_t nOut, RooFit::Detail::DataMap const &) const override
Compute multiple negative logs of probabilities.
void setPrefix(std::string const &prefix)
Sets the prefix for the special variables of this NLL, like weights or bin volumes.
double _sumWeight
void fillBinWidthsFromPdfBoundaries(RooAbsReal const &pdf, RooArgSet const &observables)
void resetWeightVarNames()
std::string _prefix
std::vector< double > _binw
RooTemplateProxy< RooAbsPdf > _pdf
double computeBatchBinnedL(std::span< const double > preds, std::span< const double > weights) const
double _sumWeight2
RooTemplateProxy< RooAbsReal > _weightVar
static constexpr const char * weightVarName
double finalizeResult(ROOT::Math::KahanSum< double > result, double weightSum) const
static constexpr const char * weightVarNameSumW2
RooTemplateProxy< RooAbsReal > _weightSquaredVar
RooRealVar represents a variable that can be changed from the outside.
Definition RooRealVar.h:37
const T & arg() const
Return reference to object held in proxy.
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
Mother of all ROOT objects.
Definition TObject.h:41
double reduceSum(Config cfg, InputArr input, size_t n)
ReduceNLLOutput reduceNLL(Config cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)
std::string makeValidVarName(std::string const &in)
OffsetMode
For setting the offset mode with the Offset() command argument to RooAbsPdf::fitTo()
void evaluate(typename Architecture_t::Tensor_t &A, EActivationFunction f)
Apply the given activation function to each value in the given tensor A.
Definition Functions.h:98
Double_t LnGamma(Double_t z)
Computation of ln[gamma(z)] for all z.
Definition TMath.cxx:509
static void output()