Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooNLLVarNew.cxx
Go to the documentation of this file.
1/// \cond ROOFIT_INTERNAL
2
3/*
4 * Project: RooFit
5 * Authors:
6 * Jonas Rembser, CERN 2021
7 * Emmanouil Michalainas, CERN 2021
8 *
9 * Copyright (c) 2021, CERN
10 *
11 * Redistribution and use in source and binary forms,
12 * with or without modification, are permitted according to the terms
13 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
14 */
15
16/**
17\file RooNLLVarNew.cxx
18\class RooNLLVarNew
19\ingroup Roofitcore
20
21This is a simple class designed to produce the nll values needed by the fitter.
22This class calls functions from `RooBatchCompute` library to provide faster
23computation times.
24**/
25
26#include "RooNLLVarNew.h"
27
28#include <RooHistPdf.h>
29#include <RooBatchCompute.h>
30#include <RooDataHist.h>
31#include <RooNaNPacker.h>
32#include <RooConstVar.h>
33#include <RooRealVar.h>
34#include <RooSetProxy.h>
37
38#include "RooFitImplHelpers.h"
39
40#include <ROOT/StringUtils.hxx>
41
42#include <TClass.h>
43#include <TMath.h>
44#include <Math/Util.h>
45
46#include <numeric>
47#include <stdexcept>
48#include <vector>
49
50// Declare constexpr static members to make them available if odr-used in C++14.
51constexpr const char *RooNLLVarNew::weightVarName;
52constexpr const char *RooNLLVarNew::weightVarNameSumW2;
53
54namespace {
55
56// Use RooConstVar for dummies such that they don't get included in getParameters().
57RooConstVar *dummyVar(const char *name)
58{
59 return new RooConstVar(name, name, 1.0);
60}
61
62// Helper class to represent a template pdf based on the fit dataset.
63class RooOffsetPdf : public RooAbsPdf {
64public:
65 RooOffsetPdf(const char *name, const char *title, RooArgSet const &observables, RooAbsReal &weightVar)
66 : RooAbsPdf(name, title),
67 _observables("!observables", "List of observables", this),
68 _weightVar{"!weightVar", "weightVar", this, weightVar, true, false}
69 {
70 for (RooAbsArg *obs : observables) {
71 _observables.add(*obs);
72 }
73 }
74 RooOffsetPdf(const RooOffsetPdf &other, const char *name = nullptr)
75 : RooAbsPdf(other, name),
76 _observables("!servers", this, other._observables),
77 _weightVar{"!weightVar", this, other._weightVar}
78 {
79 }
80 TObject *clone(const char *newname) const override { return new RooOffsetPdf(*this, newname); }
81
82 void doEval(RooFit::EvalContext &ctx) const override
83 {
84 std::span<double> output = ctx.output();
85 std::size_t nEvents = output.size();
86
87 std::span<const double> weights = ctx.at(_weightVar);
88
89 // Create the template histogram from the data. This operation is very
90 // expensive, but since the offset only depends on the observables it
91 // only has to be done once.
92
93 RooDataHist dataHist{"data", "data", _observables};
94 // Loop over events to fill the histogram
95 for (std::size_t i = 0; i < nEvents; ++i) {
96 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
97 var->setVal(ctx.at(var)[i]);
98 }
99 dataHist.add(_observables, weights[weights.size() == 1 ? 0 : i]);
100 }
101
102 // Lookup bin weights via RooHistPdf
103 RooHistPdf pdf{"offsetPdf", "offsetPdf", _observables, dataHist};
104 for (std::size_t i = 0; i < nEvents; ++i) {
105 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
106 var->setVal(ctx.at(var)[i]);
107 }
108 output[i] = pdf.getVal(_observables);
109 }
110 }
111
112private:
113 double evaluate() const override { return 0.0; } // should never be called
114
115 RooSetProxy _observables;
117};
118
119} // namespace
120
121/** Construct a RooNLLVarNew
122\param name the name
123\param title the title
124\param pdf The pdf for which the nll is computed for
125\param observables The observabes of the pdf
126\param isExtended Set to true if this is an extended fit
127**/
128RooNLLVarNew::RooNLLVarNew(const char *name, const char *title, RooAbsPdf &pdf, RooArgSet const &observables,
129 bool isExtended, RooFit::OffsetMode offsetMode)
130 : RooAbsReal(name, title),
131 _pdf{"pdf", "pdf", this, pdf},
132 _weightVar{"weightVar", "weightVar", this, *dummyVar(weightVarName), true, false, true},
133 _weightSquaredVar{weightVarNameSumW2, weightVarNameSumW2, this, *dummyVar("weightSquardVar"), true, false, true},
134 _binnedL{pdf.getAttribute("BinnedLikelihoodActive")}
135{
136 RooArgSet obs;
137 pdf.getObservables(&observables, obs);
138
139 // In the "BinnedLikelihoodActiveYields" mode, the pdf values can directly
140 // be interpreted as yields and don't need to be multiplied by the bin
141 // widths. That's why we don't need to even fill them in this case.
142 if (_binnedL && !pdf.getAttribute("BinnedLikelihoodActiveYields")) {
143 fillBinWidthsFromPdfBoundaries(pdf, obs);
144 }
145
146 if (isExtended && !_binnedL) {
147 std::unique_ptr<RooAbsReal> expectedEvents = pdf.createExpectedEventsFunc(&obs);
148 if (expectedEvents) {
149 _expectedEvents =
150 std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", "expectedEvents", this, *expectedEvents);
151 addOwnedComponents(std::move(expectedEvents));
152 }
153 }
154
155 resetWeightVarNames();
158
159 // In the binned likelihood code path, we directly use that data weights for
160 // the offsetting.
161 if (!_binnedL && _doBinOffset) {
162 auto offsetPdf = std::make_unique<RooOffsetPdf>("_offset_pdf", "_offset_pdf", obs, *_weightVar);
163 _offsetPdf = std::make_unique<RooTemplateProxy<RooAbsPdf>>("offsetPdf", "offsetPdf", this, *offsetPdf);
164 addOwnedComponents(std::move(offsetPdf));
165 }
166}
167
168RooNLLVarNew::RooNLLVarNew(const RooNLLVarNew &other, const char *name)
169 : RooAbsReal(other, name),
170 _pdf{"pdf", this, other._pdf},
171 _weightVar{"weightVar", this, other._weightVar},
172 _weightSquaredVar{"weightSquaredVar", this, other._weightSquaredVar},
173 _weightSquared{other._weightSquared},
174 _binnedL{other._binnedL},
175 _doOffset{other._doOffset},
176 _simCount{other._simCount},
177 _prefix{other._prefix},
178 _binw{other._binw}
179{
180 if (other._expectedEvents) {
181 _expectedEvents = std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", this, *other._expectedEvents);
182 }
183}
184
185void RooNLLVarNew::fillBinWidthsFromPdfBoundaries(RooAbsReal const &pdf, RooArgSet const &observables)
186{
187 // Check if the bin widths were already filled
188 if (!_binw.empty()) {
189 return;
190 }
191
192 if (observables.size() != 1) {
193 throw std::runtime_error("BinnedPdf optimization only works with a 1D pdf.");
194 } else {
195 auto *var = static_cast<RooRealVar *>(observables.first());
196 std::list<double> *boundaries = pdf.binBoundaries(*var, var->getMin(), var->getMax());
197 std::list<double>::iterator biter = boundaries->begin();
198 _binw.resize(boundaries->size() - 1);
199 double lastBound = (*biter);
200 ++biter;
201 int ibin = 0;
202 while (biter != boundaries->end()) {
203 _binw[ibin] = (*biter) - lastBound;
204 lastBound = (*biter);
205 ibin++;
206 ++biter;
207 }
208 }
209}
210
211void RooNLLVarNew::doEvalBinnedL(RooFit::EvalContext &ctx, std::span<const double> preds,
212 std::span<const double> weights) const
213{
215 ROOT::Math::KahanSum<double> sumWeightKahanSum{0.0};
216
217 const bool predsAreYields = _binw.empty();
218
219 for (std::size_t i = 0; i < preds.size(); ++i) {
220
221 // Calculate log(Poisson(N|mu) for this bin
222 double N = weights[i];
223 double mu = preds[i];
224 if (!predsAreYields) {
225 mu *= _binw[i];
226 }
227
228 if (mu <= 0 && N > 0) {
229 // Catch error condition: data present where zero events are predicted
230 logEvalError(Form("Observed %f events in bin %lu with zero event yield", N, (unsigned long)i));
231 } else {
233 sumWeightKahanSum += N;
234 }
235 }
236
237 finalizeResult(ctx, result, sumWeightKahanSum.Sum());
238}
239
240void RooNLLVarNew::doEval(RooFit::EvalContext &ctx) const
241{
242 std::span<const double> weights = ctx.at(_weightVar);
243 std::span<const double> weightsSumW2 = ctx.at(_weightSquaredVar);
244
245 if (_binnedL) {
246 return doEvalBinnedL(ctx, ctx.at(&*_pdf), _weightSquared ? weightsSumW2 : weights);
247 }
248
249 auto config = ctx.config(this);
250
251 auto probas = ctx.at(_pdf);
252
253 _sumWeight = weights.size() == 1 ? weights[0] * probas.size()
254 : RooBatchCompute::reduceSum(config, weights.data(), weights.size());
255 if (_expectedEvents && _weightSquared && _sumWeight2 == 0.0) {
256 _sumWeight2 = weights.size() == 1 ? weightsSumW2[0] * probas.size()
257 : RooBatchCompute::reduceSum(config, weightsSumW2.data(), weightsSumW2.size());
258 }
259
260 auto nllOut = RooBatchCompute::reduceNLL(config, probas, _weightSquared ? weightsSumW2 : weights,
261 _doBinOffset ? ctx.at(*_offsetPdf) : std::span<const double>{});
262
263 if (nllOut.nLargeValues > 0) {
264 oocoutW(&*_pdf, Eval) << "RooAbsPdf::getLogVal(" << _pdf->GetName()
265 << ") WARNING: top-level pdf has unexpectedly large values" << std::endl;
266 }
267 for (std::size_t i = 0; i < nllOut.nNonPositiveValues; ++i) {
268 _pdf->logEvalError("getLogVal() top-level p.d.f not greater than zero");
269 }
270 for (std::size_t i = 0; i < nllOut.nNaNValues; ++i) {
271 _pdf->logEvalError("getLogVal() top-level p.d.f evaluates to NaN");
272 }
273
274 if (_expectedEvents) {
275 std::span<const double> expected = ctx.at(*_expectedEvents);
276 nllOut.nllSum += _pdf->extendedTerm(_sumWeight, expected[0], _weightSquared ? _sumWeight2 : 0.0, _doBinOffset);
277 }
278
279 finalizeResult(ctx, {nllOut.nllSum, nllOut.nllSumCarry}, _sumWeight);
280}
281
282////////////////////////////////////////////////////////////////////////////////
283/// Sets the prefix for the special variables of this NLL, like weights or bin
284/// volumes.
285/// \param[in] prefix The prefix to add to the observables and weight names.
286void RooNLLVarNew::setPrefix(std::string const &prefix)
287{
288 _prefix = prefix;
289
290 resetWeightVarNames();
291}
292
293void RooNLLVarNew::resetWeightVarNames()
294{
295 _weightVar->SetName((_prefix + weightVarName).c_str());
296 _weightSquaredVar->SetName((_prefix + weightVarNameSumW2).c_str());
297 if (_offsetPdf) {
298 (*_offsetPdf)->SetName((_prefix + "_offset_pdf").c_str());
299 }
300}
301
302////////////////////////////////////////////////////////////////////////////////
303/// Toggles the weight square correction.
304void RooNLLVarNew::applyWeightSquared(bool flag)
305{
306 _weightSquared = flag;
307}
308
309void RooNLLVarNew::enableOffsetting(bool flag)
310{
311 _doOffset = flag;
313}
314
315void RooNLLVarNew::finalizeResult(RooFit::EvalContext &ctx, ROOT::Math::KahanSum<double> result, double weightSum) const
316{
317 // If part of simultaneous PDF normalize probability over
318 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
319 // If we do bin-by bin offsetting, we don't do this because it cancels out
320 if (!_doBinOffset && _simCount > 1) {
321 result += weightSum * std::log(static_cast<double>(_simCount));
322 }
323
324 // Check if value offset flag is set.
325 if (_doOffset) {
326
327 // If no offset is stored enable this feature now
328 if (_offset.Sum() == 0 && _offset.Carry() == 0 && (result.Sum() != 0 || result.Carry() != 0)) {
329 _offset = result;
330 }
331 }
333}
334
335void RooNLLVarNew::translate(RooFit::Detail::CodeSquashContext &ctx) const
336{
337 if (_binnedL && !_pdf->getAttribute("BinnedLikelihoodActiveYields")) {
338 std::stringstream errorMsg;
339 errorMsg << "RooNLLVarNew::translate(): binned likelihood optimization is only supported when raw pdf "
340 "values can be interpreted as yields."
341 << " This is not the case for HistFactory models written with ROOT versions before 6.26.00";
342 coutE(InputArguments) << errorMsg.str() << std::endl;
343 throw std::runtime_error(errorMsg.str());
344 }
345
346 std::string weightSumName = RooFit::Detail::makeValidVarName(GetName()) + "WeightSum";
347 std::string resName = RooFit::Detail::makeValidVarName(GetName()) + "Result";
348 ctx.addResult(this, resName);
349 ctx.addToGlobalScope("double " + weightSumName + " = 0.0;\n");
350 ctx.addToGlobalScope("double " + resName + " = 0.0;\n");
351
352 const bool needWeightSum = _expectedEvents || _simCount > 1;
353
354 if (needWeightSum) {
355 auto scope = ctx.beginLoop(this);
356 ctx.addToCodeBody(weightSumName + " += " + ctx.getResult(*_weightVar) + ";\n");
357 }
358 if (_simCount > 1) {
359 std::string simCountStr = std::to_string(static_cast<double>(_simCount));
360 ctx.addToCodeBody(resName + " += " + weightSumName + " * std::log(" + simCountStr + ");\n");
361 }
362
363 // Begin loop scope for the observables and weight variable. If the weight
364 // is a scalar, the context will ignore it for the loop scope. The closing
365 // brackets of the loop is written at the end of the scopes lifetime.
366 {
367 auto scope = ctx.beginLoop(this);
368 std::string term = ctx.buildCall("RooFit::Detail::EvaluateFuncs::nllEvaluate", _pdf, _weightVar, _binnedL, 0);
369 ctx.addToCodeBody(this, resName + " += " + term + ";");
370 }
371 if (_expectedEvents) {
372 std::string expected = ctx.getResult(**_expectedEvents);
373 ctx.addToCodeBody(resName + " += " + expected + " - " + weightSumName + " * std::log(" + expected + ");\n");
374 }
375}
376
377/// \endcond
ROOT::RRangeCast< T, false, Range_t > static_range_cast(Range_t &&coll)
bool _doOffset
Apply interval value offset to control numeric precision?
void enableOffsetting(bool flag) override
ROOT::Math::KahanSum< double > _offset
! Offset as KahanSum to avoid loss of precision
Int_t _simCount
Total number of component p.d.f.s in RooSimultaneous (if any)
double evaluate() const override
TObject * clone(const char *newname) const override
Definition RooChi2Var.h:9
#define oocoutW(o, a)
#define coutE(a)
bool _doBinOffset
Definition RooNLLVar.h:44
std::vector< double > _binw
!
Definition RooNLLVar.h:49
std::unique_ptr< RooAbsPdf > _offsetPdf
! An optional per-bin likelihood offset
Definition RooNLLVar.h:51
void enableBinOffsetting(bool on=true)
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
char name[80]
Definition TGX11.cxx:110
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2489
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
T Sum() const
Definition Util.h:240
T Carry() const
Definition Util.h:250
Common abstract base class for objects that represent a value and a "shape" in RooFit.
Definition RooAbsArg.h:77
RooFit::OwningPtr< RooArgSet > getObservables(const RooArgSet &set, bool valueOnly=true) const
Given a set of possible observables, return the observables that this PDF depends on.
bool getAttribute(const Text_t *name) const
Check if a named attribute is set. By default, all attributes are unset.
Storage_t::size_type size() const
RooAbsArg * first() const
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
virtual std::unique_ptr< RooAbsReal > createExpectedEventsFunc(const RooArgSet *nset) const
Returns an object that represents the expected number of events for a given normalization set,...
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
virtual std::list< double > * binBoundaries(RooAbsRealLValue &obs, double xlo, double xhi) const
Retrieve bin boundaries if this distribution is binned in obs.
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition RooAbsReal.h:103
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
Represents a constant real-valued object.
Definition RooConstVar.h:23
Container class to hold N-dimensional binned data.
Definition RooDataHist.h:39
void add(const RooArgSet &row, double wgt=1.0) override
Add wgt to the bin content enclosed by the coordinates passed in row.
Definition RooDataHist.h:66
A class to maintain the context for squashing of RooFit models into code.
std::string buildCall(std::string const &funcname, Args_t const &...args)
Build the code to call the function with name funcname, passing some arguments.
void addResult(RooAbsArg const *key, std::string const &value)
A function to save an expression that includes/depends on the result of the input node.
void addToCodeBody(RooAbsArg const *klass, std::string const &in)
Adds the input string to the squashed code body.
std::string const & getResult(RooAbsArg const &arg)
Gets the result for the given node using the node name.
void addToGlobalScope(std::string const &str)
Adds the given string to the string block that will be emitted at the top of the squashed function.
std::unique_ptr< LoopScope > beginLoop(RooAbsArg const *in)
Create a RAII scope for iterating over vector observables.
std::span< const double > at(RooAbsArg const *arg, RooAbsArg const *caller=nullptr)
std::span< double > output()
RooBatchCompute::Config config(RooAbsArg const *arg) const
void setOutputWithOffset(RooAbsArg const *arg, ROOT::Math::KahanSum< double > val, ROOT::Math::KahanSum< double > const &offset)
Sets the output value with an offset.
A propability density function sampled from a multidimensional histogram.
Definition RooHistPdf.h:30
Variable that can be changed from the outside.
Definition RooRealVar.h:37
Mother of all ROOT objects.
Definition TObject.h:41
double reduceSum(Config cfg, InputArr input, size_t n)
ReduceNLLOutput reduceNLL(Config cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)
double nllEvaluate(double pdf, double weight, int binnedL, int doBinOffset)
std::string makeValidVarName(std::string const &in)
OffsetMode
For setting the offset mode with the Offset() command argument to RooAbsPdf::fitTo()
void probas(TString dataset, TString fin="TMVA.root", Bool_t useTMVAStyle=kTRUE)
static void output()