Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooNLLVarNew.cxx
Go to the documentation of this file.
1/// \cond ROOFIT_INTERNAL
2
3/*
4 * Project: RooFit
5 * Authors:
6 * Jonas Rembser, CERN 2021
7 * Emmanouil Michalainas, CERN 2021
8 *
9 * Copyright (c) 2021, CERN
10 *
11 * Redistribution and use in source and binary forms,
12 * with or without modification, are permitted according to the terms
13 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
14 */
15
16/**
17\file RooNLLVarNew.cxx
18\class RooNLLVarNew
19\ingroup Roofitcore
20
21This is a simple class designed to produce the nll values needed by the fitter.
22This class calls functions from `RooBatchCompute` library to provide faster
23computation times.
24**/
25
26#include "RooNLLVarNew.h"
27
28#include <RooHistPdf.h>
29#include <RooBatchCompute.h>
30#include <RooDataHist.h>
31#include <RooNaNPacker.h>
32#include <RooConstVar.h>
33#include <RooRealVar.h>
34#include <RooSetProxy.h>
36
37#include "RooFitImplHelpers.h"
38
39#include <ROOT/StringUtils.hxx>
40
41#include <TClass.h>
42#include <TMath.h>
43#include <Math/Util.h>
44
45#include <numeric>
46#include <stdexcept>
47#include <vector>
48
49// Declare constexpr static members to make them available if odr-used in C++14.
50constexpr const char *RooNLLVarNew::weightVarName;
51constexpr const char *RooNLLVarNew::weightVarNameSumW2;
52
53namespace {
54
55// Use RooConstVar for dummies such that they don't get included in getParameters().
56RooConstVar *dummyVar(const char *name)
57{
58 return new RooConstVar(name, name, 1.0);
59}
60
61// Helper class to represent a template pdf based on the fit dataset.
62class RooOffsetPdf : public RooAbsPdf {
63public:
64 RooOffsetPdf(const char *name, const char *title, RooArgSet const &observables, RooAbsReal &weightVar)
65 : RooAbsPdf(name, title),
66 _observables("!observables", "List of observables", this),
67 _weightVar{"!weightVar", "weightVar", this, weightVar, true, false}
68 {
69 for (RooAbsArg *obs : observables) {
70 _observables.add(*obs);
71 }
72 }
73 RooOffsetPdf(const RooOffsetPdf &other, const char *name = nullptr)
74 : RooAbsPdf(other, name),
75 _observables("!servers", this, other._observables),
76 _weightVar{"!weightVar", this, other._weightVar}
77 {
78 }
79 TObject *clone(const char *newname) const override { return new RooOffsetPdf(*this, newname); }
80
81 void doEval(RooFit::EvalContext &ctx) const override
82 {
83 std::span<double> output = ctx.output();
84 std::size_t nEvents = output.size();
85
86 std::span<const double> weights = ctx.at(_weightVar);
87
88 // Create the template histogram from the data. This operation is very
89 // expensive, but since the offset only depends on the observables it
90 // only has to be done once.
91
92 RooDataHist dataHist{"data", "data", _observables};
93 // Loop over events to fill the histogram
94 for (std::size_t i = 0; i < nEvents; ++i) {
95 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
96 var->setVal(ctx.at(var)[i]);
97 }
98 dataHist.add(_observables, weights[weights.size() == 1 ? 0 : i]);
99 }
100
101 // Lookup bin weights via RooHistPdf
102 RooHistPdf pdf{"offsetPdf", "offsetPdf", _observables, dataHist};
103 for (std::size_t i = 0; i < nEvents; ++i) {
104 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
105 var->setVal(ctx.at(var)[i]);
106 }
107 output[i] = pdf.getVal(_observables);
108 }
109 }
110
111private:
112 double evaluate() const override { return 0.0; } // should never be called
113
114 RooSetProxy _observables;
116};
117
118} // namespace
119
120/** Construct a RooNLLVarNew
121\param name the name
122\param title the title
123\param pdf The pdf for which the nll is computed for
124\param observables The observabes of the pdf
125\param isExtended Set to true if this is an extended fit
126**/
127RooNLLVarNew::RooNLLVarNew(const char *name, const char *title, RooAbsPdf &pdf, RooArgSet const &observables,
128 bool isExtended, RooFit::OffsetMode offsetMode)
129 : RooAbsReal(name, title),
130 _pdf{"pdf", "pdf", this, pdf},
131 _weightVar{"weightVar", "weightVar", this, *dummyVar(weightVarName), true, false, true},
132 _weightSquaredVar{weightVarNameSumW2, weightVarNameSumW2, this, *dummyVar("weightSquardVar"), true, false, true},
133 _binnedL{pdf.getAttribute("BinnedLikelihoodActive")}
134{
135 RooArgSet obs;
136 pdf.getObservables(&observables, obs);
137
138 // In the "BinnedLikelihoodActiveYields" mode, the pdf values can directly
139 // be interpreted as yields and don't need to be multiplied by the bin
140 // widths. That's why we don't need to even fill them in this case.
141 if (_binnedL && !pdf.getAttribute("BinnedLikelihoodActiveYields")) {
142 fillBinWidthsFromPdfBoundaries(pdf, obs);
143 }
144
145 if (isExtended && !_binnedL) {
146 std::unique_ptr<RooAbsReal> expectedEvents = pdf.createExpectedEventsFunc(&obs);
147 if (expectedEvents) {
148 _expectedEvents =
149 std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", "expectedEvents", this, *expectedEvents);
150 addOwnedComponents(std::move(expectedEvents));
151 }
152 }
153
154 resetWeightVarNames();
157
158 // In the binned likelihood code path, we directly use that data weights for
159 // the offsetting.
160 if (!_binnedL && _doBinOffset) {
161 auto offsetPdf = std::make_unique<RooOffsetPdf>("_offset_pdf", "_offset_pdf", obs, *_weightVar);
162 _offsetPdf = std::make_unique<RooTemplateProxy<RooAbsPdf>>("offsetPdf", "offsetPdf", this, *offsetPdf);
163 addOwnedComponents(std::move(offsetPdf));
164 }
165}
166
167RooNLLVarNew::RooNLLVarNew(const RooNLLVarNew &other, const char *name)
168 : RooAbsReal(other, name),
169 _pdf{"pdf", this, other._pdf},
170 _weightVar{"weightVar", this, other._weightVar},
171 _weightSquaredVar{"weightSquaredVar", this, other._weightSquaredVar},
172 _weightSquared{other._weightSquared},
173 _binnedL{other._binnedL},
174 _doOffset{other._doOffset},
175 _simCount{other._simCount},
176 _prefix{other._prefix},
177 _binw{other._binw}
178{
179 if (other._expectedEvents) {
180 _expectedEvents = std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", this, *other._expectedEvents);
181 }
182}
183
184void RooNLLVarNew::fillBinWidthsFromPdfBoundaries(RooAbsReal const &pdf, RooArgSet const &observables)
185{
186 // Check if the bin widths were already filled
187 if (!_binw.empty()) {
188 return;
189 }
190
191 if (observables.size() != 1) {
192 throw std::runtime_error("BinnedPdf optimization only works with a 1D pdf.");
193 } else {
194 auto *var = static_cast<RooRealVar *>(observables.first());
195 std::list<double> *boundaries = pdf.binBoundaries(*var, var->getMin(), var->getMax());
196 std::list<double>::iterator biter = boundaries->begin();
197 _binw.resize(boundaries->size() - 1);
198 double lastBound = (*biter);
199 ++biter;
200 int ibin = 0;
201 while (biter != boundaries->end()) {
202 _binw[ibin] = (*biter) - lastBound;
203 lastBound = (*biter);
204 ibin++;
205 ++biter;
206 }
207 }
208}
209
210void RooNLLVarNew::doEvalBinnedL(RooFit::EvalContext &ctx, std::span<const double> preds,
211 std::span<const double> weights) const
212{
214 ROOT::Math::KahanSum<double> sumWeightKahanSum{0.0};
215
216 const bool predsAreYields = _binw.empty();
217
218 for (std::size_t i = 0; i < preds.size(); ++i) {
219
220 // Calculate log(Poisson(N|mu) for this bin
221 double N = weights[i];
222 double mu = preds[i];
223 if (!predsAreYields) {
224 mu *= _binw[i];
225 }
226
227 if (mu <= 0 && N > 0) {
228 // Catch error condition: data present where zero events are predicted
229 logEvalError(Form("Observed %f events in bin %lu with zero event yield", N, (unsigned long)i));
230 } else {
232 sumWeightKahanSum += N;
233 }
234 }
235
236 finalizeResult(ctx, result, sumWeightKahanSum.Sum());
237}
238
239void RooNLLVarNew::doEval(RooFit::EvalContext &ctx) const
240{
241 std::span<const double> weights = ctx.at(_weightVar);
242 std::span<const double> weightsSumW2 = ctx.at(_weightSquaredVar);
243
244 if (_binnedL) {
245 return doEvalBinnedL(ctx, ctx.at(&*_pdf), _weightSquared ? weightsSumW2 : weights);
246 }
247
248 auto config = ctx.config(this);
249
250 auto probas = ctx.at(_pdf);
251
252 _sumWeight = weights.size() == 1 ? weights[0] * probas.size()
253 : RooBatchCompute::reduceSum(config, weights.data(), weights.size());
254 if (_expectedEvents && _weightSquared && _sumWeight2 == 0.0) {
255 _sumWeight2 = weights.size() == 1 ? weightsSumW2[0] * probas.size()
256 : RooBatchCompute::reduceSum(config, weightsSumW2.data(), weightsSumW2.size());
257 }
258
259 auto nllOut = RooBatchCompute::reduceNLL(config, probas, _weightSquared ? weightsSumW2 : weights,
260 _doBinOffset ? ctx.at(*_offsetPdf) : std::span<const double>{});
261
262 if (nllOut.nLargeValues > 0) {
263 oocoutW(&*_pdf, Eval) << "RooAbsPdf::getLogVal(" << _pdf->GetName()
264 << ") WARNING: top-level pdf has unexpectedly large values" << std::endl;
265 }
266 for (std::size_t i = 0; i < nllOut.nNonPositiveValues; ++i) {
267 _pdf->logEvalError("getLogVal() top-level p.d.f not greater than zero");
268 }
269 for (std::size_t i = 0; i < nllOut.nNaNValues; ++i) {
270 _pdf->logEvalError("getLogVal() top-level p.d.f evaluates to NaN");
271 }
272
273 if (_expectedEvents) {
274 std::span<const double> expected = ctx.at(*_expectedEvents);
275 nllOut.nllSum += _pdf->extendedTerm(_sumWeight, expected[0], _weightSquared ? _sumWeight2 : 0.0, _doBinOffset);
276 }
277
278 finalizeResult(ctx, {nllOut.nllSum, nllOut.nllSumCarry}, _sumWeight);
279}
280
281////////////////////////////////////////////////////////////////////////////////
282/// Sets the prefix for the special variables of this NLL, like weights or bin
283/// volumes.
284/// \param[in] prefix The prefix to add to the observables and weight names.
285void RooNLLVarNew::setPrefix(std::string const &prefix)
286{
287 _prefix = prefix;
288
289 resetWeightVarNames();
290}
291
292void RooNLLVarNew::resetWeightVarNames()
293{
294 _weightVar->SetName((_prefix + weightVarName).c_str());
295 _weightSquaredVar->SetName((_prefix + weightVarNameSumW2).c_str());
296 if (_offsetPdf) {
297 (*_offsetPdf)->SetName((_prefix + "_offset_pdf").c_str());
298 }
299}
300
301////////////////////////////////////////////////////////////////////////////////
302/// Toggles the weight square correction.
303void RooNLLVarNew::applyWeightSquared(bool flag)
304{
305 _weightSquared = flag;
306}
307
308void RooNLLVarNew::enableOffsetting(bool flag)
309{
310 _doOffset = flag;
312}
313
314void RooNLLVarNew::finalizeResult(RooFit::EvalContext &ctx, ROOT::Math::KahanSum<double> result, double weightSum) const
315{
316 // If part of simultaneous PDF normalize probability over
317 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
318 // If we do bin-by bin offsetting, we don't do this because it cancels out
319 if (!_doBinOffset && _simCount > 1) {
320 result += weightSum * std::log(static_cast<double>(_simCount));
321 }
322
323 // Check if value offset flag is set.
324 if (_doOffset) {
325
326 // If no offset is stored enable this feature now
327 if (_offset.Sum() == 0 && _offset.Carry() == 0 && (result.Sum() != 0 || result.Carry() != 0)) {
328 _offset = result;
329 }
330 }
332}
333
334void RooNLLVarNew::translate(RooFit::Detail::CodeSquashContext &ctx) const
335{
336 if (_binnedL && !_pdf->getAttribute("BinnedLikelihoodActiveYields")) {
337 std::stringstream errorMsg;
338 errorMsg << "RooNLLVarNew::translate(): binned likelihood optimization is only supported when raw pdf "
339 "values can be interpreted as yields."
340 << " This is not the case for HistFactory models written with ROOT versions before 6.26.00";
341 coutE(InputArguments) << errorMsg.str() << std::endl;
342 throw std::runtime_error(errorMsg.str());
343 }
344
345 std::string weightSumName = RooFit::Detail::makeValidVarName(GetName()) + "WeightSum";
346 std::string resName = RooFit::Detail::makeValidVarName(GetName()) + "Result";
347 ctx.addResult(this, resName);
348 ctx.addToGlobalScope("double " + weightSumName + " = 0.0;\n");
349 ctx.addToGlobalScope("double " + resName + " = 0.0;\n");
350
351 const bool needWeightSum = _expectedEvents || _simCount > 1;
352
353 if (needWeightSum) {
354 auto scope = ctx.beginLoop(this);
355 ctx.addToCodeBody(weightSumName + " += " + ctx.getResult(*_weightVar) + ";\n");
356 }
357 if (_simCount > 1) {
358 std::string simCountStr = std::to_string(static_cast<double>(_simCount));
359 ctx.addToCodeBody(resName + " += " + weightSumName + " * std::log(" + simCountStr + ");\n");
360 }
361
362 // Begin loop scope for the observables and weight variable. If the weight
363 // is a scalar, the context will ignore it for the loop scope. The closing
364 // brackets of the loop is written at the end of the scopes lifetime.
365 {
366 auto scope = ctx.beginLoop(this);
367 std::string term = ctx.buildCall("RooFit::Detail::MathFuncs::nll", _pdf, _weightVar, _binnedL, 0);
368 ctx.addToCodeBody(this, resName + " += " + term + ";");
369 }
370 if (_expectedEvents) {
371 std::string expected = ctx.getResult(**_expectedEvents);
372 ctx.addToCodeBody(resName + " += " + expected + " - " + weightSumName + " * std::log(" + expected + ");\n");
373 }
374}
375
376/// \endcond
ROOT::RRangeCast< T, false, Range_t > static_range_cast(Range_t &&coll)
bool _doOffset
Apply interval value offset to control numeric precision?
void enableOffsetting(bool flag) override
ROOT::Math::KahanSum< double > _offset
! Offset as KahanSum to avoid loss of precision
Int_t _simCount
Total number of component p.d.f.s in RooSimultaneous (if any)
double evaluate() const override
TObject * clone(const char *newname) const override
Definition RooChi2Var.h:9
#define oocoutW(o, a)
#define coutE(a)
bool _doBinOffset
Definition RooNLLVar.h:44
std::vector< double > _binw
!
Definition RooNLLVar.h:49
std::unique_ptr< RooAbsPdf > _offsetPdf
! An optional per-bin likelihood offset
Definition RooNLLVar.h:51
void enableBinOffsetting(bool on=true)
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
char name[80]
Definition TGX11.cxx:110
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2489
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
T Sum() const
Definition Util.h:240
T Carry() const
Definition Util.h:250
Common abstract base class for objects that represent a value and a "shape" in RooFit.
Definition RooAbsArg.h:77
RooFit::OwningPtr< RooArgSet > getObservables(const RooArgSet &set, bool valueOnly=true) const
Given a set of possible observables, return the observables that this PDF depends on.
bool getAttribute(const Text_t *name) const
Check if a named attribute is set. By default, all attributes are unset.
Storage_t::size_type size() const
RooAbsArg * first() const
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
virtual std::unique_ptr< RooAbsReal > createExpectedEventsFunc(const RooArgSet *nset) const
Returns an object that represents the expected number of events for a given normalization set,...
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
virtual std::list< double > * binBoundaries(RooAbsRealLValue &obs, double xlo, double xhi) const
Retrieve bin boundaries if this distribution is binned in obs.
double getVal(const RooArgSet *normalisationSet=nullptr) const
Evaluate object.
Definition RooAbsReal.h:103
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55
Represents a constant real-valued object.
Definition RooConstVar.h:23
Container class to hold N-dimensional binned data.
Definition RooDataHist.h:40
void add(const RooArgSet &row, double wgt=1.0) override
Add wgt to the bin content enclosed by the coordinates passed in row.
Definition RooDataHist.h:72
A class to maintain the context for squashing of RooFit models into code.
std::string buildCall(std::string const &funcname, Args_t const &...args)
Build the code to call the function with name funcname, passing some arguments.
void addResult(RooAbsArg const *key, std::string const &value)
A function to save an expression that includes/depends on the result of the input node.
void addToCodeBody(RooAbsArg const *klass, std::string const &in)
Adds the input string to the squashed code body.
std::string const & getResult(RooAbsArg const &arg)
Gets the result for the given node using the node name.
void addToGlobalScope(std::string const &str)
Adds the given string to the string block that will be emitted at the top of the squashed function.
std::unique_ptr< LoopScope > beginLoop(RooAbsArg const *in)
Create a RAII scope for iterating over vector observables.
std::span< const double > at(RooAbsArg const *arg, RooAbsArg const *caller=nullptr)
std::span< double > output()
RooBatchCompute::Config config(RooAbsArg const *arg) const
void setOutputWithOffset(RooAbsArg const *arg, ROOT::Math::KahanSum< double > val, ROOT::Math::KahanSum< double > const &offset)
Sets the output value with an offset.
A propability density function sampled from a multidimensional histogram.
Definition RooHistPdf.h:30
Variable that can be changed from the outside.
Definition RooRealVar.h:37
Mother of all ROOT objects.
Definition TObject.h:41
double reduceSum(Config cfg, InputArr input, size_t n)
ReduceNLLOutput reduceNLL(Config cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)
double nll(double pdf, double weight, int binnedL, int doBinOffset)
Definition MathFuncs.h:353
std::string makeValidVarName(std::string const &in)
OffsetMode
For setting the offset mode with the Offset() command argument to RooAbsPdf::fitTo()
void probas(TString dataset, TString fin="TMVA.root", Bool_t useTMVAStyle=kTRUE)
static void output()