Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooBatchCompute.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN, September 2020
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13/**
14\file RooBatchCompute.cxx
15\class RbcClass
16\ingroup Roobatchcompute
17
18This file contains the code for cpu computations using the RooBatchCompute library.
19**/
20
21#include "RooBatchCompute.h"
22#include "RooNaNPacker.h"
23#include "RooVDTHeaders.h"
24#include "Batches.h"
25
26#include <ROOT/RConfig.hxx>
27#include <ROOT/TExecutor.hxx>
28
29#include <Math/Util.h>
30
31#include <algorithm>
32#include <sstream>
33#include <stdexcept>
34
35#ifndef RF_ARCH
36#error "RF_ARCH should always be defined"
37#endif
38
39namespace RooBatchCompute {
40namespace RF_ARCH {
41
42namespace {
43
44void fillBatches(Batches &batches, RestrictArr output, size_t nEvents, std::size_t nBatches, ArgVector &extraArgs)
45{
46 batches._extraArgs = extraArgs.data();
47 batches._nEvents = nEvents;
48 batches._nBatches = nBatches;
49 batches._nExtraArgs = extraArgs.size();
50 batches._output = output;
51}
52
53void fillArrays(std::vector<Batch> &arrays, const VarVector &vars, double *buffer)
54{
55
56 arrays.resize(vars.size());
57 for (size_t i = 0; i < vars.size(); i++) {
58 const std::span<const double> &span = vars[i];
59 if (span.empty()) {
60 std::stringstream ss;
61 ss << "The span number " << i << " passed to Batches::Batches() is empty!";
62 throw std::runtime_error(ss.str());
63 } else if (span.size() > 1)
64 arrays[i].set(span.data(), true);
65 else {
66 std::fill_n(&buffer[i * bufferSize], bufferSize, span.data()[0]);
67 arrays[i].set(&buffer[i * bufferSize], false);
68 }
69 }
70}
71
72} // namespace
73
74std::vector<void (*)(BatchesHandle)> getFunctions();
75
76/// This class overrides some RooBatchComputeInterface functions, for the
77/// purpose of providing a CPU specific implementation of the library.
78class RooBatchComputeClass : public RooBatchComputeInterface {
79private:
80 const std::vector<void (*)(BatchesHandle)> _computeFunctions;
81
82public:
84 {
85 // Set the dispatch pointer to this instance of the library upon loading
86 dispatchCPU = this;
87 }
88
89 Architecture architecture() const override { return Architecture::RF_ARCH; };
90 std::string architectureName() const override
91 {
92 // transform to lower case to match the original architecture name passed to the compiler
93#ifdef _QUOTEVAL_ // to quote the value of the preprocessor macro instead of the name
94#error "It's unexpected that _QUOTEVAL_ is defined at this point!"
95#endif
96#define _QUOTEVAL_(x) _QUOTE_(x)
97 std::string out = _QUOTEVAL_(RF_ARCH);
98#undef _QUOTEVAL_
99 std::transform(out.begin(), out.end(), out.begin(), [](unsigned char c) { return std::tolower(c); });
100 ;
101 return out;
102 };
103
104 /** Compute multiple values using optimized functions.
105 This method creates a Batches object and passes it to the correct compute function.
106 In case Implicit Multithreading is enabled, the events to be processed are equally
107 divided among the tasks to be generated and computed in parallel.
108 \param computer An enum specifying the compute function to be used.
109 \param output The array where the computation results are stored.
110 \param nEvents The number of events to be processed.
111 \param vars A std::vector containing pointers to the variables involved in the computation.
112 \param extraArgs An optional std::vector containing extra double values that may participate in the computation. **/
113 void compute(Config const &, Computer computer, RestrictArr output, size_t nEvents, const VarVector &vars,
114 ArgVector &extraArgs) override
115 {
116 static std::vector<double> buffer;
117 buffer.resize(vars.size() * bufferSize);
118
121 std::size_t nThreads = ex.GetPoolSize();
122
123 std::size_t nEventsPerThread = nEvents / nThreads + (nEvents % nThreads > 0);
124
125 // Reset the number of threads to the number we actually need given nEventsPerThread
126 nThreads = nEvents / nEventsPerThread + (nEvents % nEventsPerThread > 0);
127
128 auto task = [&](std::size_t idx) -> int {
129 // Fill a std::vector<Batches> with the same object and with ~nEvents/nThreads
130 // Then advance every object but the first to split the work between threads
131 Batches batches;
132 std::vector<Batch> arrays;
133 fillBatches(batches, output, nEventsPerThread, vars.size(), extraArgs);
134 fillArrays(arrays, vars, buffer.data());
135 batches._arrays = arrays.data();
136 batches.advance(batches.getNEvents() * idx);
137
138 // Set the number of events of the last Batches object as the remaining events
139 if (idx == nThreads - 1) {
140 batches.setNEvents(nEvents - idx * batches.getNEvents());
141 }
142
143 std::size_t events = batches.getNEvents();
144 batches.setNEvents(bufferSize);
145 while (events > bufferSize) {
146 _computeFunctions[computer](batches);
147 batches.advance(bufferSize);
148 events -= bufferSize;
149 }
150 batches.setNEvents(events);
151 _computeFunctions[computer](batches);
152 return 0;
153 };
154
155 std::vector<std::size_t> indices(nThreads);
156 for (unsigned int i = 1; i < nThreads; i++) {
157 indices[i] = i;
158 }
159 ex.Map(task, indices);
160 } else {
161 // Fill a std::vector<Batches> with the same object and with ~nEvents/nThreads
162 // Then advance every object but the first to split the work between threads
163 Batches batches;
164 std::vector<Batch> arrays;
165 fillBatches(batches, output, nEvents, vars.size(), extraArgs);
166 fillArrays(arrays, vars, buffer.data());
167 batches._arrays = arrays.data();
168
169 std::size_t events = batches.getNEvents();
170 batches.setNEvents(bufferSize);
171 while (events > bufferSize) {
172 _computeFunctions[computer](batches);
173 batches.advance(bufferSize);
174 events -= bufferSize;
175 }
176 batches.setNEvents(events);
177 _computeFunctions[computer](batches);
178 }
179 }
180 /// Return the sum of an input array
181 double reduceSum(Config const &, InputArr input, size_t n) override;
182 ReduceNLLOutput reduceNLL(Config const &, std::span<const double> probas, std::span<const double> weights,
183 std::span<const double> offsetProbas) override;
184}; // End class RooBatchComputeClass
185
186namespace {
187
188inline std::pair<double, double> getLog(double prob, ReduceNLLOutput &out)
189{
190 if (std::abs(prob) > 1e6) {
191 out.nLargeValues++;
192 }
193
194 if (prob <= 0.0) {
195 out.nNonPositiveValues++;
196 return {std::log(prob), -prob};
197 }
198
199 if (std::isnan(prob)) {
200 out.nNaNValues++;
201 return {prob, RooNaNPacker::unpackNaN(prob)};
202 }
203
204 return {std::log(prob), 0.0};
205}
206
207} // namespace
208
209double RooBatchComputeClass::reduceSum(Config const &, InputArr input, size_t n)
210{
212}
213
214ReduceNLLOutput RooBatchComputeClass::reduceNLL(Config const &, std::span<const double> probas,
215 std::span<const double> weights, std::span<const double> offsetProbas)
216{
217 ReduceNLLOutput out;
218
219 double badness = 0.0;
220
222
223 for (std::size_t i = 0; i < probas.size(); ++i) {
224
225 const double eventWeight = weights.size() > 1 ? weights[i] : weights[0];
226
227 if (0. == eventWeight)
228 continue;
229
230 std::pair<double, double> logOut = getLog(probas[i], out);
231 double term = logOut.first;
232 badness += logOut.second;
233
234 if (!offsetProbas.empty()) {
235 term -= std::log(offsetProbas[i]);
236 }
237
238 term *= -eventWeight;
239
240 nllSum.Add(term);
241 }
242
243 out.nllSum = nllSum.Sum();
244 out.nllSumCarry = nllSum.Carry();
245
246 if (badness != 0.) {
247 // Some events with evaluation errors. Return "badness" of errors.
248 out.nllSum = RooNaNPacker::packFloatIntoNaN(badness);
249 out.nllSumCarry = 0.0;
250 }
251
252 return out;
253}
254
255/// Static object to trigger the constructor which overwrites the dispatch pointer.
257
258} // End namespace RF_ARCH
259} // End namespace RooBatchCompute
#define c(i)
Definition RSha256.hxx:101
#define _QUOTEVAL_(x)
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
These classes encapsulate the necessary data for the computations.
This class implements the interface to execute the same task multiple times, sequentially or in paral...
Definition TExecutor.hxx:38
unsigned GetPoolSize() const
Return the number of pooled workers.
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
T Sum() const
Definition Util.h:240
static KahanSum< T, N > Accumulate(Iterator begin, Iterator end, T initialValue=T{})
Iterate over a range and return an instance of a KahanSum.
Definition Util.h:211
T Carry() const
Definition Util.h:250
void Add(T x)
Single-element accumulation. Will not vectorise.
Definition Util.h:165
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute librar...
__roodevice__ std::size_t getNEvents() const
Definition Batches.h:69
void advance(std::size_t nEvents)
Definition Batches.h:75
void setNEvents(std::size_t n)
Definition Batches.h:74
This class overrides some RooBatchComputeInterface functions, for the purpose of providing a cuda spe...
void compute(Config const &, Computer computer, RestrictArr output, size_t nEvents, const VarVector &vars, ArgVector &extraArgs) override
Compute multiple values using optimized functions.
const std::vector< void(*)(BatchesHandle)> _computeFunctions
ReduceNLLOutput reduceNLL(RooBatchCompute::Config const &cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas) override
double reduceSum(Config const &, InputArr input, size_t n) override
Return the sum of an input array.
double reduceSum(RooBatchCompute::Config const &cfg, InputArr input, size_t n) override
Return the sum of an input array.
ReduceNLLOutput reduceNLL(Config const &, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas) override
const Int_t n
Definition legend1.C:16
Double_t ex[n]
Definition legend1.C:17
void(off) SmallVectorTemplateBase< T
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:570
static RooBatchComputeClass computeObj
Static object to trigger the constructor which overwrites the dispatch pointer.
std::vector< void(*)(BatchesHandle)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
Batches & BatchesHandle
Definition Batches.h:84
Namespace for dispatching RooFit computations to various backends.
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loade...
std::vector< std::span< const double > > VarVector
constexpr std::size_t bufferSize
Definition Batches.h:32
const double *__restrict InputArr
std::vector< double > ArgVector
double *__restrict RestrictArr
void probas(TString dataset, TString fin="TMVA.root", Bool_t useTMVAStyle=kTRUE)
__roodevice__ static __roohost__ double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.
static float unpackNaN(double val)
If val is NaN and a this NaN has been tagged as containing a payload, unpack the float from the manti...
static void output()