28#ifdef ROOBATCHCOMPUTE_USE_IMT
40#error "RF_ARCH should always be defined"
50 batches.extra = extraArgs.data();
51 batches.nEvents = nEvents;
52 batches.nBatches = nBatches;
53 batches.nExtra = extraArgs.size();
57void fillArrays(std::span<Batch> arrays,
VarSpan vars, std::size_t nEvents)
59 for (std::size_t i = 0; i < vars.size(); i++) {
60 arrays[i]._array = vars[i].data();
61 arrays[i]._isVector = vars[i].empty() || vars[i].size() >= nEvents;
65inline void advance(
Batches &batches, std::size_t nEvents)
67 for (std::size_t i = 0; i < batches.nBatches; i++) {
68 Batch &arg = batches.args[i];
69 arg._array += arg._isVector * nEvents;
71 batches.output += nEvents;
80class RooBatchComputeClass :
public RooBatchComputeInterface {
93#error "It's unexpected that _QUOTEVAL_ is defined at this point!"
95#define _QUOTEVAL_(x) _QUOTE_(x)
98 std::transform(out.begin(), out.end(), out.begin(), [](
unsigned char c) { return std::tolower(c); });
106 std::span<const double> offsetProbas)
override;
109#ifdef ROOBATCHCOMPUTE_USE_IMT
116#ifdef ROOBATCHCOMPUTE_USE_IMT
125 std::size_t nEventsPerThread = nEvents / nThreads + (nEvents % nThreads > 0);
128 nThreads = nEvents / nEventsPerThread + (nEvents % nEventsPerThread > 0);
130 auto task = [&](std::size_t idx) ->
int {
134 std::vector<Batch> arrays(vars.size());
135 fillBatches(batches,
output, nEventsPerThread, vars.size(), extraArgs);
136 fillArrays(arrays, vars, nEvents);
137 batches.
args = arrays.data();
138 advance(batches, batches.
nEvents * idx);
141 if (idx == nThreads - 1) {
145 std::size_t events = batches.
nEvents;
157 std::vector<std::size_t> indices(nThreads);
158 for (
unsigned int i = 1; i < nThreads; i++) {
161 ex.Map(task, indices);
195#ifdef ROOBATCHCOMPUTE_USE_IMT
197 computeIMT(computer,
output, nEvents, vars, extraArgs);
204 std::vector<Batch> arrays(vars.size());
205 fillBatches(batches,
output, nEvents, vars.size(), extraArgs);
206 fillArrays(arrays, vars, nEvents);
207 batches.args = arrays.data();
209 std::size_t events = batches.nEvents;
216 batches.nEvents = events;
222inline std::pair<double, double> getLog(
double prob, ReduceNLLOutput &out)
224 if (std::abs(prob) > 1e6) {
229 out.nNonPositiveValues++;
230 return {std::log(prob), -prob};
233 if (std::isnan(prob)) {
238 return {std::log(prob), 0.0};
249 std::span<const double> weights, std::span<const double> offsetProbas)
253 double badness = 0.0;
257 for (std::size_t i = 0; i <
probas.size(); ++i) {
259 const double eventWeight = weights.size() > 1 ? weights[i] : weights[0];
261 if (0. == eventWeight)
264 std::pair<double, double> logOut = getLog(probas[i], out);
265 double term = logOut.first;
266 badness += logOut.second;
268 if (!offsetProbas.empty()) {
269 term -= std::log(offsetProbas[i]);
272 term *= -eventWeight;
277 out.nllSum = nllSum.
Sum();
283 out.nllSumCarry = 0.0;
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
These classes encapsulate the necessary data for the computations.
This class implements the interface to execute the same task multiple times, sequentially or in paral...
unsigned GetPoolSize() const
Return the number of pooled workers.
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
static KahanSum< T, N > Accumulate(Iterator begin, Iterator end, T initialValue=T{})
Iterate over a range and return an instance of a KahanSum.
void Add(T x)
Single-element accumulation. Will not vectorise.
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute librar...
This class overrides some RooBatchComputeInterface functions, for the purpose of providing a cuda spe...
std::string architectureName() const override
void compute(Config const &, Computer computer, RestrictArr output, size_t nEvents, VarSpan vars, ArgSpan extraArgs) override
ReduceNLLOutput reduceNLL(RooBatchCompute::Config const &cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas) override
double reduceSum(Config const &, InputArr input, size_t n) override
const std::vector< void(*)(Batches &)> _computeFunctions
double reduceSum(RooBatchCompute::Config const &cfg, InputArr input, size_t n) override
Return the sum of an input array.
void compute(RooBatchCompute::Config const &cfg, Computer computer, RestrictArr output, size_t nEvents, VarSpan vars, ArgSpan extraArgs) override
Compute multiple values using CUDA kernels.
Architecture architecture() const override
ReduceNLLOutput reduceNLL(Config const &, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas) override
void(off) SmallVectorTemplateBase< T
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
std::vector< void(*)(Batches &)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
static RooBatchComputeClass computeObj
Static object to trigger the constructor which overwrites the dispatch pointer.
Namespace for dispatching RooFit computations to various backends.
std::span< double > ArgSpan
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loade...
constexpr std::size_t bufferSize
const double *__restrict InputArr
std::span< const std::span< const double > > VarSpan
double *__restrict RestrictArr
void probas(TString dataset, TString fin="TMVA.root", Bool_t useTMVAStyle=kTRUE)
__roodevice__ static __roohost__ double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.
static float unpackNaN(double val)
If val is NaN and this NaN has been tagged as containing a payload, unpack the float from the mantissa...