Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooBatchCompute.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN, September 2020
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13/**
14\file RooBatchCompute.cxx
15\class RbcClass
16\ingroup Roobatchcompute
17
18This file contains the code for CPU computations using the RooBatchCompute library.
19**/
20
21#include "RooBatchCompute.h"
22#include "RooNaNPacker.h"
23#include "RooVDTHeaders.h"
24#include "Batches.h"
25
26#include <ROOT/RConfig.hxx>
27#include <ROOT/TExecutor.hxx>
28
29#include <Math/Util.h>
30
31#include <algorithm>
32#include <sstream>
33#include <stdexcept>
34
35#ifndef RF_ARCH
36#error "RF_ARCH should always be defined"
37#endif
38
39namespace RooBatchCompute {
40namespace RF_ARCH {
41
42std::vector<void (*)(BatchesHandle)> getFunctions();
43
44/// This class overrides some RooBatchComputeInterface functions, for the
45/// purpose of providing a CPU specific implementation of the library.
47private:
48 const std::vector<void (*)(BatchesHandle)> _computeFunctions;
49
50public:
52 {
53 // Set the dispatch pointer to this instance of the library upon loading
54 dispatchCPU = this;
55 }
56
57 Architecture architecture() const override { return Architecture::RF_ARCH; };
58 std::string architectureName() const override
59 {
60 // transform to lower case to match the original architecture name passed to the compiler
61#ifdef _QUOTEVAL_ // to quote the value of the preprocessor macro instead of the name
62#error "It's unexpected that _QUOTEVAL_ is defined at this point!"
63#endif
64#define _QUOTEVAL_(x) _QUOTE_(x)
65 std::string out = _QUOTEVAL_(RF_ARCH);
66#undef _QUOTEVAL_
67 std::transform(out.begin(), out.end(), out.begin(), [](unsigned char c) { return std::tolower(c); });
68 ;
69 return out;
70 };
71
72 /** Compute multiple values using optimized functions.
73 This method creates a Batches object and passes it to the correct compute function.
74 In case Implicit Multithreading is enabled, the events to be processed are equally
75 divided among the tasks to be generated and computed in parallel.
76 \param computer An enum specifying the compute function to be used.
77 \param output The array where the computation results are stored.
78 \param nEvents The number of events to be processed.
79 \param vars A std::vector containing pointers to the variables involved in the computation.
80 \param extraArgs An optional std::vector containing extra double values that may participate in the computation. **/
81 void compute(cudaStream_t *, Computer computer, RestrictArr output, size_t nEvents, const VarVector &vars,
82 ArgVector &extraArgs) override
83 {
84 static std::vector<double> buffer;
85 buffer.resize(vars.size() * bufferSize);
86
89 std::size_t nThreads = ex.GetPoolSize();
90
91 std::size_t nEventsPerThread = nEvents / nThreads + (nEvents % nThreads > 0);
92
93 // Reset the number of threads to the number we actually need given nEventsPerThread
94 nThreads = nEvents / nEventsPerThread + (nEvents % nEventsPerThread > 0);
95
96 auto task = [&](std::size_t idx) -> int {
97 // Create a Batches object spanning nEventsPerThread events for this task,
98 // then advance it by idx * nEventsPerThread events so that each task works on its own slice
99 Batches batches(output, nEventsPerThread, vars, extraArgs, buffer.data());
100 batches.advance(batches.getNEvents() * idx);
101
102 // Set the number of events of the last Batches object as the remaining events
103 if (idx == nThreads - 1) {
104 batches.setNEvents(nEvents - idx * batches.getNEvents());
105 }
106
107 std::size_t events = batches.getNEvents();
108 batches.setNEvents(bufferSize);
109 while (events > bufferSize) {
110 _computeFunctions[computer](batches);
111 batches.advance(bufferSize);
112 events -= bufferSize;
113 }
114 batches.setNEvents(events);
115 _computeFunctions[computer](batches);
116 return 0;
117 };
118
119 std::vector<std::size_t> indices(nThreads);
120 for (unsigned int i = 1; i < nThreads; i++) {
121 indices[i] = i;
122 }
123 ex.Map(task, indices);
124 } else {
125 // No splitting into tasks in this branch: a single Batches object covers all nEvents
126 // and is processed below in chunks of at most bufferSize events
127 Batches batches(output, nEvents, vars, extraArgs, buffer.data());
128
129 std::size_t events = batches.getNEvents();
130 batches.setNEvents(bufferSize);
131 while (events > bufferSize) {
132 _computeFunctions[computer](batches);
133 batches.advance(bufferSize);
134 events -= bufferSize;
135 }
136 batches.setNEvents(events);
137 _computeFunctions[computer](batches);
138 }
139 }
140 /// Return the sum of an input array
141 double reduceSum(cudaStream_t *, InputArr input, size_t n) override;
142 ReduceNLLOutput reduceNLL(cudaStream_t *, RooSpan<const double> probas, RooSpan<const double> weightSpan,
143 RooSpan<const double> weights, double weightSum,
144 RooSpan<const double> binVolumes) override;
145}; // End class RooBatchComputeClass
146
147namespace {
148
149inline std::pair<double, double> getLog(double prob, ReduceNLLOutput &out)
150{
151 if (std::abs(prob) > 1e6) {
152 out.nLargeValues++;
153 }
154
155 if (prob <= 0.0) {
156 out.nNonPositiveValues++;
157 return {std::log(prob), -prob};
158 }
159
160 if (std::isnan(prob)) {
161 out.nNaNValues++;
162 return {prob, RooNaNPacker::unpackNaN(prob)};
163 }
164
165 return {std::log(prob), 0.0};
166}
167
168} // namespace
169
170double RooBatchComputeClass::reduceSum(cudaStream_t *, InputArr input, size_t n)
171{
173}
174
177 double weightSum, RooSpan<const double> binVolumes)
178{
179 ReduceNLLOutput out;
180
181 double badness = 0.0;
182
183 for (std::size_t i = 0; i < probas.size(); ++i) {
184
185 const double eventWeight = weightSpan.size() > 1 ? weightSpan[i] : weightSpan[0];
186
187 if (0. == eventWeight)
188 continue;
189
190 std::pair<double, double> logOut = getLog(probas[i], out);
191 double term = logOut.first;
192 badness += logOut.second;
193
194 if (!binVolumes.empty()) {
195 term -= std::log(weights[i]) - std::log(binVolumes[i]) - std::log(weightSum);
196 }
197
198 term *= -eventWeight;
199
200 out.nllSum.Add(term);
201 }
202
203 if (badness != 0.) {
204 // Some events with evaluation errors. Return "badness" of errors.
206 }
207
208 return out;
209}
210
211/// Static object to trigger the constructor which overwrites the dispatch pointer.
213
214/** Construct a Batches object
215\param output The array where the computation results are stored.
216\param nEvents The number of events to be processed.
217\param vars A std::vector containing pointers to the variables involved in the computation.
218\param extraArgs An optional std::vector containing extra double values that may participate in the computation.
219\param buffer A 2D array that is used as a buffer for scalar variables.
220For every scalar parameter a buffer (one row of the buffer) is filled with copies of the scalar
221value, so that it behaves as a batch and facilitates auto-vectorization. The Batches object can be
222passed by value to a compute function to perform efficient computations. **/
223Batches::Batches(RestrictArr output, size_t nEvents, const VarVector &vars, ArgVector &extraArgs, double *buffer)
224 : _extraArgs{extraArgs.data()},
225 _nEvents(nEvents),
226 _nBatches(vars.size()),
227 _nExtraArgs(extraArgs.size()),
228 _output(output)
229{
230 _arrays.resize(vars.size());
231 for (size_t i = 0; i < vars.size(); i++) {
232 const RooSpan<const double> &span = vars[i];
233 if (span.empty()) {
234 std::stringstream ss;
235 ss << "The span number " << i << " passed to Batches::Batches() is empty!";
236 throw std::runtime_error(ss.str());
237 } else if (span.size() > 1)
238 _arrays[i].set(span.data()[0], span.data(), true);
239 else {
240 std::fill_n(&buffer[i * bufferSize], bufferSize, span.data()[0]);
241 _arrays[i].set(span.data()[0], &buffer[i * bufferSize], false);
242 }
243 }
244}
245
246} // End namespace RF_ARCH
247} // End namespace RooBatchCompute
#define c(i)
Definition RSha256.hxx:101
#define _QUOTEVAL_(x)
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
This class implements the interface to execute the same task multiple times, sequentially or in paral...
Definition TExecutor.hxx:38
unsigned GetPoolSize() const
Return the number of pooled workers.
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
static KahanSum< T, N > Accumulate(Iterator begin, Iterator end, T initialValue=T{})
Iterate over a range and return an instance of a KahanSum.
Definition Util.h:211
void Add(T x)
Single-element accumulation. Will not vectorise.
Definition Util.h:165
Batches(RestrictArr output, std::size_t nEvents, const VarVector &vars, ArgVector &extraArgs, double *buffer=nullptr)
__roodevice__ std::size_t getNEvents() const
Definition Batches.h:99
void advance(std::size_t nEvents)
Definition Batches.h:105
void setNEvents(std::size_t n)
Definition Batches.h:104
This class overrides some RooBatchComputeInterface functions, for the purpose of providing a CPU spec...
ReduceNLLOutput reduceNLL(cudaStream_t *, RooSpan< const double > probas, RooSpan< const double > weightSpan, RooSpan< const double > weights, double weightSum, RooSpan< const double > binVolumes) override
double reduceSum(cudaStream_t *, InputArr input, size_t n) override
Return the sum of an input array.
void compute(cudaStream_t *, Computer computer, RestrictArr output, size_t nEvents, const VarVector &vars, ArgVector &extraArgs) override
Compute multiple values using optimized functions.
const std::vector< void(*)(BatchesHandle)> _computeFunctions
The interface which should be implemented to provide optimised computation functions for implementati...
A simple container to hold a batch of data values.
Definition RooSpan.h:34
constexpr std::size_t size() const noexcept
Definition RooSpan.h:119
constexpr std::span< T >::pointer data() const
Definition RooSpan.h:102
constexpr bool empty() const noexcept
Definition RooSpan.h:124
const Int_t n
Definition legend1.C:16
Double_t ex[n]
Definition legend1.C:17
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:558
static RooBatchComputeClass computeObj
Static object to trigger the constructor which overwrites the dispatch pointer.
std::vector< void(*)(BatchesHandle)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
Namespace for dispatching RooFit computations to various backends.
std::vector< RooSpan< const double > > VarVector
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loade...
constexpr std::size_t bufferSize
Definition Batches.h:38
const double *__restrict InputArr
std::vector< double > ArgVector
double *__restrict RestrictArr
ROOT::Math::KahanSum< double > nllSum
__roodevice__ static __roohost__ double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.
static float unpackNaN(double val)
If val is NaN and this NaN has been tagged as containing a payload, unpack the float from the manti...
static void output()