28#ifdef ROOBATCHCOMPUTE_USE_IMT 
   44#error "RF_ARCH should always be defined" 
   63   for (std::size_t i = 0; i < vars.size(); i++) {
 
   64      arrays[i]._array = vars[i].data();
 
   65      arrays[i]._isVector = vars[i].empty() || vars[i].size() >= nEvents;
 
   71   for (std::size_t i = 0; i < 
batches.nBatches; i++) {
 
   73      arg._array += arg._isVector * nEvents;
 
   84class RooBatchComputeClass : 
public RooBatchComputeInterface {
 
   97#error "It's unexpected that _QUOTEVAL_ is defined at this point!" 
   99#define _QUOTEVAL_(x) _QUOTE_(x) 
  102      std::transform(out.begin(), out.end(), out.begin(), [](
unsigned char c) { return std::tolower(c); });
 
 
  119      throw std::bad_function_call();
 
 
  123      throw std::bad_function_call();
 
 
  128#ifdef ROOBATCHCOMPUTE_USE_IMT 
  132   const std::vector<void (*)(
Batches &)> _computeFunctions;
 
  135#ifdef ROOBATCHCOMPUTE_USE_IMT 
  138   std::size_t nEvents = 
output.size();
 
  150   auto task = [&](std::size_t idx) -> 
int {
 
  154      std::vector<Batch> 
arrays(vars.size());
 
  165      std::size_t events = 
batches.nEvents;
 
  167      while (events > bufferSize) {
 
  170         events -= bufferSize;
 
  177   std::vector<std::size_t> indices(
nThreads);
 
  178   for (
unsigned int i = 1; i < 
nThreads; i++) {
 
  193void RooBatchComputeClass::compute(Config 
const &, Computer 
computer, std::span<double> 
output, VarSpan vars,
 
  214#ifdef ROOBATCHCOMPUTE_USE_IMT 
  220   std::size_t nEvents = 
output.size();
 
  225   std::vector<Batch> 
arrays(vars.size());
 
  230   std::size_t events = 
batches.nEvents;
 
  232   while (events > bufferSize) {
 
  243inline std::pair<double, double> 
getLog(
double prob, ReduceNLLOutput &out)
 
  246      out.nNonPositiveValues++;
 
  250   if (std::isinf(
prob)) {
 
  251      out.nInfiniteValues++;
 
  254   if (std::isnan(
prob)) {
 
  259   return {std::log(
prob), 0.0};
 
  264double RooBatchComputeClass::reduceSum(Config 
const &, InputArr 
input, 
size_t n)
 
  269ReduceNLLOutput RooBatchComputeClass::reduceNLL(Config 
const &, std::span<const double> probas,
 
  270                                                std::span<const double> weights, std::span<const double> 
offsetProbas)
 
  278   for (std::size_t i = 0; i < weights.size(); ++i) {
 
  280      if (0. == weights[i])
 
  283      std::pair<double, double> out);
 
  302      out.nllSumCarry = 0.0;
 
  310class ScalarBufferContainer {
 
  312   ScalarBufferContainer() {}
 
  313   ScalarBufferContainer(std::size_t 
size)
 
  316         throw std::runtime_error(
"ScalarBufferContainer can only be of size 1");
 
  319   double const *hostReadPtr()
 const { 
return &
_val; }
 
  320   double const *deviceReadPtr()
 const { 
return &
_val; }
 
  322   double *hostWritePtr() { 
return &
_val; }
 
  323   double *deviceWritePtr() { 
return &
_val; }
 
  325   void assignFromHost(std::span<const double> 
input) { 
_val = 
input[0]; }
 
  326   void assignFromDevice(std::span<const double>) { 
throw std::bad_function_call(); }
 
  332class CPUBufferContainer {
 
  336   double const *hostReadPtr()
 const { 
return _vec.data(); }
 
  337   double const *deviceReadPtr()
 const 
  339      throw std::bad_function_call();
 
  343   double *hostWritePtr() { 
return _vec.data(); }
 
  344   double *deviceWritePtr()
 
  346      throw std::bad_function_call();
 
  350   void assignFromHost(std::span<const double> 
input) { 
_vec.assign(
input.begin(), 
input.end()); }
 
  351   void assignFromDevice(std::span<const double>) { 
throw std::bad_function_call(); }
 
  357template <
class Container>
 
  358class BufferImpl : 
public AbsBuffer {
 
  360   using Queue = std::queue<std::unique_ptr<Container>>;
 
  362   BufferImpl(std::size_t 
size, Queue &queue) : 
_queue{queue}
 
  365         _vec = std::make_unique<Container>(
size);
 
  374   double const *hostReadPtr()
 const override { 
return _vec->hostReadPtr(); }
 
  375   double const *deviceReadPtr()
 const override { 
return _vec->deviceReadPtr(); }
 
  377   double *hostWritePtr()
 override { 
return _vec->hostWritePtr(); }
 
  378   double *deviceWritePtr()
 override { 
return _vec->deviceWritePtr(); }
 
  380   void assignFromHost(std::span<const double> 
input)
 override { 
_vec->assignFromHost(
input); }
 
  381   void assignFromDevice(std::span<const double> 
input)
 override { 
_vec->assignFromDevice(
input); }
 
  386   std::unique_ptr<Container> 
_vec;
 
  393struct BufferQueuesMaps {
 
  398class BufferManager : 
public AbsBufferManager {
 
  401   BufferManager() : 
_queuesMaps{std::make_unique<BufferQueuesMaps>()} {}
 
  403   std::unique_ptr<AbsBuffer> makeScalarBuffer()
 override 
  405      return std::make_unique<ScalarBuffer>(1, 
_queuesMaps->scalarBufferQueuesMap[1]);
 
  407   std::unique_ptr<AbsBuffer> makeCpuBuffer(std::size_t 
size)
 override 
  411   std::unique_ptr<AbsBuffer> makeGpuBuffer(std::size_t)
 override { 
throw std::bad_function_call(); }
 
  412   std::unique_ptr<AbsBuffer> makePinnedBuffer(std::size_t, CudaInterface::CudaStream * = 
nullptr)
 override 
  414      throw std::bad_function_call();
 
  423std::unique_ptr<AbsBufferManager> RooBatchComputeClass::createBufferManager()
 const 
  425   return std::make_unique<BufferManager>();
 
std::vector< double > _vec
 
std::map< std::size_t, CPUBuffer::Queue > cpuBufferQueuesMap
 
std::map< std::size_t, ScalarBuffer::Queue > scalarBufferQueuesMap
 
std::unique_ptr< BufferQueuesMaps > _queuesMaps
 
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
 
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
 
These classes encapsulate the necessary data for the computations.
 
This class implements the interface to execute the same task multiple times, sequentially or in paral...
 
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
 
static KahanSum< T, N > Accumulate(Iterator begin, Iterator end, T initialValue=T{})
Iterate over a range and return an instance of a KahanSum.
 
void Add(T x)
Single-element accumulation. Will not vectorise.
 
This class overrides some RooBatchComputeInterface functions, for the purpose of providing a cuda spe...
 
double reduceSum(Config const &, InputArr input, size_t n) override
 
void deleteCudaStream(CudaInterface::CudaStream *) const override
 
CudaInterface::CudaStream * newCudaStream() const override
 
std::unique_ptr< AbsBufferManager > createBufferManager() const override
 
CudaInterface::CudaEvent * newCudaEvent(bool) const override
 
bool cudaStreamIsActive(CudaInterface::CudaStream *) const override
 
ReduceNLLOutput reduceNLL(Config const &, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas) override
 
void cudaStreamWaitForEvent(CudaInterface::CudaStream *, CudaInterface::CudaEvent *) const override
 
std::string architectureName() const override
 
void cudaEventRecord(CudaInterface::CudaEvent *, CudaInterface::CudaStream *) const override
 
void compute(Config const &, Computer computer, std::span< double > output, VarSpan vars, ArgSpan extraArgs) override
 
void deleteCudaEvent(CudaInterface::CudaEvent *) const override
 
Architecture architecture() const override
 
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute librar...
 
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
 
std::vector< void(*)(Batches &)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
 
static RooBatchComputeClass computeObj
Static object to trigger the constructor which overwrites the dispatch pointer.
 
Namespace for dispatching RooFit computations to various backends.
 
std::span< double > ArgSpan
 
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loade...
 
constexpr std::size_t bufferSize
 
const double *__restrict InputArr
 
std::span< const std::span< const double > > VarSpan
 
void probas(TString dataset, TString fin="TMVA.root", Bool_t useTMVAStyle=kTRUE)
 
static double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.
 
static float unpackNaN(double val)
If val is NaN and this NaN has been tagged as containing a payload, unpack the float from the manti...