Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooBatchCompute.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN, September 2020
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13/**
14\file RooBatchCompute.cxx
15\class RbcClass
16\ingroup Roobatchcompute
17
18This file contains the code for CPU computations using the RooBatchCompute library.
19**/
20
21#include "RooBatchCompute.h"
22#include "RooVDTHeaders.h"
23#include "Batches.h"
24
25#include "ROOT/RConfig.h"
26#include "ROOT/TExecutor.hxx"
27
28#include <algorithm>
29#include <sstream>
30#include <stdexcept>
31
32#ifndef RF_ARCH
33#error "RF_ARCH should always be defined"
34#endif
35
36namespace RooBatchCompute {
37namespace RF_ARCH {
38
39std::vector<void (*)(BatchesHandle)> getFunctions();
40
41/// This class overrides some RooBatchComputeInterface functions, for the
42/// purpose of providing a CPU specific implementation of the library.
44private:
45 const std::vector<void (*)(BatchesHandle)> _computeFunctions;
46
47public:
49 {
50 // Set the dispatch pointer to this instance of the library upon loading
51 dispatchCPU = this;
52 }
53
54 Architecture architecture() const override { return Architecture::RF_ARCH; };
55 std::string architectureName() const override
56 {
57 // transform to lower case to match the original architecture name passed to the compiler
58#ifdef _QUOTEVAL_ // to quote the value of the preprocessor macro instead of the name
59#error "It's unexpected that _QUOTEVAL_ is defined at this point!"
60#endif
61#define _QUOTEVAL_(x) _QUOTE_(x)
62 std::string out = _QUOTEVAL_(RF_ARCH);
63#undef _QUOTEVAL_
64 std::transform(out.begin(), out.end(), out.begin(), [](unsigned char c) { return std::tolower(c); });
65 ;
66 return out;
67 };
68
69 /** Compute multiple values using optimized functions.
70 This method creates a Batches object and passes it to the correct compute function.
71 In case Implicit Multithreading is enabled, the events to be processed are equally
72 divided among the tasks to be generated and computed in parallel.
73 \param computer An enum specifying the compute function to be used.
74 \param output The array where the computation results are stored.
75 \param nEvents The number of events to be processed.
76 \param vars A std::vector containing pointers to the variables involved in the computation.
77 \param extraArgs An optional std::vector containing extra double values that may participate in the computation. **/
78 void compute(cudaStream_t *, Computer computer, RestrictArr output, size_t nEvents, const VarVector &vars,
79 const ArgVector &extraArgs) override
80 {
81 static std::vector<double> buffer;
82 buffer.resize(vars.size() * bufferSize);
83
86 std::size_t nThreads = ex.GetPoolSize();
87
88 std::size_t nEventsPerThread = nEvents / nThreads + (nEvents % nThreads > 0);
89
90 // Reset the number of threads to the number we actually need given nEventsPerThread
91 nThreads = nEvents / nEventsPerThread + (nEvents % nEventsPerThread > 0);
92
93 auto task = [&](std::size_t idx) -> int {
94
95 // Fill a std::vector<Batches> with the same object and with ~nEvents/nThreads
96 // Then advance every object but the first to split the work between threads
97 Batches batches(output, nEventsPerThread, vars, extraArgs, buffer.data());
98 batches.advance(batches.getNEvents() * idx);
99
100 // Set the number of events of the last Batches object as the remaining events
101 if (idx == nThreads - 1) {
102 batches.setNEvents(nEvents - idx * batches.getNEvents());
103 }
104
105 int events = batches.getNEvents();
106 batches.setNEvents(bufferSize);
107 while (events > bufferSize) {
108 _computeFunctions[computer](batches);
109 batches.advance(bufferSize);
110 events -= bufferSize;
111 }
112 batches.setNEvents(events);
113 _computeFunctions[computer](batches);
114 return 0;
115 };
116
117 std::vector<std::size_t> indices(nThreads);
118 for (unsigned int i = 1; i < nThreads; i++) {
119 indices[i] = i;
120 }
121 ex.Map(task, indices);
122 } else {
123 // Fill a std::vector<Batches> with the same object and with ~nEvents/nThreads
124 // Then advance every object but the first to split the work between threads
125 Batches batches(output, nEvents, vars, extraArgs, buffer.data());
126
127 int events = batches.getNEvents();
128 batches.setNEvents(bufferSize);
129 while (events > bufferSize) {
130 _computeFunctions[computer](batches);
131 batches.advance(bufferSize);
132 events -= bufferSize;
133 }
134 batches.setNEvents(events);
135 _computeFunctions[computer](batches);
136 }
137 }
138 /// Return the sum of an input array
139 double sumReduce(cudaStream_t *, InputArr input, size_t n) override
140 {
141 long double sum = 0.0;
142 for (size_t i = 0; i < n; i++)
143 sum += input[i];
144 return sum;
145 }
146}; // End class RooBatchComputeClass
147
148/// Static object to trigger the constructor which overwrites the dispatch pointer.
150
151/** Construct a Batches object
152\param output The array where the computation results are stored.
153\param nEvents The number of events to be processed.
154\param vars A std::vector containing pointers to the variables involved in the computation.
155\param extraArgs An optional std::vector containing extra double values that may participate in the computation.
156\param buffer A 2D array that is used as a buffer for scalar variables.
157For every scalar parameter a buffer (one row of the buffer) is filled with copies of the scalar
158value, so that it behaves as a batch and facilitates auto-vectorization. The Batches object can be
159passed by value to a compute function to perform efficient computations. **/
160Batches::Batches(RestrictArr output, size_t nEvents, const VarVector &vars, const ArgVector &extraArgs, double *buffer)
161 : _nEvents(nEvents), _nBatches(vars.size()), _nExtraArgs(extraArgs.size()), _output(output)
162{
163 _arrays.resize(vars.size());
164 for (size_t i = 0; i < vars.size(); i++) {
165 const RooSpan<const double> &span = vars[i];
166 if (span.empty()) {
167 std::stringstream ss;
168 ss << "The span number " << i << " passed to Batches::Batches() is empty!";
169 throw std::runtime_error(ss.str());
170 } else if (span.size() > 1)
171 _arrays[i].set(span.data()[0], span.data(), true);
172 else {
173 std::fill_n(&buffer[i * bufferSize], bufferSize, span.data()[0]);
174 _arrays[i].set(span.data()[0], &buffer[i * bufferSize], false);
175 }
176 }
177 _extraArgs = extraArgs;
178}
179
180} // End namespace RF_ARCH
181} // End namespace RooBatchCompute
typedef void(GLAPIENTRYP _GLUfuncptr)(void)
#define c(i)
Definition RSha256.hxx:101
#define _QUOTEVAL_(x)
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
This class implements the interface to execute the same task multiple times, sequentially or in parallel.
Definition TExecutor.hxx:37
unsigned GetPoolSize() const
Return the number of pooled workers.
void advance(size_t nEvents)
Definition Batches.h:105
std::vector< Batch > _arrays
Definition Batches.h:76
std::vector< double > _extraArgs
Definition Batches.h:77
__roodevice__ size_t getNEvents() const
Definition Batches.h:99
Batches(RestrictArr output, size_t nEvents, const VarVector &vars, const ArgVector &extraArgs={}, double *buffer=nullptr)
Construct a Batches object.
This class overrides some RooBatchComputeInterface functions, for the purpose of providing a CPU specific implementation of the library.
void compute(cudaStream_t *, Computer computer, RestrictArr output, size_t nEvents, const VarVector &vars, const ArgVector &extraArgs) override
Compute multiple values using optimized functions.
double sumReduce(cudaStream_t *, InputArr input, size_t n) override
Return the sum of an input array.
const std::vector< void(*)(BatchesHandle)> _computeFunctions
The interface which should be implemented to provide optimised computation functions for implementati...
A simple container to hold a batch of data values.
Definition RooSpan.h:34
constexpr std::span< T >::pointer data() const
Definition RooSpan.h:106
constexpr std::span< T >::index_type size() const noexcept
Definition RooSpan.h:121
constexpr bool empty() const noexcept
Definition RooSpan.h:125
const Int_t n
Definition legend1.C:16
Double_t ex[n]
Definition legend1.C:17
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:558
static RooBatchComputeClass computeObj
Static object to trigger the constructor which overwrites the dispatch pointer.
std::vector< void(*)(BatchesHandle)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
Namespace for dispatching RooFit computations to various backends.
std::vector< RooSpan< const double > > VarVector
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loaded.
const double *__restrict InputArr
constexpr uint16_t bufferSize
Definition Batches.h:38
std::vector< double > ArgVector
double *__restrict RestrictArr
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345
static void output(int code)
Definition gifencode.c:226