12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
15#include "RConfigure.h"
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
32#include <initializer_list>
/// Execute a function without arguments several times in parallel, dividing the execution in nChunks.
/// \param func Callable to execute; it is invoked without arguments.
/// \param nTimes Number of times func is to be executed.
/// \param nChunks Number of chunks the executions are divided into; defaults to 0.
///        NOTE(review): the meaning of nChunks == 0 is not visible in this excerpt — confirm against the definition.
54 void Foreach(
F func,
unsigned nTimes,
unsigned nChunks = 0);
55 template<
class F,
class INTEGER>
/// Foreach overload whose arguments are supplied as an std::initializer_list<T>.
/// NOTE(review): presumably func is invoked once per element of args, like the
/// std::vector overloads — confirm against the definition.
/// \param func Callable to execute.
/// \param args Arguments, passed by value as an initializer list.
/// \param nChunks Number of chunks the executions are divided into; defaults to 0.
57 template<
class F,
class T>
58 void Foreach(
F func, std::initializer_list<T> args,
unsigned nChunks = 0);
/// Foreach overload taking its arguments as a non-const reference to an std::vector<T>.
/// The visible definitions of the vector overloads call func(args[i]) for each index i
/// handed out by ParallelFor.
/// \param func Callable to execute on the elements of args.
/// \param args Vector of arguments (non-const reference).
/// \param nChunks Number of chunks the executions are divided into; defaults to 0.
59 template<
class F,
class T>
60 void Foreach(
F func, std::vector<T> &args,
unsigned nChunks = 0);
/// Foreach overload taking its arguments as a const reference to an std::vector<T>.
/// The visible definitions of the vector overloads call func(args[i]) for each index i
/// handed out by ParallelFor.
/// \param func Callable to execute on the elements of args.
/// \param args Vector of arguments (const reference).
/// \param nChunks Number of chunks the executions are divided into; defaults to 0.
61 template<
class F,
class T>
62 void Foreach(
F func,
const std::vector<T> &args,
unsigned nChunks = 0);
/// Execute a function nTimes in parallel, dividing the execution in nChunks and
/// providing one result per chunk: the partial results of a chunk are combined
/// with Reduce(partialResults, redfunc) and stored in that chunk's slot.
/// \param func Callable to execute; it is invoked without arguments.
/// \param nTimes Number of times func is to be executed.
/// \param redfunc Reduction callable applied to the partial results of each chunk.
/// \param nChunks Number of chunks the executions are divided into.
/// \return A vector of InvokeResult_t<F>.
/// NOTE(review): Cond is a defaulted template parameter; judging by its name it
/// constrains the return type of F — confirm against its declaration.
69 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
70 auto Map(
F func,
unsigned nTimes,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F>>;
71 template <
class F,
class INTEGER,
class R,
class Cond = val
idMapReturnCond<F, INTEGER>>
73 -> std::vector<InvokeResult_t<F, INTEGER>>;
/// Chunked Map overload whose arguments are supplied as an std::initializer_list<T>.
/// NOTE(review): a visible definition with matching template parameters moves args
/// into an std::vector<T> and forwards to Map(func, vargs, redfunc, nChunks);
/// presumably that is this overload — confirm.
/// \param func Callable to execute.
/// \param args Arguments, passed by value as an initializer list.
/// \param redfunc Reduction callable.
/// \param nChunks Number of chunks the executions are divided into.
/// \return A vector of InvokeResult_t<F, T>.
74 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
75 auto Map(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Chunked Map overload taking its arguments as a non-const reference to an std::vector<T>.
/// In the visible vector definitions, func(args[i + j]) is evaluated for the elements
/// of each chunk and the chunk's partial results are combined with
/// Reduce(partialResults, redfunc) into one entry of the returned vector.
/// \param func Callable to execute on the elements of args.
/// \param args Vector of arguments (non-const reference).
/// \param redfunc Reduction callable applied to the partial results of each chunk.
/// \param nChunks Number of chunks the executions are divided into.
/// \return A vector of InvokeResult_t<F, T>.
76 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
77 auto Map(
F func, std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Chunked Map overload taking its arguments as a const reference to an std::vector<T>.
/// In the visible vector definitions, func(args[i + j]) is evaluated for the elements
/// of each chunk and the chunk's partial results are combined with
/// Reduce(partialResults, redfunc) into one entry of the returned vector.
/// \param func Callable to execute on the elements of args.
/// \param args Vector of arguments (const reference).
/// \param redfunc Reduction callable applied to the partial results of each chunk.
/// \param nChunks Number of chunks the executions are divided into.
/// \return A vector of InvokeResult_t<F, T>.
78 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
79 auto Map(
F func,
const std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
90 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
92 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
94 template <
class F,
class INTEGER,
class R,
class Cond = val
idMapReturnCond<F, INTEGER>>
96 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
98 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
100 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
102 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
104 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
/// Reduce overload for a reduction callable that is invoked with the whole vector.
/// \param objs Vector of objects to reduce.
/// \param redfunc Callable invoked as redfunc(objs).
/// \return Declared as decltype(redfunc(objs)).
/// NOTE(review): a static_assert in a visible definition with matching template
/// parameters requires that type to be T ("redfunc does not have the correct
/// signature"); presumably it belongs to this overload — confirm.
108 template<
class T,
class R>
auto Reduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Reduce overload for a binary reduction operator.
/// \param objs Vector of objects to reduce.
/// \param redfunc Binary callable invocable on two elements of objs.
/// \return Declared as decltype(redfunc(objs.front(), objs.front())).
/// NOTE(review): a static_assert in a visible definition with matching template
/// parameters requires that type to be T ("redfunc does not have the correct
/// signature"); presumably it belongs to this overload — confirm.
109 template<
class T,
class BINARYOP>
auto Reduce(
const std::vector<T> &objs, BINARYOP redfunc) ->
decltype(redfunc(objs.front(), objs.front()));
/// Execute a function without arguments several times in parallel.
/// \param func Callable to execute; it is invoked without arguments.
/// \param nTimes Number of times func is to be executed.
/// \return A vector of InvokeResult_t<F>.
/// NOTE(review): a visible definition sizes its result vector with nTimes,
/// which suggests one result per execution — confirm.
116 template <
class F,
class Cond = val
idMapReturnCond<F>>
117 auto MapImpl(
F func,
unsigned nTimes) -> std::vector<InvokeResult_t<F>>;
118 template <
class F,
class INTEGER,
class Cond = val
idMapReturnCond<F, INTEGER>>
/// MapImpl overload taking its arguments as a non-const reference to an std::vector<T>.
/// The visible vector definitions size the result vector to args.size() and assign
/// reslist[i] = func(args[i]), so result i corresponds to argument i.
/// \param func Callable to execute on the elements of args.
/// \param args Vector of arguments (non-const reference).
/// \return A vector of InvokeResult_t<F, T>.
120 template <
class F,
class T,
class Cond = val
idMapReturnCond<F, T>>
121 auto MapImpl(
F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
/// MapImpl overload taking its arguments as a const reference to an std::vector<T>.
/// The visible vector definitions size the result vector to args.size() and assign
/// reslist[i] = func(args[i]), so result i corresponds to argument i.
/// \param func Callable to execute on the elements of args.
/// \param args Vector of arguments (const reference).
/// \return A vector of InvokeResult_t<F, T>.
122 template <
class F,
class T,
class Cond = val
idMapReturnCond<F, T>>
123 auto MapImpl(
F func,
const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
/// Execute a function in parallel over the indices of a loop.
/// \param start First index of the loop.
/// \param end Upper bound of the loop.
///        NOTE(review): callers in this file pass a count here (e.g. nTimes), which
///        suggests the bound is exclusive — confirm against the implementation.
/// \param step Stride between consecutive indices passed to f.
/// \param f Callable receiving the current loop index.
126 void ParallelFor(
unsigned start,
unsigned end,
unsigned step,
const std::function<
void(
unsigned int i)> &
f);
/// "Reduce" in parallel an std::vector<double> into a single double value.
/// \param objs Values to reduce.
/// \param redfunc Binary operation combining two doubles into one.
/// \return The reduced value.
127 double ParallelReduce(
const std::vector<double> &objs,
const std::function<
double(
double a,
double b)> &redfunc);
/// Single-precision counterpart of the double ParallelReduce overload: takes an
/// std::vector<float> and a binary float operation and returns a float.
/// \param objs Values to reduce.
/// \param redfunc Binary operation combining two floats into one.
/// \return The reduced value.
128 float ParallelReduce(
const std::vector<float> &objs,
const std::function<
float(
float a,
float b)> &redfunc);
/// "Reduce", sequentially, an std::vector into a single object.
/// \param objs Vector of objects to reduce.
/// \param redfunc Callable invoked with the whole vector.
/// \return The result of redfunc(objs).
129 template<
class T,
class R>
130 auto SeqReduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Pointer to the TBB task arena wrapper; default-initialised to nullptr.
133 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper>
fTaskArenaW =
nullptr;
147 ParallelFor(0U, nTimes, 1, [&](
unsigned int){func();});
151 unsigned step = (nTimes + nChunks - 1) / nChunks;
152 auto lambda = [&](
unsigned int i)
154 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
167 template<
class F,
class INTEGER>
174 unsigned end = *args.
end();
175 unsigned seqStep = args.
step();
176 unsigned step = (end -
start + nChunks - 1) / nChunks;
178 auto lambda = [&](
unsigned int i)
180 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
193 template<
class F,
class T>
195 std::vector<T> vargs(std::move(args));
205 template<
class F,
class T>
207 unsigned int nToProcess = args.size();
209 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
213 unsigned step = (nToProcess + nChunks - 1) / nChunks;
214 auto lambda = [&](
unsigned int i)
216 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
229 template<
class F,
class T>
231 unsigned int nToProcess = args.size();
233 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
237 unsigned step = (nToProcess + nChunks - 1) / nChunks;
238 auto lambda = [&](
unsigned int i)
240 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
252 template <
class F,
class Cond>
255 using retType =
decltype(func());
256 std::vector<retType> reslist(nTimes);
257 auto lambda = [&](
unsigned int i)
271 template <
class F,
class INTEGER,
class Cond>
274 using retType =
decltype(func(*args.begin()));
275 std::vector<retType> reslist(args.size());
276 auto lambda = [&](
unsigned int i) { reslist[i] = func(args[i]); };
287 template <
class F,
class R,
class Cond>
292 return Map(func, nTimes);
295 unsigned step = (nTimes + nChunks - 1) / nChunks;
297 unsigned actualChunks = (nTimes + step - 1) / step;
298 using retType =
decltype(func());
299 std::vector<retType> reslist(actualChunks);
300 auto lambda = [&](
unsigned int i)
302 std::vector<retType> partialResults(std::min(nTimes-i, step));
303 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
304 partialResults[j] = func();
306 reslist[i / step] =
Reduce(partialResults, redfunc);
318 template <
class F,
class T,
class Cond>
322 using retType =
decltype(func(args.front()));
324 unsigned int nToProcess = args.size();
325 std::vector<retType> reslist(nToProcess);
327 auto lambda = [&](
unsigned int i)
329 reslist[i] = func(args[i]);
342 template <
class F,
class T,
class Cond>
346 using retType =
decltype(func(args.front()));
348 unsigned int nToProcess = args.size();
349 std::vector<retType> reslist(nToProcess);
351 auto lambda = [&](
unsigned int i)
353 reslist[i] = func(args[i]);
366 template <
class F,
class INTEGER,
class R,
class Cond>
368 -> std::vector<InvokeResult_t<F, INTEGER>>
372 return Map(func, args);
375 unsigned nToProcess = args.size();
376 unsigned step = (nToProcess + nChunks - 1) / nChunks;
378 unsigned actualChunks = (nToProcess + step - 1) / step;
380 using retType =
decltype(func(*args.begin()));
381 std::vector<retType> reslist(actualChunks);
382 auto lambda = [&](
unsigned int i) {
383 std::vector<retType> partialResults(std::min(step, nToProcess - i));
384 for (
unsigned j = 0; j < partialResults.size(); j++) {
385 partialResults[j] = func(args[i + j]);
387 reslist[i / step] =
Reduce(partialResults, redfunc);
400 template <
class F,
class T,
class R,
class Cond>
402 -> std::vector<InvokeResult_t<F, T>>
406 return Map(func, args);
409 unsigned int nToProcess = args.size();
410 unsigned step = (nToProcess + nChunks - 1) / nChunks;
412 unsigned actualChunks = (nToProcess + step - 1) / step;
414 using retType =
decltype(func(args.front()));
415 std::vector<retType> reslist(actualChunks);
416 auto lambda = [&](
unsigned int i) {
417 std::vector<retType> partialResults(std::min(step, nToProcess - i));
418 for (
unsigned j = 0; j < partialResults.size(); j++) {
419 partialResults[j] = func(args[i + j]);
421 reslist[i / step] =
Reduce(partialResults, redfunc);
434 template <
class F,
class T,
class R,
class Cond>
436 -> std::vector<InvokeResult_t<F, T>>
440 return Map(func, args);
443 unsigned int nToProcess = args.size();
444 unsigned step = (nToProcess + nChunks - 1) / nChunks;
446 unsigned actualChunks = (nToProcess + step - 1) / step;
448 using retType =
decltype(func(args.front()));
449 std::vector<retType> reslist(actualChunks);
450 auto lambda = [&](
unsigned int i) {
451 std::vector<retType> partialResults(std::min(step, nToProcess - i));
452 for (
unsigned j = 0; j < partialResults.size(); j++) {
453 partialResults[j] = func(args[i + j]);
455 reslist[i / step] =
Reduce(partialResults, redfunc);
468 template <
class F,
class T,
class R,
class Cond>
470 -> std::vector<InvokeResult_t<F, T>>
472 std::vector<T> vargs(std::move(args));
473 const auto &reslist =
Map(func, vargs, redfunc, nChunks);
480 template <
class F,
class R,
class Cond>
483 return Reduce(
Map(func, nTimes), redfunc);
491 template <
class F,
class R,
class Cond>
494 return Reduce(
Map(func, nTimes, redfunc, nChunks), redfunc);
502 template <
class F,
class INTEGER,
class R,
class Cond>
506 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
514 template <
class F,
class T,
class R,
class Cond>
518 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
524 template <
class F,
class T,
class R,
class Cond>
533 template <
class F,
class T,
class R,
class Cond>
544 template <
class F,
class T,
class R,
class Cond>
547 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
555 template <
class F,
class T,
class R,
class Cond>
559 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
564 template<
class T,
class R>
568 static_assert(std::is_same<
decltype(redfunc(objs)), T>::value,
"redfunc does not have the correct signature");
579 template<
class T,
class BINARYOP>
583 static_assert(std::is_same<
decltype(redfunc(objs.front(), objs.front())), T>::value,
"redfunc does not have the correct signature");
593 template<
class T,
class R>
596 return redfunc(objs);
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
ROOT::TypeTraits::InvokeResult_t< F, Args... > InvokeResult_t
A pseudo container class which is a generator of indices.
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< InvokeResult_t< F > >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per chunk.
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t< F >
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
TThreadExecutor(UInt_t nThreads=0u)
Class constructor.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Execute a function without arguments several times in parallel.
namespace associated R package for ROOT.