12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
15#include "RConfigure.h"
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
32#include <initializer_list>
/// Execute a function without arguments several times in parallel, dividing
/// the execution in nChunks.
/// \param func    Callable invoked with no arguments.
/// \param nTimes  Number of times func is executed.
/// \param nChunks Number of chunks to split the execution into (defaults to 0).
// NOTE(review): the template header of this declaration and the handling of
// nChunks == 0 are not visible in this excerpt -- confirm against the definition.
54 void Foreach(
F func,
unsigned nTimes,
unsigned nChunks = 0);
55 template<
class F,
class INTEGER>
/// Foreach overload taking its arguments as a std::initializer_list<T>.
/// \param func    Callable; presumably invoked once per element of args -- confirm.
/// \param args    Arguments, passed by value as an initializer list.
/// \param nChunks Number of chunks to split the execution into (defaults to 0).
// NOTE(review): a fragment elsewhere in this file moves `args` into a
// std::vector<T>; it looks like this overload forwards to the vector version.
57 template<
class F,
class T>
58 void Foreach(
F func, std::initializer_list<T> args,
unsigned nChunks = 0);
/// Foreach overload taking its arguments as a non-const std::vector<T> reference.
/// \param func    Callable; the visible definition fragments call func(args[i]).
/// \param args    Vector of arguments, one call per element.
/// \param nChunks Number of chunks to split the execution into (defaults to 0).
59 template<
class F,
class T>
60 void Foreach(
F func, std::vector<T> &args,
unsigned nChunks = 0);
/// Foreach overload taking its arguments as a const std::vector<T> reference.
/// \param func    Callable; the visible definition fragments call func(args[i]).
/// \param args    Read-only vector of arguments, one call per element.
/// \param nChunks Number of chunks to split the execution into (defaults to 0).
61 template<
class F,
class T>
62 void Foreach(
F func,
const std::vector<T> &args,
unsigned nChunks = 0);
77 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
79 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
81 template <
class F,
class INTEGER,
class R,
class Cond = val
idMapReturnCond<F, INTEGER>>
83 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
85 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
87 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
89 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
91 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
/// "Reduce" an std::vector into a single object by passing a function as the
/// second argument.
/// \param objs    Vector of objects to reduce.
/// \param redfunc Callable invoked as redfunc(objs), i.e. on the whole vector.
/// \return Whatever redfunc(objs) returns (see the trailing return type).
// NOTE(review): a static_assert visible further down in this file appears to
// require decltype(redfunc(objs)) to be exactly T -- confirm it belongs to this overload.
95 template<
class T,
class R>
auto Reduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Reduce overload taking a binary operation instead of a whole-vector reducer.
/// \param objs    Vector of objects to reduce.
/// \param redfunc Callable invoked with two elements, redfunc(a, b).
/// \return The type of redfunc(objs.front(), objs.front()).
// NOTE(review): presumably implemented via ParallelReduce; the definition body
// is not visible in this excerpt -- confirm.
96 template<
class T,
class BINARYOP>
auto Reduce(
const std::vector<T> &objs, BINARYOP redfunc) ->
decltype(redfunc(objs.front(), objs.front()));
/// Execute a function without arguments several times in parallel.
/// \param func   Callable invoked with no arguments.
/// \param nTimes Number of times func is executed.
/// \return A vector holding one result of func per execution.
// NOTE(review): the default for Cond is split over two lines in this copy
// (`val` / `idMapReturnCond`); it likely reads `validMapReturnCond<F>` -- confirm upstream.
103 template <
class F,
class Cond = val
idMapReturnCond<F>>
104 auto MapImpl(
F func,
unsigned nTimes) -> std::vector<InvokeResult_t<F>>;
105 template <
class F,
class INTEGER,
class Cond = val
idMapReturnCond<F, INTEGER>>
/// MapImpl overload over the elements of a non-const std::vector<T>.
/// \param func Callable taking one element of args.
/// \param args Vector of arguments.
/// \return A vector of results; the visible definition fragments store
///         func(args[i]) at index i.
107 template <
class F,
class T,
class Cond = val
idMapReturnCond<F, T>>
108 auto MapImpl(
F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
/// MapImpl overload over the elements of a const std::vector<T>.
/// \param func Callable taking one element of args.
/// \param args Read-only vector of arguments.
/// \return A vector of results; the visible definition fragments store
///         func(args[i]) at index i.
109 template <
class F,
class T,
class Cond = val
idMapReturnCond<F, T>>
110 auto MapImpl(
F func,
const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
/// Execute a function nTimes in parallel, dividing the execution in nChunks
/// and providing a result per chunk.
/// \param func    Callable invoked with no arguments.
/// \param nTimes  Number of times func is executed.
/// \param redfunc Reduction applied to the partial results of each chunk.
/// \param nChunks Number of chunks to split the execution into.
/// \return A vector with one reduced result per chunk actually processed.
// NOTE(review): a visible fragment returns Map(func, nTimes) on some path,
// presumably when nChunks == 0 -- the guarding condition is not shown here.
114 template <
class F,
class R,
class Cond = val
idMapReturnCond<F>>
115 auto Map(
F func,
unsigned nTimes,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F>>;
116 template <
class F,
class INTEGER,
class R,
class Cond = val
idMapReturnCond<F, INTEGER>>
118 -> std::vector<InvokeResult_t<F, INTEGER>>;
/// Chunked Map overload taking its arguments as a std::initializer_list<T>.
/// \param func    Callable taking one element of args.
/// \param args    Arguments, passed by value as an initializer list.
/// \param redfunc Reduction applied to the partial results of each chunk.
/// \param nChunks Number of chunks to split the execution into.
/// \return A vector of per-chunk results.
// NOTE(review): a fragment elsewhere in this file moves `args` into a
// std::vector<T> and calls Map(func, vargs, redfunc, nChunks); it appears to
// be this overload's definition -- confirm.
119 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
120 auto Map(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Chunked Map overload over the elements of a non-const std::vector<T>.
/// \param func    Callable taking one element of args.
/// \param args    Vector of arguments.
/// \param redfunc Reduction applied to the partial results of each chunk.
/// \param nChunks Number of chunks to split the execution into.
/// \return A vector of per-chunk results; the visible definition fragments
///         store Reduce(partialResults, redfunc) at index i / step.
121 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
122 auto Map(
F func, std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Chunked Map overload over the elements of a const std::vector<T>.
/// \param func    Callable taking one element of args.
/// \param args    Read-only vector of arguments.
/// \param redfunc Reduction applied to the partial results of each chunk.
/// \param nChunks Number of chunks to split the execution into.
/// \return A vector of per-chunk results; the visible definition fragments
///         store Reduce(partialResults, redfunc) at index i / step.
123 template <
class F,
class T,
class R,
class Cond = val
idMapReturnCond<F, T>>
124 auto Map(
F func,
const std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Execute a function in parallel over the indices of a loop.
/// \param start First index of the loop.
/// \param end   End index of the loop.
/// \param step  Increment between consecutive indices.
/// \param f     Callable invoked with the current index.
// NOTE(review): whether `end` is inclusive or exclusive is not visible in this
// excerpt; callers here pass (0U, n, 1), which suggests exclusive -- confirm.
127 void ParallelFor(
unsigned start,
unsigned end,
unsigned step,
const std::function<
void(
unsigned int i)> &
f);
/// "Reduce" in parallel an std::vector<double> into a single double value.
/// \param objs    Values to reduce.
/// \param redfunc Binary operation combining two doubles into one.
/// \return The reduced value.
128 double ParallelReduce(
const std::vector<double> &objs,
const std::function<
double(
double a,
double b)> &redfunc);
/// Single-precision counterpart of ParallelReduce: takes an std::vector<float>
/// and a binary float operation, and returns a float.
/// \param objs    Values to reduce.
/// \param redfunc Binary operation combining two floats into one.
/// \return The reduced value.
129 float ParallelReduce(
const std::vector<float> &objs,
const std::function<
float(
float a,
float b)> &redfunc);
/// "Reduce", sequentially, an std::vector into a single object.
/// \param objs    Vector of objects to reduce.
/// \param redfunc Callable invoked on the whole vector; the visible definition
///                fragment simply returns redfunc(objs).
/// \return Whatever redfunc(objs) returns (see the trailing return type).
130 template<
class T,
class R>
131 auto SeqReduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Pointer to the TBB task arena wrapper; default-initialized to nullptr.
134 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper>
fTaskArenaW =
nullptr;
148 ParallelFor(0U, nTimes, 1, [&](
unsigned int){func();});
152 unsigned step = (nTimes + nChunks - 1) / nChunks;
153 auto lambda = [&](
unsigned int i)
155 for (
unsigned j = 0; j < step && (
i + j) < nTimes; j++) {
168 template<
class F,
class INTEGER>
174 unsigned start = *args.
begin();
175 unsigned end = *args.
end();
176 unsigned seqStep = args.
step();
177 unsigned step = (end - start + nChunks - 1) / nChunks;
179 auto lambda = [&](
unsigned int i)
181 for (
unsigned j = 0; j < step && (
i + j) < end; j+=seqStep) {
194 template<
class F,
class T>
196 std::vector<T> vargs(std::move(args));
206 template<
class F,
class T>
208 unsigned int nToProcess = args.size();
210 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[
i]);});
214 unsigned step = (nToProcess + nChunks - 1) / nChunks;
215 auto lambda = [&](
unsigned int i)
217 for (
unsigned j = 0; j < step && (
i + j) < nToProcess; j++) {
230 template<
class F,
class T>
232 unsigned int nToProcess = args.size();
234 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[
i]);});
238 unsigned step = (nToProcess + nChunks - 1) / nChunks;
239 auto lambda = [&](
unsigned int i)
241 for (
unsigned j = 0; j < step && (
i + j) < nToProcess; j++) {
253 template <
class F,
class Cond>
256 using retType =
decltype(func());
257 std::vector<retType> reslist(nTimes);
258 auto lambda = [&](
unsigned int i)
272 template <
class F,
class INTEGER,
class Cond>
275 using retType =
decltype(func(*args.begin()));
276 std::vector<retType> reslist(args.size());
277 auto lambda = [&](
unsigned int i) { reslist[
i] = func(args[
i]); };
288 template <
class F,
class R,
class Cond>
293 return Map(func, nTimes);
296 unsigned step = (nTimes + nChunks - 1) / nChunks;
298 unsigned actualChunks = (nTimes + step - 1) / step;
299 using retType =
decltype(func());
300 std::vector<retType> reslist(actualChunks);
301 auto lambda = [&](
unsigned int i)
303 std::vector<retType> partialResults(std::min(nTimes-
i, step));
304 for (
unsigned j = 0; j < step && (
i + j) < nTimes; j++) {
305 partialResults[j] = func();
307 reslist[
i / step] =
Reduce(partialResults, redfunc);
319 template <
class F,
class T,
class Cond>
323 using retType =
decltype(func(args.front()));
325 unsigned int nToProcess = args.size();
326 std::vector<retType> reslist(nToProcess);
328 auto lambda = [&](
unsigned int i)
330 reslist[
i] = func(args[
i]);
343 template <
class F,
class T,
class Cond>
347 using retType =
decltype(func(args.front()));
349 unsigned int nToProcess = args.size();
350 std::vector<retType> reslist(nToProcess);
352 auto lambda = [&](
unsigned int i)
354 reslist[
i] = func(args[
i]);
367 template <
class F,
class INTEGER,
class R,
class Cond>
369 -> std::vector<InvokeResult_t<F, INTEGER>>
373 return Map(func, args);
376 unsigned nToProcess = args.size();
377 unsigned step = (nToProcess + nChunks - 1) / nChunks;
379 unsigned actualChunks = (nToProcess + step - 1) / step;
381 using retType =
decltype(func(*args.begin()));
382 std::vector<retType> reslist(actualChunks);
383 auto lambda = [&](
unsigned int i) {
384 std::vector<retType> partialResults(std::min(step, nToProcess -
i));
385 for (
unsigned j = 0; j < partialResults.size(); j++) {
386 partialResults[j] = func(args[
i + j]);
388 reslist[
i / step] =
Reduce(partialResults, redfunc);
401 template <
class F,
class T,
class R,
class Cond>
403 -> std::vector<InvokeResult_t<F, T>>
407 return Map(func, args);
410 unsigned int nToProcess = args.size();
411 unsigned step = (nToProcess + nChunks - 1) / nChunks;
413 unsigned actualChunks = (nToProcess + step - 1) / step;
415 using retType =
decltype(func(args.front()));
416 std::vector<retType> reslist(actualChunks);
417 auto lambda = [&](
unsigned int i) {
418 std::vector<retType> partialResults(std::min(step, nToProcess -
i));
419 for (
unsigned j = 0; j < partialResults.size(); j++) {
420 partialResults[j] = func(args[
i + j]);
422 reslist[
i / step] =
Reduce(partialResults, redfunc);
435 template <
class F,
class T,
class R,
class Cond>
437 -> std::vector<InvokeResult_t<F, T>>
441 return Map(func, args);
444 unsigned int nToProcess = args.size();
445 unsigned step = (nToProcess + nChunks - 1) / nChunks;
447 unsigned actualChunks = (nToProcess + step - 1) / step;
449 using retType =
decltype(func(args.front()));
450 std::vector<retType> reslist(actualChunks);
451 auto lambda = [&](
unsigned int i) {
452 std::vector<retType> partialResults(std::min(step, nToProcess -
i));
453 for (
unsigned j = 0; j < partialResults.size(); j++) {
454 partialResults[j] = func(args[
i + j]);
456 reslist[
i / step] =
Reduce(partialResults, redfunc);
469 template <
class F,
class T,
class R,
class Cond>
471 -> std::vector<InvokeResult_t<F, T>>
473 std::vector<T> vargs(std::move(args));
474 const auto &reslist =
Map(func, vargs, redfunc, nChunks);
481 template <
class F,
class R,
class Cond>
484 return Reduce(
Map(func, nTimes), redfunc);
492 template <
class F,
class R,
class Cond>
495 return Reduce(
Map(func, nTimes, redfunc, nChunks), redfunc);
503 template <
class F,
class INTEGER,
class R,
class Cond>
507 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
515 template <
class F,
class T,
class R,
class Cond>
519 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
525 template <
class F,
class T,
class R,
class Cond>
534 template <
class F,
class T,
class R,
class Cond>
545 template <
class F,
class T,
class R,
class Cond>
548 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
556 template <
class F,
class T,
class R,
class Cond>
560 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
565 template<
class T,
class R>
569 static_assert(std::is_same<
decltype(redfunc(objs)), T>
::value,
"redfunc does not have the correct signature");
580 template<
class T,
class BINARYOP>
584 static_assert(std::is_same<
decltype(redfunc(objs.front(), objs.front())), T>
::value,
"redfunc does not have the correct signature");
594 template<
class T,
class R>
597 return redfunc(objs);
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
ROOT::TypeTraits::InvokeResult_t< F, Args... > InvokeResult_t
A pseudo container class which is a generator of indices.
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< InvokeResult_t< F > >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per chunk.
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t< F >
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce" an std::vector into a single object by passing a function as the second argument defining th...
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
TThreadExecutor(UInt_t nThreads=0u)
Class constructor.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Execute a function without arguments several times in parallel.
namespace associated R package for ROOT.
tbb::task_arena is an alias of tbb::interface7::task_arena, which does not allow forward-declaring tbb::task_arena directly.