12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
15#include "RConfigure.h"
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
32#include <initializer_list>
44 template <
typename F,
typename... Args>
57 void Foreach(
F func,
unsigned nTimes,
unsigned nChunks = 0);
58 template<
class F,
class INTEGER>
/// Declares the Foreach overload that receives its arguments as a std::initializer_list<T>.
/// \param func Callable to execute.
/// \param args Brace-enclosed list of arguments.
/// \param nChunks Number of chunks; defaults to 0.
/// NOTE(review): presumably func is invoked once per element of args — confirm against the definition.
60 template<
class F,
class T>
61 void Foreach(
F func, std::initializer_list<T> args,
unsigned nChunks = 0);
/// Declares the Foreach overload that runs over a non-const std::vector<T>.
/// \param func Callable to execute; the definition calls it as func(args[i]).
/// \param args Vector holding the arguments.
/// \param nChunks Number of chunks; defaults to 0.
/// NOTE(review): presumably nChunks == 0 means one task per element — confirm against the definition.
62 template<
class F,
class T>
63 void Foreach(
F func, std::vector<T> &args,
unsigned nChunks = 0);
/// Declares the Foreach overload that runs over a const std::vector<T>.
/// \param func Callable to execute; the definition calls it as func(args[i]).
/// \param args Read-only vector holding the arguments.
/// \param nChunks Number of chunks; defaults to 0.
/// NOTE(review): presumably nChunks == 0 means one task per element — confirm against the definition.
64 template<
class F,
class T>
65 void Foreach(
F func,
const std::vector<T> &args,
unsigned nChunks = 0);
80 template <
class F,
class R,
class Cond = noReferenceCond<F>>
82 template <
class F,
class R,
class Cond = noReferenceCond<F>>
84 template <
class F,
class INTEGER,
class R,
class Cond = noReferenceCond<F, INTEGER>>
86 template <
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
88 template <
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
90 template <
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
92 template <
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
94 template <
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
/// "Reduce" an std::vector into a single object using a callable that takes the whole vector.
/// \param objs Vector of objects to reduce.
/// \param redfunc Reduction callable, invoked as redfunc(objs).
/// \return Whatever redfunc(objs) returns (see the trailing return type).
98 template<
class T,
class R>
auto Reduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// "Reduce" an std::vector into a single object using a binary operator.
/// \param objs Vector of objects to reduce.
/// \param redfunc Binary callable, invocable as redfunc(a, b) on two elements of objs.
/// \return The type of redfunc(objs.front(), objs.front()) (see the trailing return type).
99 template<
class T,
class BINARYOP>
auto Reduce(
const std::vector<T> &objs, BINARYOP redfunc) ->
decltype(redfunc(objs.front(), objs.front()));
/// Execute a function without arguments several times in parallel.
/// \param func Callable invoked with no arguments.
/// \param nTimes Number of invocations.
/// \return A vector of InvokeResult_t<F>, i.e. of the results of calling func.
106 template <
class F,
class Cond = noReferenceCond<F>>
107 auto MapImpl(
F func,
unsigned nTimes) -> std::vector<InvokeResult_t<F>>;
108 template <
class F,
class INTEGER,
class Cond = noReferenceCond<F, INTEGER>>
/// Declares the MapImpl overload that applies func to the elements of a non-const std::vector<T>.
/// \param func Callable invocable with a T.
/// \param args Vector holding the arguments.
/// \return A vector of InvokeResult_t<F, T>.
/// NOTE(review): presumably result i corresponds to args[i] — confirm against the definition.
110 template <
class F,
class T,
class Cond = noReferenceCond<F, T>>
111 auto MapImpl(
F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
/// Declares the MapImpl overload that applies func to the elements of a const std::vector<T>.
/// \param func Callable invocable with a T.
/// \param args Read-only vector holding the arguments.
/// \return A vector of InvokeResult_t<F, T>.
/// NOTE(review): presumably result i corresponds to args[i] — confirm against the definition.
112 template <
class F,
class T,
class Cond = noReferenceCond<F, T>>
113 auto MapImpl(
F func,
const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
/// Execute a function nTimes in parallel, dividing the execution in nChunks and
/// providing a result per chunk.
/// \param func Callable invoked with no arguments.
/// \param nTimes Number of invocations.
/// \param redfunc Reduction callable used to merge the partial results of one chunk.
/// \param nChunks Requested number of chunks.
/// \return A vector of InvokeResult_t<F> holding the reduced chunk results.
117 template <
class F,
class R,
class Cond = noReferenceCond<F>>
118 auto Map(
F func,
unsigned nTimes,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F>>;
119 template <
class F,
class INTEGER,
class R,
class Cond = noReferenceCond<F, INTEGER>>
121 -> std::vector<InvokeResult_t<F, INTEGER>>;
/// Declares the chunked Map overload that receives its arguments as a std::initializer_list<T>.
/// \param func Callable invocable with a T.
/// \param args Brace-enclosed list of arguments.
/// \param redfunc Reduction callable.
/// \param nChunks Requested number of chunks.
/// \return A vector of InvokeResult_t<F, T>.
/// NOTE(review): presumably forwards to the std::vector overload — confirm against the definition.
122 template <
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
123 auto Map(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Declares the chunked Map overload that runs over a non-const std::vector<T>.
/// \param func Callable invocable with a T.
/// \param args Vector holding the arguments.
/// \param redfunc Reduction callable used to merge the partial results of one chunk.
/// \param nChunks Requested number of chunks.
/// \return A vector of InvokeResult_t<F, T> holding the reduced chunk results.
124 template <
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
125 auto Map(
F func, std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Declares the chunked Map overload that runs over a const std::vector<T>.
/// \param func Callable invocable with a T.
/// \param args Read-only vector holding the arguments.
/// \param redfunc Reduction callable used to merge the partial results of one chunk.
/// \param nChunks Requested number of chunks.
/// \return A vector of InvokeResult_t<F, T> holding the reduced chunk results.
126 template <
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
127 auto Map(
F func,
const std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
/// Execute a function in parallel over the indices of a loop.
/// \param start First index of the loop.
/// \param end Upper bound of the loop (NOTE(review): presumably exclusive — confirm against the implementation).
/// \param step Increment between consecutive indices.
/// \param f Callable that receives the current index.
130 void ParallelFor(
unsigned start,
unsigned end,
unsigned step,
const std::function<
void(
unsigned int i)> &
f);
/// "Reduce" in parallel an std::vector<double> into a single double value.
/// \param objs Values to reduce.
/// \param redfunc Binary operation combining two doubles into one.
/// \return The reduced value.
131 double ParallelReduce(
const std::vector<double> &objs,
const std::function<
double(
double a,
double b)> &redfunc);
/// Single-precision overload of ParallelReduce: takes an std::vector<float> and a
/// binary float operation, and returns a float.
/// \param objs Values to reduce.
/// \param redfunc Binary operation combining two floats into one.
/// \return The reduced value.
132 float ParallelReduce(
const std::vector<float> &objs,
const std::function<
float(
float a,
float b)> &redfunc);
/// "Reduce", sequentially, an std::vector into a single object.
/// \param objs Vector of objects to reduce.
/// \param redfunc Reduction callable, invoked as redfunc(objs).
/// \return Whatever redfunc(objs) returns (see the trailing return type).
133 template<
class T,
class R>
134 auto SeqReduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Pointer to the TBB task arena wrapper; null by default.
137 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper>
fTaskArenaW =
nullptr;
151 ParallelFor(0U, nTimes, 1, [&](
unsigned int){func();});
155 unsigned step = (nTimes + nChunks - 1) / nChunks;
156 auto lambda = [&](
unsigned int i)
158 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
171 template<
class F,
class INTEGER>
177 unsigned start = *args.
begin();
178 unsigned end = *args.
end();
179 unsigned seqStep = args.
step();
180 unsigned step = (end - start + nChunks - 1) / nChunks;
182 auto lambda = [&](
unsigned int i)
184 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
197 template<
class F,
class T>
199 std::vector<T> vargs(std::move(args));
209 template<
class F,
class T>
211 unsigned int nToProcess = args.size();
213 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
217 unsigned step = (nToProcess + nChunks - 1) / nChunks;
218 auto lambda = [&](
unsigned int i)
220 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
233 template<
class F,
class T>
235 unsigned int nToProcess = args.size();
237 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
241 unsigned step = (nToProcess + nChunks - 1) / nChunks;
242 auto lambda = [&](
unsigned int i)
244 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
256 template <
class F,
class Cond>
259 using retType =
decltype(func());
260 std::vector<retType> reslist(nTimes);
261 auto lambda = [&](
unsigned int i)
265 ParallelFor(0U, nTimes, 1, lambda);
275 template <
class F,
class INTEGER,
class Cond>
278 using retType =
decltype(func(*args.begin()));
279 std::vector<retType> reslist(args.size());
280 auto lambda = [&](
unsigned int i) { reslist[i] = func(args[i]); };
281 ParallelFor(0U, args.size(), 1, lambda);
291 template <
class F,
class R,
class Cond>
296 return Map(func, nTimes);
299 unsigned step = (nTimes + nChunks - 1) / nChunks;
301 unsigned actualChunks = (nTimes + step - 1) / step;
302 using retType =
decltype(func());
303 std::vector<retType> reslist(actualChunks);
304 auto lambda = [&](
unsigned int i)
306 std::vector<retType> partialResults(std::min(nTimes-i, step));
307 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
308 partialResults[j] = func();
310 reslist[i / step] = Reduce(partialResults, redfunc);
312 ParallelFor(0U, nTimes, step, lambda);
322 template <
class F,
class T,
class Cond>
326 using retType =
decltype(func(args.front()));
328 unsigned int nToProcess = args.size();
329 std::vector<retType> reslist(nToProcess);
331 auto lambda = [&](
unsigned int i)
333 reslist[i] = func(args[i]);
336 ParallelFor(0U, nToProcess, 1, lambda);
346 template <
class F,
class T,
class Cond>
350 using retType =
decltype(func(args.front()));
352 unsigned int nToProcess = args.size();
353 std::vector<retType> reslist(nToProcess);
355 auto lambda = [&](
unsigned int i)
357 reslist[i] = func(args[i]);
360 ParallelFor(0U, nToProcess, 1, lambda);
370 template <
class F,
class INTEGER,
class R,
class Cond>
372 -> std::vector<InvokeResult_t<F, INTEGER>>
376 return Map(func, args);
379 unsigned nToProcess = args.size();
380 unsigned step = (nToProcess + nChunks - 1) / nChunks;
382 unsigned actualChunks = (nToProcess + step - 1) / step;
384 using retType =
decltype(func(*args.begin()));
385 std::vector<retType> reslist(actualChunks);
386 auto lambda = [&](
unsigned int i) {
387 std::vector<retType> partialResults(std::min(step, nToProcess - i));
388 for (
unsigned j = 0; j < partialResults.size(); j++) {
389 partialResults[j] = func(args[i + j]);
391 reslist[i / step] = Reduce(partialResults, redfunc);
394 ParallelFor(0U, nToProcess, step, lambda);
404 template <
class F,
class T,
class R,
class Cond>
406 -> std::vector<InvokeResult_t<F, T>>
410 return Map(func, args);
413 unsigned int nToProcess = args.size();
414 unsigned step = (nToProcess + nChunks - 1) / nChunks;
416 unsigned actualChunks = (nToProcess + step - 1) / step;
418 using retType =
decltype(func(args.front()));
419 std::vector<retType> reslist(actualChunks);
420 auto lambda = [&](
unsigned int i) {
421 std::vector<retType> partialResults(std::min(step, nToProcess - i));
422 for (
unsigned j = 0; j < partialResults.size(); j++) {
423 partialResults[j] = func(args[i + j]);
425 reslist[i / step] = Reduce(partialResults, redfunc);
428 ParallelFor(0U, nToProcess, step, lambda);
438 template <
class F,
class T,
class R,
class Cond>
440 -> std::vector<InvokeResult_t<F, T>>
444 return Map(func, args);
447 unsigned int nToProcess = args.size();
448 unsigned step = (nToProcess + nChunks - 1) / nChunks;
450 unsigned actualChunks = (nToProcess + step - 1) / step;
452 using retType =
decltype(func(args.front()));
453 std::vector<retType> reslist(actualChunks);
454 auto lambda = [&](
unsigned int i) {
455 std::vector<retType> partialResults(std::min(step, nToProcess - i));
456 for (
unsigned j = 0; j < partialResults.size(); j++) {
457 partialResults[j] = func(args[i + j]);
459 reslist[i / step] = Reduce(partialResults, redfunc);
462 ParallelFor(0U, nToProcess, step, lambda);
472 template <
class F,
class T,
class R,
class Cond>
474 -> std::vector<InvokeResult_t<F, T>>
476 std::vector<T> vargs(std::move(args));
477 const auto &reslist = Map(func, vargs, redfunc, nChunks);
484 template <
class F,
class R,
class Cond>
487 return Reduce(Map(func, nTimes), redfunc);
495 template <
class F,
class R,
class Cond>
498 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
506 template <
class F,
class INTEGER,
class R,
class Cond>
510 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
518 template <
class F,
class T,
class R,
class Cond>
522 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
528 template <
class F,
class T,
class R,
class Cond>
531 return Reduce(Map(func, args), redfunc);
537 template <
class F,
class T,
class R,
class Cond>
540 return Reduce(Map(func, args), redfunc);
548 template <
class F,
class T,
class R,
class Cond>
551 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
559 template <
class F,
class T,
class R,
class Cond>
563 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
568 template<
class T,
class R>
572 static_assert(std::is_same<
decltype(redfunc(objs)), T>
::value,
"redfunc does not have the correct signature");
573 return SeqReduce(objs, redfunc);
583 template<
class T,
class BINARYOP>
587 static_assert(std::is_same<
decltype(redfunc(objs.front(), objs.front())), T>
::value,
"redfunc does not have the correct signature");
588 return ParallelReduce(objs, redfunc);
597 template<
class T,
class R>
600 return redfunc(objs);
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
This class defines an interface to execute the same task multiple times, possibly in parallel and with different arguments every time.
A pseudo container class which is a generator of indices.
This class provides a simple interface to execute the same task multiple times in parallel threads, i.e. with multiple threads.
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< InvokeResult_t< F > >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per chunk.
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
ROOT::TypeTraits::InvokeResult_t< F, Args... > InvokeResult_t
auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t< F >
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce" an std::vector into a single object by passing a function as the second argument defining the reduction operation.
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Execute a function without arguments several times in parallel.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.