12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
15#include "RConfigure.h"
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
/// Execute func (with no arguments) nTimes in parallel.
/// \param func    Callable invoked with no arguments.
/// \param nTimes  Number of invocations requested.
/// \param nChunks Chunk count; defaults to 0. NOTE(review): the definition computes a per-chunk
///                step as (nTimes + nChunks - 1) / nChunks, so 0 presumably selects the
///                unchunked path -- confirm against the full definition.
45 void Foreach(
F func,
unsigned nTimes,
unsigned nChunks = 0);
46 template<
class F,
class INTEGER>
/// Foreach overload that receives its arguments as an std::initializer_list.
/// \param func    Callable to run; presumably invoked once per element of args -- confirm
///                against the definition.
/// \param args    Values to process, passed by value.
/// \param nChunks Chunk count; defaults to 0.
49 template<
class F,
class T>
50 void Foreach(
F func, std::initializer_list<T> args,
unsigned nChunks = 0);
/// Foreach overload operating on a non-const std::vector.
/// The definition dispatches func(args[i]) through ParallelFor over the indices of args.
/// \param func    Callable invoked with one element of args.
/// \param args    Elements to process, taken by non-const reference.
/// \param nChunks Chunk count; defaults to 0. When chunking is used the definition processes
///                (args.size() + nChunks - 1) / nChunks consecutive elements per call.
52 template<
class F,
class T>
53 void Foreach(
F func, std::vector<T> &args,
unsigned nChunks = 0);
/// Foreach overload operating on a const std::vector.
/// The definition dispatches func(args[i]) through ParallelFor over the indices of args.
/// \param func    Callable invoked with one element of args.
/// \param args    Elements to process, taken by const reference.
/// \param nChunks Chunk count; defaults to 0. When chunking is used the definition processes
///                (args.size() + nChunks - 1) / nChunks consecutive elements per call.
54 template<
class F,
class T>
55 void Foreach(
F func,
const std::vector<T> &args,
unsigned nChunks = 0);
/// Execute func (with no arguments) nTimes in parallel.
/// \tparam Cond  Defaults to noReferenceCond<F>; NOTE(review): presumably a constraint on the
///               callable's return type -- its definition is not visible here, confirm.
/// \param func   Callable invoked with no arguments.
/// \param nTimes Number of invocations; the definition allocates one result slot per invocation.
/// \return std::vector whose element type is std::result_of<F()>::type.
58 template<
class F,
class Cond = noReferenceCond<F>>
59 auto Map(
F func,
unsigned nTimes) -> std::vector<
typename std::result_of<
F()>
::type>;
60 template<
class F,
class INTEGER,
class Cond = noReferenceCond<F, INTEGER>>
/// Map overload applying func to each element of a vector.
/// The definition stores func(args[i]) into slot i of a result vector sized args.size(),
/// dispatching the indices through ParallelFor.
/// \param func Callable invoked with one element of args.
/// \param args Input elements, taken by non-const reference.
/// \return std::vector whose element type is std::result_of<F(T)>::type.
62 template<
class F,
class T,
class Cond = noReferenceCond<F, T>>
63 auto Map(
F func, std::vector<T> &args) -> std::vector<
typename std::result_of<
F(
T)>
::type>;
/// Behaves like Map(func, nTimes), but an additional redfunc must be provided.
/// The definition returns Reduce(Map(func, nTimes), redfunc).
/// \param func    Callable invoked with no arguments.
/// \param nTimes  Number of invocations.
/// \param redfunc Reduction callable handed to Reduce.
/// \return A single value of type std::result_of<F()>::type.
70 template<
class F,
class R,
class Cond = noReferenceCond<F>>
71 auto MapReduce(
F func,
unsigned nTimes,
R redfunc) ->
typename std::result_of<
F()>
::type;
/// Chunked variant of MapReduce(func, nTimes, redfunc).
/// The definition returns Reduce(Map(func, nTimes, redfunc, nChunks), redfunc), i.e. redfunc is
/// used both for the per-chunk reduction inside Map and for the final reduction.
/// \param func    Callable invoked with no arguments.
/// \param nTimes  Number of invocations.
/// \param redfunc Reduction callable.
/// \param nChunks Chunk count forwarded to Map.
/// \return A single value of type std::result_of<F()>::type.
72 template<
class F,
class R,
class Cond = noReferenceCond<F>>
73 auto MapReduce(
F func,
unsigned nTimes,
R redfunc,
unsigned nChunks) ->
typename std::result_of<
F()>
::type;
74 template<
class F,
class INTEGER,
class R,
class Cond = noReferenceCond<F, INTEGER>>
/// Chunked MapReduce over the elements of an std::initializer_list.
/// \param func    Callable invoked with one element of args.
/// \param args    Input values, passed by value.
/// \param redfunc Reduction callable.
/// \param nChunks Chunk count.
/// \return A single value of type std::result_of<F(T)>::type.
77 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
78 auto MapReduce(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) ->
typename std::result_of<
F(
T)>
::type;
/// MapReduce over the elements of a vector, without chunking.
/// The definition returns Reduce(Map(func, args), redfunc).
/// \param func    Callable invoked with one element of args.
/// \param args    Input elements, taken by non-const reference.
/// \param redfunc Reduction callable handed to Reduce.
/// \return A single value of type std::result_of<F(T)>::type.
80 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
81 auto MapReduce(
F func, std::vector<T> &args,
R redfunc) ->
typename std::result_of<
F(
T)>
::type;
/// Chunked MapReduce over the elements of a vector.
/// The definition returns Reduce(Map(func, args, redfunc, nChunks), redfunc).
/// \param func    Callable invoked with one element of args.
/// \param args    Input elements, taken by non-const reference.
/// \param redfunc Reduction callable, used per chunk and for the final reduction.
/// \param nChunks Chunk count forwarded to Map.
/// \return A single value of type std::result_of<F(T)>::type.
82 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
83 auto MapReduce(
F func, std::vector<T> &args,
R redfunc,
unsigned nChunks) ->
typename std::result_of<
F(
T)>
::type;
/// "Reduce" an std::vector into a single object in parallel by passing a binary operator.
/// The definition static_asserts that redfunc(objs.front(), objs.front()) has type T and then
/// returns ParallelReduce(objs, redfunc).
/// \param objs    Values to combine.
/// \param redfunc Binary operator taking two elements.
/// \return The type produced by redfunc(objs.front(), objs.front()).
86 template<
class T,
class BINARYOP>
auto Reduce(
const std::vector<T> &objs, BINARYOP redfunc) ->
decltype(redfunc(objs.front(), objs.front()));
/// Reduce overload for a callable that consumes the whole vector at once.
/// The definition static_asserts that redfunc(objs) has type T and then returns
/// SeqReduce(objs, redfunc).
/// \param objs    Values to combine.
/// \param redfunc Callable invoked as redfunc(objs).
/// \return The type produced by redfunc(objs).
87 template<
class T,
class R>
auto Reduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Chunked Map: runs func nTimes and reduces the results of each chunk with redfunc.
/// The definition uses step = (nTimes + nChunks - 1) / nChunks invocations per chunk and stores
/// Reduce(partialResults, redfunc) for each chunk, so the result holds one entry per chunk.
/// It also contains a `return Map(func, nTimes)` path; NOTE(review): presumably taken when
/// nChunks == 0 -- the guarding condition is not visible here, confirm.
/// \param func    Callable invoked with no arguments.
/// \param nTimes  Number of invocations.
/// \param redfunc Reduction callable applied to each chunk's partial results.
/// \param nChunks Requested number of chunks.
/// \return std::vector whose element type is std::result_of<F()>::type.
92 template<
class F,
class R,
class Cond = noReferenceCond<F>>
93 auto Map(
F func,
unsigned nTimes,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F()>
::type>;
94 template<
class F,
class INTEGER,
class R,
class Cond = noReferenceCond<F, INTEGER>>
/// Chunked Map over the elements of a vector, reducing each chunk with redfunc.
/// The definition uses step = (args.size() + nChunks - 1) / nChunks elements per chunk and
/// stores Reduce(partialResults, redfunc) for each chunk.
/// \param func    Callable invoked with one element of args.
/// \param args    Input elements, taken by non-const reference.
/// \param redfunc Reduction callable applied to each chunk's partial results.
/// \param nChunks Requested number of chunks.
/// \return std::vector whose element type is std::result_of<F(T)>::type.
96 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
97 auto Map(
F func, std::vector<T> &args,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F(
T)>
::type>;
/// Chunked Map over the elements of an std::initializer_list.
/// The definition builds a std::vector<T> from args and calls the vector overload
/// Map(func, vargs, redfunc, nChunks).
/// \param func    Callable invoked with one element of args.
/// \param args    Input values, passed by value.
/// \param redfunc Reduction callable.
/// \param nChunks Requested number of chunks.
/// \return std::vector whose element type is std::result_of<F(T)>::type.
98 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
99 auto Map(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F(
T)>
::type>;
/// Reduction helper that hands the whole vector to redfunc.
/// The definition consists of `return redfunc(objs);`.
/// \param objs    Values to combine.
/// \param redfunc Callable invoked as redfunc(objs).
/// \return The value produced by redfunc(objs).
105 template<
class T,
class R>
106 auto SeqReduce(
const std::vector<T> &objs,
R redfunc) ->
decltype(redfunc(objs));
/// Shared pointer to a ROOT::Internal::TPoolManager, default-initialised to nullptr.
/// NOTE(review): presumably the handle to the shared task-pool/scheduler -- where it is
/// assigned is not visible here, confirm.
108 std::shared_ptr<ROOT::Internal::TPoolManager>
fSched =
nullptr;
120 ParallelFor(0U, nTimes, 1, [&](
unsigned int){func();});
124 unsigned step = (nTimes + nChunks - 1) / nChunks;
125 auto lambda = [&](
unsigned int i)
127 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
137 template<
class F,
class INTEGER>
143 unsigned start = *args.
begin();
144 unsigned end = *args.
end();
145 unsigned seqStep = args.
step();
146 unsigned step = (end - start + nChunks - 1) / nChunks;
148 auto lambda = [&](
unsigned int i)
150 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
161 template<
class F,
class T>
163 std::vector<T> vargs(std::move(args));
171 template<
class F,
class T>
173 unsigned int nToProcess = args.size();
175 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
179 unsigned step = (nToProcess + nChunks - 1) / nChunks;
180 auto lambda = [&](
unsigned int i)
182 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
191 template<
class F,
class T>
193 unsigned int nToProcess = args.size();
195 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
199 unsigned step = (nToProcess + nChunks - 1) / nChunks;
200 auto lambda = [&](
unsigned int i)
202 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
214 template<
class F,
class Cond>
216 using retType =
decltype(func());
217 std::vector<retType> reslist(nTimes);
218 auto lambda = [&](
unsigned int i)
222 ParallelFor(0U, nTimes, 1, lambda);
231 template<
class F,
class INTEGER,
class Cond>
233 unsigned start = *args.begin();
234 unsigned end = *args.end();
235 unsigned seqStep = args.step();
237 using retType =
decltype(func(start));
238 std::vector<retType> reslist(args.size());
239 auto lambda = [&](
unsigned int i)
241 reslist[i] = func(i);
243 ParallelFor(start, end, seqStep, lambda);
252 template<
class F,
class R,
class Cond>
256 return Map(func, nTimes);
259 unsigned step = (nTimes + nChunks - 1) / nChunks;
261 unsigned actualChunks = (nTimes + step - 1) / step;
262 using retType =
decltype(func());
263 std::vector<retType> reslist(actualChunks);
264 auto lambda = [&](
unsigned int i)
266 std::vector<retType> partialResults(std::min(nTimes-i, step));
267 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
268 partialResults[j] = func();
270 reslist[i / step] = Reduce(partialResults, redfunc);
272 ParallelFor(0U, nTimes, step, lambda);
283 template<
class F,
class T,
class Cond>
286 using retType =
decltype(func(args.front()));
288 unsigned int nToProcess = args.size();
289 std::vector<retType> reslist(nToProcess);
291 auto lambda = [&](
unsigned int i)
293 reslist[i] = func(args[i]);
296 ParallelFor(0U, nToProcess, 1, lambda);
306 template<
class F,
class INTEGER,
class R,
class Cond>
313 unsigned start = *args.begin();
314 unsigned end = *args.end();
315 unsigned seqStep = args.step();
317 using retType =
decltype(func(start));
318 std::vector<retType> reslist(end - start);
319 auto lambda = [&](
unsigned int i)
321 reslist[i] = func(i);
323 ParallelFor(start, end, seqStep, lambda);
326 return Map(func, args);
330 unsigned start = *args.begin();
331 unsigned end = *args.end();
332 unsigned seqStep = args.step();
333 unsigned step = (end - start + nChunks - 1) / nChunks;
335 unsigned actualChunks = (end - start + step - 1) / step;
337 using retType =
decltype(func(start));
338 std::vector<retType> reslist(actualChunks);
339 auto lambda = [&](
unsigned int i)
341 std::vector<retType> partialResults(std::min(end-i, step));
342 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
343 partialResults[j] = func(i + j);
345 reslist[i / step] = Reduce(partialResults, redfunc);
347 ParallelFor(start, end, step, lambda);
/// Chunked Map over a vector (definition fragment; signature and braces are not visible here).
/// Each lambda call handles up to `step` consecutive elements starting at index i and stores the
/// reduced value of that chunk in reslist[i / step].
358 template<
class F,
class T,
class R,
class Cond>
// Unchunked path; NOTE(review): presumably guarded by a check on nChunks -- not visible here.
362 return Map(func, args);
365 unsigned int nToProcess = args.size();
// Ceiling division: elements per chunk.
366 unsigned step = (nToProcess + nChunks - 1) / nChunks;
// Ceiling division again: number of chunks actually needed for that step.
368 unsigned actualChunks = (nToProcess + step - 1) / step;
370 using retType =
decltype(func(args.front()));
371 std::vector<retType> reslist(actualChunks);
372 auto lambda = [&](
unsigned int i)
// NOTE(review): this buffer is std::vector<T> sized `step`, whereas the sibling chunked
// overloads use std::vector<retType> sized with std::min(remaining, step). The fill loop below
// stops at nToProcess, so on a final chunk shorter than `step` the trailing value-initialised
// elements are still passed to Reduce. The element type also only matches func's result when
// retType is T. Looks like it should mirror the siblings -- confirm and fix in the real header.
374 std::vector<T> partialResults(step);
375 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
376 partialResults[j] = func(args[i + j]);
// Collapse this chunk's results into a single value.
378 reslist[i / step] = Reduce(partialResults, redfunc);
// Invoke the lambda with start 0, end nToProcess and stride `step`.
381 ParallelFor(0U, nToProcess, step, lambda);
/// Chunked Map over an std::initializer_list (definition; the return statement and closing
/// brace are not visible here).
/// Materialises args into a std::vector<T> and delegates to the vector overload of Map.
/// \param func    Callable invoked with one element of args.
/// \param args    Input values, passed by value.
/// \param redfunc Reduction callable forwarded to the vector overload.
/// \param nChunks Chunk count forwarded to the vector overload.
391 template<
class F,
class T,
class R,
class Cond>
392 auto TThreadExecutor::Map(
F func, std::initializer_list<T> args,
R redfunc,
unsigned nChunks) -> std::vector<
typename std::result_of<
F(
T)>
::type> {
// The vector overload takes a non-const std::vector<T>&, so a named vector is built first.
393 std::vector<T> vargs(std::move(args));
// Binding the returned temporary to a const reference extends its lifetime to this scope.
394 const auto &reslist =
Map(func, vargs, redfunc, nChunks);
407 template<
class F,
class R,
class Cond>
409 return Reduce(
Map(func, nTimes), redfunc);
412 template<
class F,
class R,
class Cond>
414 return Reduce(
Map(func, nTimes, redfunc, nChunks), redfunc);
417 template<
class F,
class INTEGER,
class R,
class Cond>
419 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
422 template<
class F,
class T,
class R,
class Cond>
424 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
428 template<
class F,
class T,
class R,
class Cond>
430 return Reduce(
Map(func, args), redfunc);
433 template<
class F,
class T,
class R,
class Cond>
435 return Reduce(
Map(func, args, redfunc, nChunks), redfunc);
441 template<
class T,
class BINARYOP>
445 static_assert(std::is_same<
decltype(redfunc(objs.front(), objs.front())),
T>::value,
"redfunc does not have the correct signature");
446 return ParallelReduce(objs, redfunc);
452 template<
class T,
class R>
456 static_assert(std::is_same<
decltype(redfunc(objs)),
T>::value,
"redfunc does not have the correct signature");
457 return SeqReduce(objs, redfunc);
460 template<
class T,
class R>
463 return redfunc(objs);
#define R(a, b, c, d, e, f, g, h, i)
This class defines an interface to execute the same task multiple times in parallel,...
A pseudo container class which is a generator of indices.
This class provides a simple interface to execute the same task multiple times in parallel,...
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
TThreadExecutor & operator=(TThreadExecutor &)=delete
std::shared_ptr< ROOT::Internal::TPoolManager > fSched
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
auto Map(F func, std::vector< T > &args, R redfunc, unsigned nChunks) -> std::vector< typename std::result_of< F(T)>::type >
auto MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of< F()>::type
This method behaves just like Map, but an additional redfunc function must be provided.
auto Reduce(const std::vector< T > &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
"Reduce" an std::vector into a single object in parallel by passing a binary operator as the second argument.
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute func (with no arguments) nTimes in parallel.
TThreadExecutor()
Class constructor.
TThreadExecutor(TThreadExecutor &)=delete
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
auto Map(F func, std::initializer_list< T > args, R redfunc, unsigned nChunks) -> std::vector< typename std::result_of< F(T)>::type >
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
auto Map(Args &&... args) -> decltype(ROOT::Detail::VecOps::MapFromTuple(std::forward_as_tuple(args...), std::make_index_sequence< sizeof...(args) - 1 >()))
Create new collection applying a callable to the elements of the input collection.
Namespace for new ROOT classes and functions.