Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TThreadExecutor.hxx
Go to the documentation of this file.
1// @(#)root/thread:$Id$
2// Author: Xavier Valls March 2016
3
4/*************************************************************************
5 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
14
15#include "RConfigure.h"
16
17// exclude in case ROOT does not have IMT support
18#ifndef R__USE_IMT
19// No need to error out for dictionaries.
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
22# endif
23#else
24
26#include "ROOT/TSeq.hxx"
27#include "ROOT/TypeTraits.hxx" // InvokeResult
28#include "RTaskArena.hxx"
29#include "TError.h"
30
#include <algorithm> //std::min
#include <functional> //std::function
#include <initializer_list>
#include <memory>
#include <numeric> //std::accumulate
#include <type_traits> //std::enable_if
#include <utility> //std::move
#include <vector>
38
39namespace ROOT {
40
41 class TThreadExecutor: public TExecutorCRTP<TThreadExecutor> {
43
44 public:
45
46 explicit TThreadExecutor(UInt_t nThreads = 0u);
47
50
51 // ForEach
52 //
53 template<class F>
54 void Foreach(F func, unsigned nTimes, unsigned nChunks = 0);
55 template<class F, class INTEGER>
56 void Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks = 0);
57 template<class F, class T>
58 void Foreach(F func, std::initializer_list<T> args, unsigned nChunks = 0);
59 template<class F, class T>
60 void Foreach(F func, std::vector<T> &args, unsigned nChunks = 0);
61 template<class F, class T>
62 void Foreach(F func, const std::vector<T> &args, unsigned nChunks = 0);
63
64 // Map
65 //
67
68 // MapReduce
69 //
70 // We need to reimplement the MapReduce interfaces to allow for parallel reduction, defined in
71 // this class but not in the base class.
72 //
73 // the late return types also check at compile-time whether redfunc is compatible with func,
74 // other than checking that func is compatible with the type of arguments.
75 // a static_assert check in TThreadExecutor::Reduce is used to check that redfunc is compatible with the type returned by func
77 template <class F, class R, class Cond = validMapReturnCond<F>>
78 auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>;
79 template <class F, class R, class Cond = validMapReturnCond<F>>
80 auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>;
81 template <class F, class INTEGER, class R, class Cond = validMapReturnCond<F, INTEGER>>
82 auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, INTEGER>;
83 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
84 auto MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
85 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
86 auto MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
87 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
88 auto MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
89 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
90 auto MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
91 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
92 auto MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
93
95 template<class T, class R> auto Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
96 template<class T, class BINARYOP> auto Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
97
98 unsigned GetPoolSize() const;
99
100 private:
101 // Implementation of the Map functions declared in the parent class (TExecutorCRTP)
102 //
103 template <class F, class Cond = validMapReturnCond<F>>
104 auto MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>;
105 template <class F, class INTEGER, class Cond = validMapReturnCond<F, INTEGER>>
106 auto MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>;
107 template <class F, class T, class Cond = validMapReturnCond<F, T>>
108 auto MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
109 template <class F, class T, class Cond = validMapReturnCond<F, T>>
110 auto MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
111
112 // Extension of the Map interfaces with chunking, specific to this class and
113 // only available from a MapReduce call.
114 template <class F, class R, class Cond = validMapReturnCond<F>>
115 auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>;
116 template <class F, class INTEGER, class R, class Cond = validMapReturnCond<F, INTEGER>>
117 auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
118 -> std::vector<InvokeResult_t<F, INTEGER>>;
119 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
120 auto Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
121 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
122 auto Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
123 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
124 auto Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
125
126 // Functions that interface with the parallel library used as a backend
127 void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function<void(unsigned int i)> &f);
128 double ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc);
129 float ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc);
130 template<class T, class R>
131 auto SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
132
133 /// Pointer to the TBB task arena wrapper
134 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper> fTaskArenaW = nullptr;
135 };
136
137 /************ TEMPLATE METHODS IMPLEMENTATION ******************/
138
139 //////////////////////////////////////////////////////////////////////////
140 /// \brief Execute a function without arguments several times in parallel, dividing the execution in nChunks.
141 ///
142 /// \param func Function to be executed.
143 /// \param nTimes Number of times function should be called.
144 /// \param nChunks Number of chunks to split the input data for processing.
145 template<class F>
146 void TThreadExecutor::Foreach(F func, unsigned nTimes, unsigned nChunks) {
147 if (nChunks == 0) {
148 ParallelFor(0U, nTimes, 1, [&](unsigned int){func();});
149 return;
150 }
151
152 unsigned step = (nTimes + nChunks - 1) / nChunks;
153 auto lambda = [&](unsigned int i)
154 {
155 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
156 func();
157 }
158 };
159 ParallelFor(0U, nTimes, step, lambda);
160 }
161
162 //////////////////////////////////////////////////////////////////////////
163 /// \brief Execute a function in parallel over a sequence of indexes, dividing the execution in nChunks.
164 ///
165 /// \param func Function to be executed. Must take an element of the sequence passed assecond argument as a parameter.
166 /// \param args Sequence of indexes to execute `func` on.
167 /// \param nChunks Number of chunks to split the input data for processing.
168 template<class F, class INTEGER>
169 void TThreadExecutor::Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks) {
170 if (nChunks == 0) {
171 ParallelFor(*args.begin(), *args.end(), args.step(), [&](unsigned int i){func(i);});
172 return;
173 }
174 unsigned start = *args.begin();
175 unsigned end = *args.end();
176 unsigned seqStep = args.step();
177 unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
178
179 auto lambda = [&](unsigned int i)
180 {
181 for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
182 func(i + j);
183 }
184 };
185 ParallelFor(start, end, step, lambda);
186 }
187
188 //////////////////////////////////////////////////////////////////////////
189 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks.
190 ///
191 /// \param func Function to be executed on the elements of the initializer_list passed as second parameter.
192 /// \param args initializer_list for a vector to apply `func` on.
193 /// \param nChunks Number of chunks to split the input data for processing.
194 template<class F, class T>
195 void TThreadExecutor::Foreach(F func, std::initializer_list<T> args, unsigned nChunks) {
196 std::vector<T> vargs(std::move(args));
197 Foreach(func, vargs, nChunks);
198 }
199
200 //////////////////////////////////////////////////////////////////////////
201 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks.
202 ///
203 /// \param func Function to be executed on the elements of the vector passed as second parameter.
204 /// \param args Vector of elements passed as an argument to `func`.
205 /// \param nChunks Number of chunks to split the input data for processing.
206 template<class F, class T>
207 void TThreadExecutor::Foreach(F func, std::vector<T> &args, unsigned nChunks) {
208 unsigned int nToProcess = args.size();
209 if (nChunks == 0) {
210 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
211 return;
212 }
213
214 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
215 auto lambda = [&](unsigned int i)
216 {
217 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
218 func(args[i + j]);
219 }
220 };
221 ParallelFor(0U, nToProcess, step, lambda);
222 }
223
224 //////////////////////////////////////////////////////////////////////////
225 /// \brief Execute a function in parallel over the elements of a immutable vector, dividing the execution in nChunks.
226 ///
227 /// \param func Function to be executed on the elements of the vector passed as second parameter.
228 /// \param args Immutable vector of elements passed as an argument to `func`.
229 /// \param nChunks Number of chunks to split the input data for processing.
230 template<class F, class T>
231 void TThreadExecutor::Foreach(F func, const std::vector<T> &args, unsigned nChunks) {
232 unsigned int nToProcess = args.size();
233 if (nChunks == 0) {
234 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
235 return;
236 }
237
238 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
239 auto lambda = [&](unsigned int i)
240 {
241 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
242 func(args[i + j]);
243 }
244 };
245 ParallelFor(0U, nToProcess, step, lambda);
246 }
247
248 //////////////////////////////////////////////////////////////////////////
249 /// \brief Execute a function without arguments several times in parallel.
250 /// Implementation of the Map method.
251 ///
252 /// \copydetails TExecutorCRTP::Map(F func,unsigned nTimes)
253 template <class F, class Cond>
254 auto TThreadExecutor::MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>
255 {
256 using retType = decltype(func());
257 std::vector<retType> reslist(nTimes);
258 auto lambda = [&](unsigned int i)
259 {
260 reslist[i] = func();
261 };
262 ParallelFor(0U, nTimes, 1, lambda);
263
264 return reslist;
265 }
266
267 //////////////////////////////////////////////////////////////////////////
268 /// \brief Execute a function over a sequence of indexes in parallel.
269 /// Implementation of the Map method.
270 ///
271 /// \copydetails TExecutorCRTP::Map(F func,ROOT::TSeq<INTEGER> args)
272 template <class F, class INTEGER, class Cond>
273 auto TThreadExecutor::MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>
274 {
275 using retType = decltype(func(*args.begin()));
276 std::vector<retType> reslist(args.size());
277 auto lambda = [&](unsigned int i) { reslist[i] = func(args[i]); };
278 ParallelFor(0U, args.size(), 1, lambda);
279
280 return reslist;
281 }
282
283 //////////////////////////////////////////////////////////////////////////
284 /// \brief Execute a function `nTimes` in parallel, dividing the execution in nChunks and
285 /// providing a result per chunk.
286 ///
287 /// \copydetails ROOT::Internal::TExecutor::Map(F func,unsigned nTimes,R redfunc,unsigned nChunks)
288 template <class F, class R, class Cond>
289 auto TThreadExecutor::Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>
290 {
291 if (nChunks == 0)
292 {
293 return Map(func, nTimes);
294 }
295
296 unsigned step = (nTimes + nChunks - 1) / nChunks;
297 // Avoid empty chunks
298 unsigned actualChunks = (nTimes + step - 1) / step;
299 using retType = decltype(func());
300 std::vector<retType> reslist(actualChunks);
301 auto lambda = [&](unsigned int i)
302 {
303 std::vector<retType> partialResults(std::min(nTimes-i, step));
304 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
305 partialResults[j] = func();
306 }
307 reslist[i / step] = Reduce(partialResults, redfunc);
308 };
309 ParallelFor(0U, nTimes, step, lambda);
310
311 return reslist;
312 }
313
314 //////////////////////////////////////////////////////////////////////////
315 /// \brief Execute a function over the elements of a vector in parallel.
316 /// Implementation of the Map method.
317 ///
318 /// \copydetails TExecutorCRTP::Map(F func,std::vector<T> &args)
319 template <class F, class T, class Cond>
320 auto TThreadExecutor::MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
321 {
322 // //check whether func is callable
323 using retType = decltype(func(args.front()));
324
325 unsigned int nToProcess = args.size();
326 std::vector<retType> reslist(nToProcess);
327
328 auto lambda = [&](unsigned int i)
329 {
330 reslist[i] = func(args[i]);
331 };
332
333 ParallelFor(0U, nToProcess, 1, lambda);
334
335 return reslist;
336 }
337
338 //////////////////////////////////////////////////////////////////////////
339 /// \brief Execute a function over the elements of a vector in parallel.
340 /// Implementation of the Map method.
341 ///
342 /// \copydetails TExecutorCRTP::Map(F func,const std::vector<T> &args)
343 template <class F, class T, class Cond>
344 auto TThreadExecutor::MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
345 {
346 // //check whether func is callable
347 using retType = decltype(func(args.front()));
348
349 unsigned int nToProcess = args.size();
350 std::vector<retType> reslist(nToProcess);
351
352 auto lambda = [&](unsigned int i)
353 {
354 reslist[i] = func(args[i]);
355 };
356
357 ParallelFor(0U, nToProcess, 1, lambda);
358
359 return reslist;
360 }
361
362 //////////////////////////////////////////////////////////////////////////
363 /// \brief Execute a function in parallel over the elements of a sequence, dividing the execution in nChunks and
364 /// providing a result per chunk.
365 ///
366 /// \copydetails ROOT::Internal::TExecutor::Map(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
367 template <class F, class INTEGER, class R, class Cond>
368 auto TThreadExecutor::Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
369 -> std::vector<InvokeResult_t<F, INTEGER>>
370 {
371 if (nChunks == 0)
372 {
373 return Map(func, args);
374 }
375
376 unsigned nToProcess = args.size();
377 unsigned step = (nToProcess + nChunks - 1) / nChunks; // ceiling the division
378 // Avoid empty chunks
379 unsigned actualChunks = (nToProcess + step - 1) / step;
380
381 using retType = decltype(func(*args.begin()));
382 std::vector<retType> reslist(actualChunks);
383 auto lambda = [&](unsigned int i) {
384 std::vector<retType> partialResults(std::min(step, nToProcess - i)); // last chunk might be smaller
385 for (unsigned j = 0; j < partialResults.size(); j++) {
386 partialResults[j] = func(args[i + j]);
387 }
388 reslist[i / step] = Reduce(partialResults, redfunc);
389 };
390
391 ParallelFor(0U, nToProcess, step, lambda);
392
393 return reslist;
394 }
395
396 //////////////////////////////////////////////////////////////////////////
397 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks and
398 /// providing a result per chunk.
399 ///
400 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
401 template <class F, class T, class R, class Cond>
402 auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks)
403 -> std::vector<InvokeResult_t<F, T>>
404 {
405 if (nChunks == 0)
406 {
407 return Map(func, args);
408 }
409
410 unsigned int nToProcess = args.size();
411 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
412 // Avoid empty chunks
413 unsigned actualChunks = (nToProcess + step - 1) / step;
414
415 using retType = decltype(func(args.front()));
416 std::vector<retType> reslist(actualChunks);
417 auto lambda = [&](unsigned int i) {
418 std::vector<retType> partialResults(std::min(step, nToProcess - i));
419 for (unsigned j = 0; j < partialResults.size(); j++) {
420 partialResults[j] = func(args[i + j]);
421 }
422 reslist[i / step] = Reduce(partialResults, redfunc);
423 };
424
425 ParallelFor(0U, nToProcess, step, lambda);
426
427 return reslist;
428 }
429
430 //////////////////////////////////////////////////////////////////////////
431 /// \brief Execute a function in parallel over the elements of an immutable vector, dividing the execution in nChunks and
432 /// providing a result per chunk.
433 ///
434 /// \copydetails ROOT::Internal::TExecutor::Map(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
435 template <class F, class T, class R, class Cond>
436 auto TThreadExecutor::Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
437 -> std::vector<InvokeResult_t<F, T>>
438 {
439 if (nChunks == 0)
440 {
441 return Map(func, args);
442 }
443
444 unsigned int nToProcess = args.size();
445 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
446 // Avoid empty chunks
447 unsigned actualChunks = (nToProcess + step - 1) / step;
448
449 using retType = decltype(func(args.front()));
450 std::vector<retType> reslist(actualChunks);
451 auto lambda = [&](unsigned int i) {
452 std::vector<retType> partialResults(std::min(step, nToProcess - i));
453 for (unsigned j = 0; j < partialResults.size(); j++) {
454 partialResults[j] = func(args[i + j]);
455 }
456 reslist[i / step] = Reduce(partialResults, redfunc);
457 };
458
459 ParallelFor(0U, nToProcess, step, lambda);
460
461 return reslist;
462 }
463
464 //////////////////////////////////////////////////////////////////////////
465 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks and
466 /// providing a result per chunk.
467 ///
468 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
469 template <class F, class T, class R, class Cond>
470 auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
471 -> std::vector<InvokeResult_t<F, T>>
472 {
473 std::vector<T> vargs(std::move(args));
474 const auto &reslist = Map(func, vargs, redfunc, nChunks);
475 return reslist;
476 }
477
478 //////////////////////////////////////////////////////////////////////////
479 /// \brief Execute a function `nTimes` in parallel (Map) and accumulate the results into a single value (Reduce).
480 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc)
481 template <class F, class R, class Cond>
482 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>
483 {
484 return Reduce(Map(func, nTimes), redfunc);
485 }
486
487 //////////////////////////////////////////////////////////////////////////
488 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
489 /// Benefits from partial reduction into `nChunks` intermediate results.
490 ///
491 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc,unsigned nChunks)
492 template <class F, class R, class Cond>
493 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>
494 {
495 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
496 }
497
498 //////////////////////////////////////////////////////////////////////////
499 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
500 /// Benefits from partial reduction into `nChunks` intermediate results.
501 ///
502 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
503 template <class F, class INTEGER, class R, class Cond>
504 auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
506 {
507 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
508 }
509
510 //////////////////////////////////////////////////////////////////////////
511 /// \brief Execute a function in parallel over the elements of an initializer_list (Map) and accumulate the results into a single value (Reduce).
512 /// Benefits from partial reduction into `nChunks` intermediate results.
513 ///
514 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
515 template <class F, class T, class R, class Cond>
516 auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
518 {
519 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
520 }
521
522 //////////////////////////////////////////////////////////////////////////
523 /// \brief Execute a function over the elements of a vector in parallel (Map) and accumulate the results into a single value (Reduce).
524 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc)
525 template <class F, class T, class R, class Cond>
526 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
527 {
528 return Reduce(Map(func, args), redfunc);
529 }
530
531 //////////////////////////////////////////////////////////////////////////
532 /// \brief Execute a function over the elements of an immutable vector in parallel (Map) and accumulate the results into a single value (Reduce).
533 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc)
534 template <class F, class T, class R, class Cond>
535 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
536 {
537 return Reduce(Map(func, args), redfunc);
538 }
539
540 //////////////////////////////////////////////////////////////////////////
541 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
542 /// Benefits from partial reduction into `nChunks` intermediate results.
543 ///
544 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
545 template <class F, class T, class R, class Cond>
546 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>
547 {
548 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
549 }
550
551 //////////////////////////////////////////////////////////////////////////
552 /// \brief Execute a function in parallel over the elements of an immutable vector (Map) and accumulate the results into a single value (Reduce).
553 /// Benefits from partial reduction into `nChunks` intermediate results.
554 ///
555 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
556 template <class F, class T, class R, class Cond>
557 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
559 {
560 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
561 }
562
563 //////////////////////////////////////////////////////////////////////////
564 /// \copydoc ROOT::Internal::TExecutor::Reduce(const std::vector<T> &objs,R redfunc)
565 template<class T, class R>
566 auto TThreadExecutor::Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
567 {
568 // check we can apply reduce to objs
569 static_assert(std::is_same<decltype(redfunc(objs)), T>::value, "redfunc does not have the correct signature");
570 return SeqReduce(objs, redfunc);
571 }
572
573 //////////////////////////////////////////////////////////////////////////
574 /// \brief "Reduce" an std::vector into a single object in parallel by passing a
575 /// binary function as the second argument defining the reduction operation.
576 ///
577 /// \param objs A vector of elements to combine.
578 /// \param redfunc Binary reduction function to combine the elements of the vector `objs`.
579 /// \return A value result of combining the vector elements into a single object of the same type.
580 template<class T, class BINARYOP>
581 auto TThreadExecutor::Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
582 {
583 // check we can apply reduce to objs
584 static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value, "redfunc does not have the correct signature");
585 return ParallelReduce(objs, redfunc);
586 }
587
588 //////////////////////////////////////////////////////////////////////////
589 /// \brief "Reduce", sequentially, an std::vector into a single object
590 ///
591 /// \param objs A vector of elements to combine.
592 /// \param redfunc Reduction function to combine the elements of the vector `objs`.
593 /// \return A value result of combining the vector elements into a single object of the same type.
594 template<class T, class R>
595 auto TThreadExecutor::SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
596 {
597 return redfunc(objs);
598 }
599
600} // namespace ROOT
601
602#endif // R__USE_IMT
603#endif
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define a(i)
Definition RSha256.hxx:99
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
This class defines an interface to execute the same task multiple times, possibly in parallel and wit...
ROOT::TypeTraits::InvokeResult_t< F, Args... > InvokeResult_t
A pseudo container class which is a generator of indices.
Definition TSeq.hxx:67
iterator begin() const
Definition TSeq.hxx:172
T step() const
Definition TSeq.hxx:193
iterator end() const
Definition TSeq.hxx:175
This class provides a simple interface to execute the same task multiple times in parallel threads,...
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< InvokeResult_t< F > >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per c...
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t< F >
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce" an std::vector into a single object by passing a function as the second argument defining th...
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Execute a function without arguments several times in parallel.
#define F(x, y, z)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...