Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TThreadExecutor.hxx
Go to the documentation of this file.
1// @(#)root/thread:$Id$
2// Author: Xavier Valls March 2016
3
4/*************************************************************************
5 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
14
15#include "RConfigure.h"
16
17// exclude in case ROOT does not have IMT support
18#ifndef R__USE_IMT
19// No need to error out for dictionaries.
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
22# endif
23#else
24
26#include "ROOT/TSeq.hxx"
27#include "ROOT/TypeTraits.hxx" // InvokeResult
28#include "RTaskArena.hxx"
29#include "TError.h"
30
31#include <functional> //std::function
32#include <initializer_list>
33#include <memory>
34#include <numeric> //std::accumulate
35#include <type_traits> //std::enable_if
36#include <utility> //std::move
37#include <vector>
38
39namespace ROOT {
40
// Parallel executor deriving from the CRTP base TExecutorCRTP<TThreadExecutor>.
// It offers Foreach / MapReduce / Reduce entry points and keeps a shared pointer to a
// TBB task arena wrapper (fTaskArenaW).
// NOTE(review): the embedded line numbers of this listing skip 42, 47-49, 66, 76 and 94, so a few
// declarations of the real header are not visible here — confirm against the original file before
// relying on this declaration being complete.
41 class TThreadExecutor: public TExecutorCRTP<TThreadExecutor> {
43
44 public:
45
// nThreads defaults to 0u; how that value is interpreted is decided by the out-of-line
// constructor, which is not part of this listing.
46 explicit TThreadExecutor(UInt_t nThreads = 0u);
47
50
51 // ForEach
52 //
// Overloads for: a repetition count, a TSeq of indices, an initializer_list, and a mutable or
// const vector. With the default nChunks = 0 the definitions hand every call to ParallelFor as
// a separate iteration; otherwise the work is grouped in at most nChunks chunks.
53 template<class F>
54 void Foreach(F func, unsigned nTimes, unsigned nChunks = 0);
55 template<class F, class INTEGER>
56 void Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks = 0);
57 template<class F, class T>
58 void Foreach(F func, std::initializer_list<T> args, unsigned nChunks = 0);
59 template<class F, class T>
60 void Foreach(F func, std::vector<T> &args, unsigned nChunks = 0);
61 template<class F, class T>
62 void Foreach(F func, const std::vector<T> &args, unsigned nChunks = 0);
63
64 // Map
65 //
67
68 // MapReduce
69 //
70 // We need to reimplement the MapReduce interfaces to allow for parallel reduction, defined in
71 // this class but not in the base class.
72 //
73 // the late return types also check at compile-time whether redfunc is compatible with func,
74 // other than checking that func is compatible with the type of arguments.
75 // a static_assert check in TThreadExecutor::Reduce is used to check that redfunc is compatible with the type returned by func
// Overloads taking nChunks reduce each chunk first and then reduce the per-chunk results;
// the others reduce the full vector of mapped results.
77 template <class F, class R, class Cond = validMapReturnCond<F>>
78 auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>;
79 template <class F, class R, class Cond = validMapReturnCond<F>>
80 auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>;
81 template <class F, class INTEGER, class R, class Cond = validMapReturnCond<F, INTEGER>>
82 auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, INTEGER>;
83 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
84 auto MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
85 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
86 auto MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
87 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
88 auto MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
89 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
90 auto MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
91 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
92 auto MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
93
// Two Reduce flavours, selected through the trailing return type: redfunc callable on the whole
// vector (forwarded to SeqReduce), or redfunc callable on two elements (forwarded to ParallelReduce).
95 template<class T, class R> auto Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
96 template<class T, class BINARYOP> auto Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
97
98 unsigned GetPoolSize() const;
99
100 private:
101 // Implementation of the Map functions declared in the parent class (TExecutorCRTP)
102 //
103 template <class F, class Cond = validMapReturnCond<F>>
104 auto MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>;
105 template <class F, class INTEGER, class Cond = validMapReturnCond<F, INTEGER>>
106 auto MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>;
107 template <class F, class T, class Cond = validMapReturnCond<F, T>>
108 auto MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
109 template <class F, class T, class Cond = validMapReturnCond<F, T>>
110 auto MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
111
112 // Extension of the Map interfaces with chunking, specific to this class and
113 // only available from a MapReduce call.
// Each of these returns one value per chunk: the chunk's mapped results reduced with redfunc.
114 template <class F, class R, class Cond = validMapReturnCond<F>>
115 auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>;
116 template <class F, class INTEGER, class R, class Cond = validMapReturnCond<F, INTEGER>>
117 auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
118 -> std::vector<InvokeResult_t<F, INTEGER>>;
119 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
120 auto Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
121 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
122 auto Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
123 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
124 auto Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
125
126 // Functions that interface with the parallel library used as a backend
// ParallelFor takes the loop bounds and step as unsigned values and a callback receiving the index.
// ParallelReduce is declared here for std::vector<double> and std::vector<float> only.
127 void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function<void(unsigned int i)> &f);
128 double ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc);
129 float ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc);
// Sequential counterpart: defined below as a direct call of redfunc on the whole vector.
130 template<class T, class R>
131 auto SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
132
133 /// Pointer to the TBB task arena wrapper
134 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper> fTaskArenaW = nullptr;
135 };
136
137 /************ TEMPLATE METHODS IMPLEMENTATION ******************/
138
139 //////////////////////////////////////////////////////////////////////////
140 /// \brief Execute a function without arguments several times in parallel, dividing the execution in nChunks.
141 ///
142 /// \param func Function to be executed.
143 /// \param nTimes Number of times function should be called.
144 /// \param nChunks Number of chunks to split the input data for processing.
145 template<class F>
146 void TThreadExecutor::Foreach(F func, unsigned nTimes, unsigned nChunks) {
147 if (nChunks == 0) {
148 ParallelFor(0U, nTimes, 1, [&](unsigned int){func();});
149 return;
150 }
151
152 unsigned step = (nTimes + nChunks - 1) / nChunks;
153 auto lambda = [&](unsigned int i)
154 {
155 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
156 func();
157 }
158 };
159 ParallelFor(0U, nTimes, step, lambda);
160 }
161
162 //////////////////////////////////////////////////////////////////////////
163 /// \brief Execute a function in parallel over a sequence of indexes, dividing the execution in nChunks.
164 ///
165 /// \param func Function to be executed. Must take an element of the sequence passed assecond argument as a parameter.
166 /// \param args Sequence of indexes to execute `func` on.
167 /// \param nChunks Number of chunks to split the input data for processing.
168 template<class F, class INTEGER>
169 void TThreadExecutor::Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks) {
170 if (nChunks == 0) {
171 ParallelFor(*args.begin(), *args.end(), args.step(), [&](unsigned int i){func(i);});
172 return;
173 }
174 unsigned start = *args.begin();
175 unsigned end = *args.end();
176 unsigned seqStep = args.step();
177 unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
178
179 auto lambda = [&](unsigned int i)
180 {
181 for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
182 func(i + j);
183 }
184 };
185 ParallelFor(start, end, step, lambda);
186 }
187
188 //////////////////////////////////////////////////////////////////////////
189 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks.
190 ///
191 /// \param func Function to be executed on the elements of the initializer_list passed as second parameter.
192 /// \param args initializer_list for a vector to apply `func` on.
193 /// \param nChunks Number of chunks to split the input data for processing.
194 template<class F, class T>
195 void TThreadExecutor::Foreach(F func, std::initializer_list<T> args, unsigned nChunks) {
196 std::vector<T> vargs(std::move(args));
197 Foreach(func, vargs, nChunks);
198 }
199
200 //////////////////////////////////////////////////////////////////////////
201 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks.
202 ///
203 /// \param func Function to be executed on the elements of the vector passed as second parameter.
204 /// \param args Vector of elements passed as an argument to `func`.
205 /// \param nChunks Number of chunks to split the input data for processing.
206 template<class F, class T>
207 void TThreadExecutor::Foreach(F func, std::vector<T> &args, unsigned nChunks) {
208 unsigned int nToProcess = args.size();
209 if (nChunks == 0) {
210 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
211 return;
212 }
213
214 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
215 auto lambda = [&](unsigned int i)
216 {
217 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
218 func(args[i + j]);
219 }
220 };
221 ParallelFor(0U, nToProcess, step, lambda);
222 }
223
224 //////////////////////////////////////////////////////////////////////////
225 /// \brief Execute a function in parallel over the elements of a immutable vector, dividing the execution in nChunks.
226 ///
227 /// \param func Function to be executed on the elements of the vector passed as second parameter.
228 /// \param args Immutable vector of elements passed as an argument to `func`.
229 /// \param nChunks Number of chunks to split the input data for processing.
230 template<class F, class T>
231 void TThreadExecutor::Foreach(F func, const std::vector<T> &args, unsigned nChunks) {
232 unsigned int nToProcess = args.size();
233 if (nChunks == 0) {
234 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
235 return;
236 }
237
238 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
239 auto lambda = [&](unsigned int i)
240 {
241 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
242 func(args[i + j]);
243 }
244 };
245 ParallelFor(0U, nToProcess, step, lambda);
246 }
247
248 //////////////////////////////////////////////////////////////////////////
249 /// \brief Execute a function without arguments several times in parallel.
250 /// Implementation of the Map method.
251 ///
252 /// \copydetails TExecutorCRTP::Map(F func,unsigned nTimes)
253 template <class F, class Cond>
254 auto TThreadExecutor::MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>
255 {
256 using retType = decltype(func());
257 std::vector<retType> reslist(nTimes);
258 auto lambda = [&](unsigned int i)
259 {
260 reslist[i] = func();
261 };
262 ParallelFor(0U, nTimes, 1, lambda);
263
264 return reslist;
265 }
266
267 //////////////////////////////////////////////////////////////////////////
268 /// \brief Execute a function over a sequence of indexes in parallel.
269 /// Implementation of the Map method.
270 ///
271 /// \copydetails TExecutorCRTP::Map(F func,ROOT::TSeq<INTEGER> args)
272 template <class F, class INTEGER, class Cond>
273 auto TThreadExecutor::MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>
274 {
275 using retType = decltype(func(*args.begin()));
276 std::vector<retType> reslist(args.size());
277 auto lambda = [&](unsigned int i) { reslist[i] = func(args[i]); };
278 ParallelFor(0U, args.size(), 1, lambda);
279
280 return reslist;
281 }
282
283 //////////////////////////////////////////////////////////////////////////
284 /// \brief Execute a function `nTimes` in parallel, dividing the execution in nChunks and
285 /// providing a result per chunk.
286 ///
287 /// \copydetails ROOT::Internal::TExecutor::Map(F func,unsigned nTimes,R redfunc,unsigned nChunks)
288 template <class F, class R, class Cond>
289 auto TThreadExecutor::Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>
290 {
291 if (nChunks == 0)
292 {
293 return Map(func, nTimes);
294 }
295
296 unsigned step = (nTimes + nChunks - 1) / nChunks;
297 // Avoid empty chunks
298 unsigned actualChunks = (nTimes + step - 1) / step;
299 using retType = decltype(func());
300 std::vector<retType> reslist(actualChunks);
301 auto lambda = [&](unsigned int i)
302 {
303 std::vector<retType> partialResults(std::min(nTimes-i, step));
304 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
305 partialResults[j] = func();
306 }
307 reslist[i / step] = Reduce(partialResults, redfunc);
308 };
309 ParallelFor(0U, nTimes, step, lambda);
310
311 return reslist;
312 }
313
314 //////////////////////////////////////////////////////////////////////////
315 /// \brief Execute a function over the elements of a vector in parallel.
316 /// Implementation of the Map method.
317 ///
318 /// \copydetails TExecutorCRTP::Map(F func,std::vector<T> &args)
319 template <class F, class T, class Cond>
320 auto TThreadExecutor::MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
321 {
322 // //check whether func is callable
323 using retType = decltype(func(args.front()));
324
325 unsigned int nToProcess = args.size();
326 std::vector<retType> reslist(nToProcess);
327
328 auto lambda = [&](unsigned int i)
329 {
330 reslist[i] = func(args[i]);
331 };
332
333 ParallelFor(0U, nToProcess, 1, lambda);
334
335 return reslist;
336 }
337
338 //////////////////////////////////////////////////////////////////////////
339 /// \brief Execute a function over the elements of a vector in parallel.
340 /// Implementation of the Map method.
341 ///
342 /// \copydetails TExecutorCRTP::Map(F func,const std::vector<T> &args)
343 template <class F, class T, class Cond>
344 auto TThreadExecutor::MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
345 {
346 // //check whether func is callable
347 using retType = decltype(func(args.front()));
348
349 unsigned int nToProcess = args.size();
350 std::vector<retType> reslist(nToProcess);
351
352 auto lambda = [&](unsigned int i)
353 {
354 reslist[i] = func(args[i]);
355 };
356
357 ParallelFor(0U, nToProcess, 1, lambda);
358
359 return reslist;
360 }
361
362 //////////////////////////////////////////////////////////////////////////
363 /// \brief Execute a function in parallel over the elements of a sequence, dividing the execution in nChunks and
364 /// providing a result per chunk.
365 ///
366 /// \copydetails ROOT::Internal::TExecutor::Map(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
367 template <class F, class INTEGER, class R, class Cond>
368 auto TThreadExecutor::Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
369 -> std::vector<InvokeResult_t<F, INTEGER>>
370 {
371 if (nChunks == 0)
372 {
373 return Map(func, args);
374 }
375
376 unsigned nToProcess = args.size();
377 unsigned step = (nToProcess + nChunks - 1) / nChunks; // ceiling the division
378 // Avoid empty chunks
379 unsigned actualChunks = (nToProcess + step - 1) / step;
380
381 using retType = decltype(func(*args.begin()));
382 std::vector<retType> reslist(actualChunks);
383 auto lambda = [&](unsigned int i) {
384 std::vector<retType> partialResults(std::min(step, nToProcess - i)); // last chunk might be smaller
385 for (unsigned j = 0; j < partialResults.size(); j++) {
386 partialResults[j] = func(args[i + j]);
387 }
388 reslist[i / step] = Reduce(partialResults, redfunc);
389 };
390
391 ParallelFor(0U, nToProcess, step, lambda);
392
393 return reslist;
394 }
395
396 //////////////////////////////////////////////////////////////////////////
397 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks and
398 /// providing a result per chunk.
399 ///
400 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
401 template <class F, class T, class R, class Cond>
402 auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks)
403 -> std::vector<InvokeResult_t<F, T>>
404 {
405 if (nChunks == 0)
406 {
407 return Map(func, args);
408 }
409
410 unsigned int nToProcess = args.size();
411 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
412 // Avoid empty chunks
413 unsigned actualChunks = (nToProcess + step - 1) / step;
414
415 using retType = decltype(func(args.front()));
416 std::vector<retType> reslist(actualChunks);
417 auto lambda = [&](unsigned int i) {
418 std::vector<retType> partialResults(std::min(step, nToProcess - i));
419 for (unsigned j = 0; j < partialResults.size(); j++) {
420 partialResults[j] = func(args[i + j]);
421 }
422 reslist[i / step] = Reduce(partialResults, redfunc);
423 };
424
425 ParallelFor(0U, nToProcess, step, lambda);
426
427 return reslist;
428 }
429
430 //////////////////////////////////////////////////////////////////////////
431 /// \brief Execute a function in parallel over the elements of an immutable vector, dividing the execution in nChunks and
432 /// providing a result per chunk.
433 ///
434 /// \copydetails ROOT::Internal::TExecutor::Map(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
435 template <class F, class T, class R, class Cond>
436 auto TThreadExecutor::Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
437 -> std::vector<InvokeResult_t<F, T>>
438 {
439 if (nChunks == 0)
440 {
441 return Map(func, args);
442 }
443
444 unsigned int nToProcess = args.size();
445 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
446 // Avoid empty chunks
447 unsigned actualChunks = (nToProcess + step - 1) / step;
448
449 using retType = decltype(func(args.front()));
450 std::vector<retType> reslist(actualChunks);
451 auto lambda = [&](unsigned int i) {
452 std::vector<retType> partialResults(std::min(step, nToProcess - i));
453 for (unsigned j = 0; j < partialResults.size(); j++) {
454 partialResults[j] = func(args[i + j]);
455 }
456 reslist[i / step] = Reduce(partialResults, redfunc);
457 };
458
459 ParallelFor(0U, nToProcess, step, lambda);
460
461 return reslist;
462 }
463
464 //////////////////////////////////////////////////////////////////////////
465 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks and
466 /// providing a result per chunk.
467 ///
468 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
469 template <class F, class T, class R, class Cond>
470 auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
471 -> std::vector<InvokeResult_t<F, T>>
472 {
473 std::vector<T> vargs(std::move(args));
474 const auto &reslist = Map(func, vargs, redfunc, nChunks);
475 return reslist;
476 }
477
478 //////////////////////////////////////////////////////////////////////////
479 /// \brief Execute a function `nTimes` in parallel (Map) and accumulate the results into a single value (Reduce).
480 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc)
481 template <class F, class R, class Cond>
482 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>
483 {
484 return Reduce(Map(func, nTimes), redfunc);
485 }
486
487 //////////////////////////////////////////////////////////////////////////
488 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
489 /// Benefits from partial reduction into `nChunks` intermediate results.
490 ///
491 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc,unsigned nChunks)
492 template <class F, class R, class Cond>
493 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>
494 {
495 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
496 }
497
498 //////////////////////////////////////////////////////////////////////////
499 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
500 /// Benefits from partial reduction into `nChunks` intermediate results.
501 ///
502 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
503 template <class F, class INTEGER, class R, class Cond>
504 auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
506 {
507 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
508 }
509
510 //////////////////////////////////////////////////////////////////////////
511 /// \brief Execute a function in parallel over the elements of an initializer_list (Map) and accumulate the results into a single value (Reduce).
512 /// Benefits from partial reduction into `nChunks` intermediate results.
513 ///
514 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
515 template <class F, class T, class R, class Cond>
516 auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
518 {
519 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
520 }
521
522 //////////////////////////////////////////////////////////////////////////
523 /// \brief Execute a function over the elements of a vector in parallel (Map) and accumulate the results into a single value (Reduce).
524 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc)
525 template <class F, class T, class R, class Cond>
526 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
527 {
528 return Reduce(Map(func, args), redfunc);
529 }
530
531 //////////////////////////////////////////////////////////////////////////
532 /// \brief Execute a function over the elements of an immutable vector in parallel (Map) and accumulate the results into a single value (Reduce).
533 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc)
534 template <class F, class T, class R, class Cond>
535 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
536 {
537 return Reduce(Map(func, args), redfunc);
538 }
539
540 //////////////////////////////////////////////////////////////////////////
541 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
542 /// Benefits from partial reduction into `nChunks` intermediate results.
543 ///
544 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
545 template <class F, class T, class R, class Cond>
546 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>
547 {
548 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
549 }
550
551 //////////////////////////////////////////////////////////////////////////
552 /// \brief Execute a function in parallel over the elements of an immutable vector (Map) and accumulate the results into a single value (Reduce).
553 /// Benefits from partial reduction into `nChunks` intermediate results.
554 ///
555 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
556 template <class F, class T, class R, class Cond>
557 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
559 {
560 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
561 }
562
563 //////////////////////////////////////////////////////////////////////////
564 /// \copydoc ROOT::Internal::TExecutor::Reduce(const std::vector<T> &objs,R redfunc)
565 template<class T, class R>
566 auto TThreadExecutor::Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
567 {
568 // check we can apply reduce to objs
569 static_assert(std::is_same<decltype(redfunc(objs)), T>::value, "redfunc does not have the correct signature");
570 return SeqReduce(objs, redfunc);
571 }
572
573 //////////////////////////////////////////////////////////////////////////
574 /// \brief "Reduce" an std::vector into a single object in parallel by passing a
575 /// binary function as the second argument defining the reduction operation.
576 ///
577 /// \param objs A vector of elements to combine.
578 /// \param redfunc Binary reduction function to combine the elements of the vector `objs`.
579 /// \return A value result of combining the vector elements into a single object of the same type.
580 template<class T, class BINARYOP>
581 auto TThreadExecutor::Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
582 {
583 // check we can apply reduce to objs
584 static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value, "redfunc does not have the correct signature");
585 return ParallelReduce(objs, redfunc);
586 }
587
588 //////////////////////////////////////////////////////////////////////////
589 /// \brief "Reduce", sequentially, an std::vector into a single object
590 ///
591 /// \param objs A vector of elements to combine.
592 /// \param redfunc Reduction function to combine the elements of the vector `objs`.
593 /// \return A value result of combining the vector elements into a single object of the same type.
594 template<class T, class R>
595 auto TThreadExecutor::SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
596 {
597 return redfunc(objs);
598 }
599
600} // namespace ROOT
601
602#endif // R__USE_IMT
603#endif
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define a(i)
Definition RSha256.hxx:99
unsigned int UInt_t
Definition RtypesCore.h:46
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Int_t i
ROOT::TypeTraits::InvokeResult_t< F, Args... > InvokeResult_t
A pseudo container class which is a generator of indices.
Definition TSeq.hxx:67
iterator begin() const
Definition TSeq.hxx:172
T step() const
Definition TSeq.hxx:193
iterator end() const
Definition TSeq.hxx:175
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< InvokeResult_t< F > >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per c...
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t< F >
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce" an std::vector into a single object by passing a function as the second argument defining th...
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
TThreadExecutor(UInt_t nThreads=0u)
Class constructor.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Execute a function without arguments several times in parallel.
#define F(x, y, z)
namespace associated R package for ROOT.
Definition RExports.h:71
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...