Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
TThreadExecutor.hxx
Go to the documentation of this file.
1// @(#)root/thread:$Id$
2// Author: Xavier Valls March 2016
3
4/*************************************************************************
5 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#ifndef ROOT_TThreadExecutor
13#define ROOT_TThreadExecutor
14
15#include "RConfigure.h"
16
17// exclude in case ROOT does not have IMT support
18#ifndef R__USE_IMT
19// No need to error out for dictionaries.
20# if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21# error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
22# endif
23#else
24
26#include "ROOT/TSeq.hxx"
27#include "ROOT/TypeTraits.hxx" // InvokeResult
28#include "RTaskArena.hxx"
29#include "TError.h"
30
31#include <functional> //std::function
32#include <initializer_list>
33#include <memory>
34#include <numeric> //std::accumulate
35#include <type_traits> //std::enable_if
36#include <utility> //std::move
37#include <vector>
38
39namespace ROOT {
40
41 class TThreadExecutor: public TExecutorCRTP<TThreadExecutor> {
43
44 public:
45
46 explicit TThreadExecutor(UInt_t nThreads = 0u);
47
50
51 // ForEach
52 //
53 template<class F>
54 void Foreach(F func, unsigned nTimes, unsigned nChunks = 0);
55 template<class F, class INTEGER>
56 void Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks = 0);
57 template<class F, class T>
58 void Foreach(F func, std::initializer_list<T> args, unsigned nChunks = 0);
59 template<class F, class T>
60 void Foreach(F func, std::vector<T> &args, unsigned nChunks = 0);
61 template<class F, class T>
62 void Foreach(F func, const std::vector<T> &args, unsigned nChunks = 0);
63
64 // Map
65 //
67
68 // Extension of the Map interfaces with chunking, specific to this class
69 template <class F, class R, class Cond = validMapReturnCond<F>>
70 auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>;
71 template <class F, class INTEGER, class R, class Cond = validMapReturnCond<F, INTEGER>>
72 auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
73 -> std::vector<InvokeResult_t<F, INTEGER>>;
74 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
75 auto Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
76 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
77 auto Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
78 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
79 auto Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
80
81 // MapReduce
82 //
83 // We need to reimplement the MapReduce interfaces to allow for parallel reduction, defined in
84 // this class but not in the base class.
85 //
86 // the late return types also check at compile-time whether redfunc is compatible with func,
87 // other than checking that func is compatible with the type of arguments.
88 // a static_assert check in TThreadExecutor::Reduce is used to check that redfunc is compatible with the type returned by func
90 template <class F, class R, class Cond = validMapReturnCond<F>>
91 auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>;
92 template <class F, class R, class Cond = validMapReturnCond<F>>
93 auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>;
94 template <class F, class INTEGER, class R, class Cond = validMapReturnCond<F, INTEGER>>
95 auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, INTEGER>;
96 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
97 auto MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
98 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
99 auto MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
100 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
101 auto MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
102 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
103 auto MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
104 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
105 auto MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
106
108 template<class T, class R> auto Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
109 template<class T, class BINARYOP> auto Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
110
111 unsigned GetPoolSize() const;
112
113 private:
114 // Implementation of the Map functions declared in the parent class (TExecutorCRTP)
115 //
116 template <class F, class Cond = validMapReturnCond<F>>
117 auto MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>;
118 template <class F, class INTEGER, class Cond = validMapReturnCond<F, INTEGER>>
119 auto MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>;
120 template <class F, class T, class Cond = validMapReturnCond<F, T>>
121 auto MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
122 template <class F, class T, class Cond = validMapReturnCond<F, T>>
123 auto MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
124
125 // Functions that interface with the parallel library used as a backend
126 void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function<void(unsigned int i)> &f);
127 double ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc);
128 float ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc);
129 template<class T, class R>
130 auto SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
131
132 /// Pointer to the TBB task arena wrapper
133 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper> fTaskArenaW = nullptr;
134 };
135
136 /************ TEMPLATE METHODS IMPLEMENTATION ******************/
137
138 //////////////////////////////////////////////////////////////////////////
139 /// \brief Execute a function without arguments several times in parallel, dividing the execution in nChunks.
140 ///
141 /// \param func Function to be executed.
142 /// \param nTimes Number of times function should be called.
143 /// \param nChunks Number of chunks to split the input data for processing.
144 template<class F>
145 void TThreadExecutor::Foreach(F func, unsigned nTimes, unsigned nChunks) {
146 if (nChunks == 0) {
147 ParallelFor(0U, nTimes, 1, [&](unsigned int){func();});
148 return;
149 }
150
151 unsigned step = (nTimes + nChunks - 1) / nChunks;
152 auto lambda = [&](unsigned int i)
153 {
154 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
155 func();
156 }
157 };
158 ParallelFor(0U, nTimes, step, lambda);
159 }
160
161 //////////////////////////////////////////////////////////////////////////
162 /// \brief Execute a function in parallel over a sequence of indexes, dividing the execution in nChunks.
163 ///
164 /// \param func Function to be executed. Must take an element of the sequence passed assecond argument as a parameter.
165 /// \param args Sequence of indexes to execute `func` on.
166 /// \param nChunks Number of chunks to split the input data for processing.
167 template<class F, class INTEGER>
168 void TThreadExecutor::Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks) {
169 if (nChunks == 0) {
170 ParallelFor(*args.begin(), *args.end(), args.step(), [&](unsigned int i){func(i);});
171 return;
172 }
173 unsigned start = *args.begin();
174 unsigned end = *args.end();
175 unsigned seqStep = args.step();
176 unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
177
178 auto lambda = [&](unsigned int i)
179 {
180 for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
181 func(i + j);
182 }
183 };
184 ParallelFor(start, end, step, lambda);
185 }
186
187 //////////////////////////////////////////////////////////////////////////
188 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks.
189 ///
190 /// \param func Function to be executed on the elements of the initializer_list passed as second parameter.
191 /// \param args initializer_list for a vector to apply `func` on.
192 /// \param nChunks Number of chunks to split the input data for processing.
193 template<class F, class T>
194 void TThreadExecutor::Foreach(F func, std::initializer_list<T> args, unsigned nChunks) {
195 std::vector<T> vargs(std::move(args));
196 Foreach(func, vargs, nChunks);
197 }
198
199 //////////////////////////////////////////////////////////////////////////
200 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks.
201 ///
202 /// \param func Function to be executed on the elements of the vector passed as second parameter.
203 /// \param args Vector of elements passed as an argument to `func`.
204 /// \param nChunks Number of chunks to split the input data for processing.
205 template<class F, class T>
206 void TThreadExecutor::Foreach(F func, std::vector<T> &args, unsigned nChunks) {
207 unsigned int nToProcess = args.size();
208 if (nChunks == 0) {
209 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
210 return;
211 }
212
213 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
214 auto lambda = [&](unsigned int i)
215 {
216 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
217 func(args[i + j]);
218 }
219 };
220 ParallelFor(0U, nToProcess, step, lambda);
221 }
222
223 //////////////////////////////////////////////////////////////////////////
224 /// \brief Execute a function in parallel over the elements of a immutable vector, dividing the execution in nChunks.
225 ///
226 /// \param func Function to be executed on the elements of the vector passed as second parameter.
227 /// \param args Immutable vector of elements passed as an argument to `func`.
228 /// \param nChunks Number of chunks to split the input data for processing.
229 template<class F, class T>
230 void TThreadExecutor::Foreach(F func, const std::vector<T> &args, unsigned nChunks) {
231 unsigned int nToProcess = args.size();
232 if (nChunks == 0) {
233 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
234 return;
235 }
236
237 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
238 auto lambda = [&](unsigned int i)
239 {
240 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
241 func(args[i + j]);
242 }
243 };
244 ParallelFor(0U, nToProcess, step, lambda);
245 }
246
247 //////////////////////////////////////////////////////////////////////////
248 /// \brief Execute a function without arguments several times in parallel.
249 /// Implementation of the Map method.
250 ///
251 /// \copydetails TExecutorCRTP::Map(F func,unsigned nTimes)
252 template <class F, class Cond>
253 auto TThreadExecutor::MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>
254 {
255 using retType = decltype(func());
256 std::vector<retType> reslist(nTimes);
257 auto lambda = [&](unsigned int i)
258 {
259 reslist[i] = func();
260 };
261 ParallelFor(0U, nTimes, 1, lambda);
262
263 return reslist;
264 }
265
266 //////////////////////////////////////////////////////////////////////////
267 /// \brief Execute a function over a sequence of indexes in parallel.
268 /// Implementation of the Map method.
269 ///
270 /// \copydetails TExecutorCRTP::Map(F func,ROOT::TSeq<INTEGER> args)
271 template <class F, class INTEGER, class Cond>
272 auto TThreadExecutor::MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>
273 {
274 using retType = decltype(func(*args.begin()));
275 std::vector<retType> reslist(args.size());
276 auto lambda = [&](unsigned int i) { reslist[i] = func(args[i]); };
277 ParallelFor(0U, args.size(), 1, lambda);
278
279 return reslist;
280 }
281
282 //////////////////////////////////////////////////////////////////////////
283 /// \brief Execute a function `nTimes` in parallel, dividing the execution in nChunks and
284 /// providing a result per chunk.
285 ///
286 /// \copydetails ROOT::Internal::TExecutor::Map(F func,unsigned nTimes,R redfunc,unsigned nChunks)
287 template <class F, class R, class Cond>
288 auto TThreadExecutor::Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>
289 {
290 if (nChunks == 0)
291 {
292 return Map(func, nTimes);
293 }
294
295 unsigned step = (nTimes + nChunks - 1) / nChunks;
296 // Avoid empty chunks
297 unsigned actualChunks = (nTimes + step - 1) / step;
298 using retType = decltype(func());
299 std::vector<retType> reslist(actualChunks);
300 auto lambda = [&](unsigned int i)
301 {
302 std::vector<retType> partialResults(std::min(nTimes-i, step));
303 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
304 partialResults[j] = func();
305 }
306 reslist[i / step] = Reduce(partialResults, redfunc);
307 };
308 ParallelFor(0U, nTimes, step, lambda);
309
310 return reslist;
311 }
312
313 //////////////////////////////////////////////////////////////////////////
314 /// \brief Execute a function over the elements of a vector in parallel.
315 /// Implementation of the Map method.
316 ///
317 /// \copydetails TExecutorCRTP::Map(F func,std::vector<T> &args)
318 template <class F, class T, class Cond>
319 auto TThreadExecutor::MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
320 {
321 // //check whether func is callable
322 using retType = decltype(func(args.front()));
323
324 unsigned int nToProcess = args.size();
325 std::vector<retType> reslist(nToProcess);
326
327 auto lambda = [&](unsigned int i)
328 {
329 reslist[i] = func(args[i]);
330 };
331
332 ParallelFor(0U, nToProcess, 1, lambda);
333
334 return reslist;
335 }
336
337 //////////////////////////////////////////////////////////////////////////
338 /// \brief Execute a function over the elements of a vector in parallel.
339 /// Implementation of the Map method.
340 ///
341 /// \copydetails TExecutorCRTP::Map(F func,const std::vector<T> &args)
342 template <class F, class T, class Cond>
343 auto TThreadExecutor::MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
344 {
345 // //check whether func is callable
346 using retType = decltype(func(args.front()));
347
348 unsigned int nToProcess = args.size();
349 std::vector<retType> reslist(nToProcess);
350
351 auto lambda = [&](unsigned int i)
352 {
353 reslist[i] = func(args[i]);
354 };
355
356 ParallelFor(0U, nToProcess, 1, lambda);
357
358 return reslist;
359 }
360
361 //////////////////////////////////////////////////////////////////////////
362 /// \brief Execute a function in parallel over the elements of a sequence, dividing the execution in nChunks and
363 /// providing a result per chunk.
364 ///
365 /// \copydetails ROOT::Internal::TExecutor::Map(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
366 template <class F, class INTEGER, class R, class Cond>
367 auto TThreadExecutor::Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
368 -> std::vector<InvokeResult_t<F, INTEGER>>
369 {
370 if (nChunks == 0)
371 {
372 return Map(func, args);
373 }
374
375 unsigned nToProcess = args.size();
376 unsigned step = (nToProcess + nChunks - 1) / nChunks; // ceiling the division
377 // Avoid empty chunks
378 unsigned actualChunks = (nToProcess + step - 1) / step;
379
380 using retType = decltype(func(*args.begin()));
381 std::vector<retType> reslist(actualChunks);
382 auto lambda = [&](unsigned int i) {
383 std::vector<retType> partialResults(std::min(step, nToProcess - i)); // last chunk might be smaller
384 for (unsigned j = 0; j < partialResults.size(); j++) {
385 partialResults[j] = func(args[i + j]);
386 }
387 reslist[i / step] = Reduce(partialResults, redfunc);
388 };
389
390 ParallelFor(0U, nToProcess, step, lambda);
391
392 return reslist;
393 }
394
395 //////////////////////////////////////////////////////////////////////////
396 /// \brief Execute a function in parallel over the elements of a vector, dividing the execution in nChunks and
397 /// providing a result per chunk.
398 ///
399 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
400 template <class F, class T, class R, class Cond>
401 auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks)
402 -> std::vector<InvokeResult_t<F, T>>
403 {
404 if (nChunks == 0)
405 {
406 return Map(func, args);
407 }
408
409 unsigned int nToProcess = args.size();
410 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
411 // Avoid empty chunks
412 unsigned actualChunks = (nToProcess + step - 1) / step;
413
414 using retType = decltype(func(args.front()));
415 std::vector<retType> reslist(actualChunks);
416 auto lambda = [&](unsigned int i) {
417 std::vector<retType> partialResults(std::min(step, nToProcess - i));
418 for (unsigned j = 0; j < partialResults.size(); j++) {
419 partialResults[j] = func(args[i + j]);
420 }
421 reslist[i / step] = Reduce(partialResults, redfunc);
422 };
423
424 ParallelFor(0U, nToProcess, step, lambda);
425
426 return reslist;
427 }
428
429 //////////////////////////////////////////////////////////////////////////
430 /// \brief Execute a function in parallel over the elements of an immutable vector, dividing the execution in nChunks and
431 /// providing a result per chunk.
432 ///
433 /// \copydetails ROOT::Internal::TExecutor::Map(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
434 template <class F, class T, class R, class Cond>
435 auto TThreadExecutor::Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
436 -> std::vector<InvokeResult_t<F, T>>
437 {
438 if (nChunks == 0)
439 {
440 return Map(func, args);
441 }
442
443 unsigned int nToProcess = args.size();
444 unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
445 // Avoid empty chunks
446 unsigned actualChunks = (nToProcess + step - 1) / step;
447
448 using retType = decltype(func(args.front()));
449 std::vector<retType> reslist(actualChunks);
450 auto lambda = [&](unsigned int i) {
451 std::vector<retType> partialResults(std::min(step, nToProcess - i));
452 for (unsigned j = 0; j < partialResults.size(); j++) {
453 partialResults[j] = func(args[i + j]);
454 }
455 reslist[i / step] = Reduce(partialResults, redfunc);
456 };
457
458 ParallelFor(0U, nToProcess, step, lambda);
459
460 return reslist;
461 }
462
463 //////////////////////////////////////////////////////////////////////////
464 /// \brief Execute a function in parallel over the elements of an initializer_list, dividing the execution in nChunks and
465 /// providing a result per chunk.
466 ///
467 /// \copydetails ROOT::Internal::TExecutor::Map(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
468 template <class F, class T, class R, class Cond>
469 auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
470 -> std::vector<InvokeResult_t<F, T>>
471 {
472 std::vector<T> vargs(std::move(args));
473 const auto &reslist = Map(func, vargs, redfunc, nChunks);
474 return reslist;
475 }
476
477 //////////////////////////////////////////////////////////////////////////
478 /// \brief Execute a function `nTimes` in parallel (Map) and accumulate the results into a single value (Reduce).
479 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc)
480 template <class F, class R, class Cond>
481 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>
482 {
483 return Reduce(Map(func, nTimes), redfunc);
484 }
485
486 //////////////////////////////////////////////////////////////////////////
487 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
488 /// Benefits from partial reduction into `nChunks` intermediate results.
489 ///
490 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,unsigned nTimes,R redfunc,unsigned nChunks)
491 template <class F, class R, class Cond>
492 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>
493 {
494 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
495 }
496
497 //////////////////////////////////////////////////////////////////////////
498 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
499 /// Benefits from partial reduction into `nChunks` intermediate results.
500 ///
501 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,ROOT::TSeq<INTEGER> args,R redfunc,unsigned nChunks)
502 template <class F, class INTEGER, class R, class Cond>
503 auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
505 {
506 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
507 }
508
509 //////////////////////////////////////////////////////////////////////////
510 /// \brief Execute a function in parallel over the elements of an initializer_list (Map) and accumulate the results into a single value (Reduce).
511 /// Benefits from partial reduction into `nChunks` intermediate results.
512 ///
513 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::initializer_list<T> args,R redfunc,unsigned nChunks)
514 template <class F, class T, class R, class Cond>
515 auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
517 {
518 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
519 }
520
521 //////////////////////////////////////////////////////////////////////////
522 /// \brief Execute a function over the elements of a vector in parallel (Map) and accumulate the results into a single value (Reduce).
523 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc)
524 template <class F, class T, class R, class Cond>
525 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
526 {
527 return Reduce(Map(func, args), redfunc);
528 }
529
530 //////////////////////////////////////////////////////////////////////////
531 /// \brief Execute a function over the elements of an immutable vector in parallel (Map) and accumulate the results into a single value (Reduce).
532 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc)
533 template <class F, class T, class R, class Cond>
534 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
535 {
536 return Reduce(Map(func, args), redfunc);
537 }
538
539 //////////////////////////////////////////////////////////////////////////
540 /// \brief Execute a function in parallel over the elements of a vector (Map) and accumulate the results into a single value (Reduce).
541 /// Benefits from partial reduction into `nChunks` intermediate results.
542 ///
543 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,std::vector<T> &args,R redfunc,unsigned nChunks)
544 template <class F, class T, class R, class Cond>
545 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>
546 {
547 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
548 }
549
550 //////////////////////////////////////////////////////////////////////////
551 /// \brief Execute a function in parallel over the elements of an immutable vector (Map) and accumulate the results into a single value (Reduce).
552 /// Benefits from partial reduction into `nChunks` intermediate results.
553 ///
554 /// \copydetails ROOT::Internal::TExecutor::MapReduce(F func,const std::vector<T> &args,R redfunc,unsigned nChunks)
555 template <class F, class T, class R, class Cond>
556 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
558 {
559 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
560 }
561
562 //////////////////////////////////////////////////////////////////////////
563 /// \copydoc ROOT::Internal::TExecutor::Reduce(const std::vector<T> &objs,R redfunc)
564 template<class T, class R>
565 auto TThreadExecutor::Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
566 {
567 // check we can apply reduce to objs
568 static_assert(std::is_same<decltype(redfunc(objs)), T>::value, "redfunc does not have the correct signature");
569 return SeqReduce(objs, redfunc);
570 }
571
572 //////////////////////////////////////////////////////////////////////////
573 /// \brief "Reduce" an std::vector into a single object in parallel by passing a
574 /// binary function as the second argument defining the reduction operation.
575 ///
576 /// \param objs A vector of elements to combine.
577 /// \param redfunc Binary reduction function to combine the elements of the vector `objs`.
578 /// \return A value result of combining the vector elements into a single object of the same type.
579 template<class T, class BINARYOP>
580 auto TThreadExecutor::Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
581 {
582 // check we can apply reduce to objs
583 static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value, "redfunc does not have the correct signature");
584 return ParallelReduce(objs, redfunc);
585 }
586
587 //////////////////////////////////////////////////////////////////////////
588 /// \brief "Reduce", sequentially, an std::vector into a single object
589 ///
590 /// \param objs A vector of elements to combine.
591 /// \param redfunc Reduction function to combine the elements of the vector `objs`.
592 /// \return A value result of combining the vector elements into a single object of the same type.
593 template<class T, class R>
594 auto TThreadExecutor::SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
595 {
596 return redfunc(objs);
597 }
598
599} // namespace ROOT
600
601#endif // R__USE_IMT
602#endif
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define a(i)
Definition RSha256.hxx:99
start
Definition Rotated.cxx:223
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
Definition RtypesCore.h:60
ROOT::TypeTraits::InvokeResult_t< F, Args... > InvokeResult_t
A pseudo container class which is a generator of indices.
Definition TSeq.hxx:67
iterator begin() const
Definition TSeq.hxx:172
T step() const
Definition TSeq.hxx:193
iterator end() const
Definition TSeq.hxx:175
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
"Reduce", sequentially, an std::vector into a single object
auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector< InvokeResult_t< F > >
Execute a function nTimes in parallel, dividing the execution in nChunks and providing a result per c...
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
Execute a function in parallel over the indices of a loop.
unsigned GetPoolSize() const
Returns the number of worker threads in the task arena.
auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t< F >
Execute a function nTimes in parallel (Map) and accumulate the results into a single value (Reduce).
std::shared_ptr< ROOT::Internal::RTaskArenaWrapper > fTaskArenaW
Pointer to the TBB task arena wrapper.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
void Foreach(F func, unsigned nTimes, unsigned nChunks=0)
Execute a function without arguments several times in parallel, dividing the execution in nChunks.
TThreadExecutor(UInt_t nThreads=0u)
Class constructor.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
"Reduce" in parallel an std::vector<double> into a single double value
TThreadExecutor & operator=(const TThreadExecutor &)=delete
TThreadExecutor(const TThreadExecutor &)=delete
auto MapImpl(F func, unsigned nTimes) -> std::vector< InvokeResult_t< F > >
Execute a function without arguments several times in parallel.
#define F(x, y, z)
namespace associated R package for ROOT.
Definition RExports.h:72