Logo ROOT   6.14/05
Reference Guide
TThreadExecutor.hxx
Go to the documentation of this file.
1 // @(#)root/thread:$Id$
2 // Author: Xavier Valls March 2016
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2006, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #ifndef ROOT_TThreadExecutor
13 #define ROOT_TThreadExecutor
14 
15 #include "RConfigure.h"
16 
17 // exclude in case ROOT does not have IMT support
18 #ifndef R__USE_IMT
19 // No need to error out for dictionaries.
20 # if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21 # error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
22 # endif
23 #else
24 
25 #include "ROOT/TExecutor.hxx"
26 #include "ROOT/TPoolManager.hxx"
27 #include "TROOT.h"
28 #include "TError.h"
29 #include <functional>
30 #include <memory>
31 #include <numeric>
32 
33 namespace ROOT {
34 
35  class TThreadExecutor: public TExecutor<TThreadExecutor> {
36  public:
37  explicit TThreadExecutor();
38 
39  explicit TThreadExecutor(UInt_t nThreads);
40 
41  TThreadExecutor(TThreadExecutor &) = delete;
43 
44  template<class F>
45  void Foreach(F func, unsigned nTimes);
46  template<class F, class INTEGER>
47  void Foreach(F func, ROOT::TSeq<INTEGER> args);
48  /// \cond
49  template<class F, class T>
50  void Foreach(F func, std::initializer_list<T> args);
51  /// \endcond
52  template<class F, class T>
53  void Foreach(F func, std::vector<T> &args);
54  template<class F, class T>
55  void Foreach(F func, const std::vector<T> &args);
56 
58  template<class F, class Cond = noReferenceCond<F>>
59  auto Map(F func, unsigned nTimes) -> std::vector<typename std::result_of<F()>::type>;
60  template<class F, class INTEGER, class Cond = noReferenceCond<F, INTEGER>>
62  template<class F, class T, class Cond = noReferenceCond<F, T>>
63  auto Map(F func, std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type>;
64 
65  // // MapReduce
66  // // the late return types also check at compile-time whether redfunc is compatible with func,
67  // // other than checking that func is compatible with the type of arguments.
68  // // a static_assert check in TThreadExecutor::Reduce is used to check that redfunc is compatible with the type returned by func
70  template<class F, class R, class Cond = noReferenceCond<F>>
71  auto MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of<F()>::type;
72  template<class F, class R, class Cond = noReferenceCond<F>>
73  auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> typename std::result_of<F()>::type;
74  template<class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
75  auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(INTEGER)>::type;
76  /// \cond
77  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
78  auto MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
79  /// \endcond
80  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
81  auto MapReduce(F func, std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type;
82  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
83  auto MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
84 
86  template<class T, class BINARYOP> auto Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
87  template<class T, class R> auto Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
88 
89  protected:
90  template<class F, class R, class Cond = noReferenceCond<F>>
91  auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F()>::type>;
92  template<class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
93  auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(INTEGER)>::type>;
94  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
95  auto Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
96  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
97  auto Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
98 
99  private:
100  void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function<void(unsigned int i)> &f);
101  double ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc);
102  float ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc);
103  template<class T, class R>
104  auto SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
105 
106  std::shared_ptr<ROOT::Internal::TPoolManager> fSched = nullptr;
107  };
108 
109  /************ TEMPLATE METHODS IMPLEMENTATION ******************/
110 
111  //////////////////////////////////////////////////////////////////////////
112  /// Execute func (with no arguments) nTimes in parallel.
113  /// Functions that take more than zero arguments can be executed (with
114  /// fixed arguments) by wrapping them in a lambda or with std::bind.
115  template<class F>
116  void TThreadExecutor::Foreach(F func, unsigned nTimes) {
117  ParallelFor(0U, nTimes, 1, [&](unsigned int){func();});
118  }
119 
120  //////////////////////////////////////////////////////////////////////////
121  /// Execute func in parallel, taking an element of a
122  /// sequence as argument.
123  template<class F, class INTEGER>
125  ParallelFor(*args.begin(), *args.end(), args.step(), [&](unsigned int i){func(i);});
126  }
127 
128  /// \cond
129  //////////////////////////////////////////////////////////////////////////
130  /// Execute func in parallel, taking an element of a
131  /// initializer_list as argument.
132  template<class F, class T>
133  void TThreadExecutor::Foreach(F func, std::initializer_list<T> args) {
134  std::vector<T> vargs(std::move(args));
135  Foreach(func, vargs);
136  }
137  /// \endcond
138 
139  //////////////////////////////////////////////////////////////////////////
140  /// Execute func in parallel, taking an element of an
141  /// std::vector as argument.
142  template<class F, class T>
143  void TThreadExecutor::Foreach(F func, std::vector<T> &args) {
144  unsigned int nToProcess = args.size();
145  ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
146  }
147 
148  //////////////////////////////////////////////////////////////////////////
149  /// Execute func in parallel, taking an element of a std::vector as argument.
150  template<class F, class T>
151  void TThreadExecutor::Foreach(F func, const std::vector<T> &args) {
152  unsigned int nToProcess = args.size();
153  ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
154  }
155 
156  //////////////////////////////////////////////////////////////////////////
157  /// Execute func (with no arguments) nTimes in parallel.
158  /// A vector containg executions' results is returned.
159  /// Functions that take more than zero arguments can be executed (with
160  /// fixed arguments) by wrapping them in a lambda or with std::bind.
161  template<class F, class Cond>
163  using retType = decltype(func());
164  std::vector<retType> reslist(nTimes);
165  auto lambda = [&](unsigned int i)
166  {
167  reslist[i] = func();
168  };
169  ParallelFor(0U, nTimes, 1, lambda);
170 
171  return reslist;
172  }
173 
174  //////////////////////////////////////////////////////////////////////////
175  /// Execute func in parallel, taking an element of a
176  /// sequence as argument.
177  /// A vector containg executions' results is returned.
178  template<class F, class INTEGER, class Cond>
180  unsigned start = *args.begin();
181  unsigned end = *args.end();
182  unsigned seqStep = args.step();
183 
184  using retType = decltype(func(start));
185  std::vector<retType> reslist(end - start);
186  auto lambda = [&](unsigned int i)
187  {
188  reslist[i] = func(i);
189  };
190  ParallelFor(start, end, seqStep, lambda);
191 
192  return reslist;
193  }
194 
195  //////////////////////////////////////////////////////////////////////////
196  /// Execute func (with no arguments) nTimes in parallel.
197  /// Divides and groups the executions in nChunks (if it doesn't make sense will reduce the number of chunks) with partial reduction;
198  /// A vector containg partial reductions' results is returned.
199  template<class F, class R, class Cond>
200  auto TThreadExecutor::Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F()>::type> {
201  if (nChunks == 0)
202  {
203  return Map(func, nTimes);
204  }
205 
206  unsigned step = (nTimes + nChunks - 1) / nChunks;
207  // Avoid empty chunks
208  unsigned actualChunks = (nTimes + step - 1) / step;
209  using retType = decltype(func());
210  std::vector<retType> reslist(actualChunks);
211  auto lambda = [&](unsigned int i)
212  {
213  std::vector<retType> partialResults(std::min(nTimes-i, step));
214  for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
215  partialResults[j] = func();
216  }
217  reslist[i / step] = redfunc(partialResults);
218  };
219  ParallelFor(0U, nTimes, step, lambda);
220 
221  return reslist;
222  }
223 
224  //////////////////////////////////////////////////////////////////////////
225  /// Execute func in parallel, taking an element of an
226  /// std::vector as argument.
227  /// A vector containg executions' results is returned.
228  // actual implementation of the Map method. all other calls with arguments eventually
229  // call this one
230  template<class F, class T, class Cond>
232  // //check whether func is callable
233  using retType = decltype(func(args.front()));
234 
235  unsigned int nToProcess = args.size();
236  std::vector<retType> reslist(nToProcess);
237 
238  auto lambda = [&](unsigned int i)
239  {
240  reslist[i] = func(args[i]);
241  };
242 
243  ParallelFor(0U, nToProcess, 1, lambda);
244 
245  return reslist;
246  }
247 
248  //////////////////////////////////////////////////////////////////////////
249  /// Execute func in parallel, taking an element of a
250  /// sequence as argument.
251  /// Divides and groups the executions in nChunks (if it doesn't make sense will reduce the number of chunks) with partial reduction\n
252  /// A vector containg partial reductions' results is returned.
253  template<class F, class INTEGER, class R, class Cond>
255  if (nChunks == 0)
256  {
257  return Map(func, args);
258  }
259 
260  unsigned start = *args.begin();
261  unsigned end = *args.end();
262  unsigned seqStep = args.step();
263  unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
264  // Avoid empty chunks
265  unsigned actualChunks = (end - start + step - 1) / step;
266 
267  using retType = decltype(func(start));
268  std::vector<retType> reslist(actualChunks);
269  auto lambda = [&](unsigned int i)
270  {
271  std::vector<retType> partialResults(std::min(end-i, step));
272  for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
273  partialResults[j] = func(i + j);
274  }
275  reslist[i / step] = redfunc(partialResults);
276  };
277  ParallelFor(start, end, step, lambda);
278 
279  return reslist;
280  }
281 
282 /// \cond
283  //////////////////////////////////////////////////////////////////////////
284  /// Execute func in parallel, taking an element of an
285  /// std::vector as argument. Divides and groups the executions in nChunks with partial reduction.
286  /// If it doesn't make sense will reduce the number of chunks.\n
287  /// A vector containg partial reductions' results is returned.
288  template<class F, class T, class R, class Cond>
289  auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
290  if (nChunks == 0)
291  {
292  return Map(func, args);
293  }
294 
295  unsigned int nToProcess = args.size();
296  unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
297  // Avoid empty chunks
298  unsigned actualChunks = (nToProcess + step - 1) / step;
299 
300  using retType = decltype(func(args.front()));
301  std::vector<retType> reslist(actualChunks);
302  auto lambda = [&](unsigned int i)
303  {
304  std::vector<T> partialResults(step);
305  for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
306  partialResults[j] = func(args[i + j]);
307  }
308  reslist[i / step] = redfunc(partialResults);
309  };
310 
311  ParallelFor(0U, nToProcess, step, lambda);
312 
313  return reslist;
314  }
315 
316  //////////////////////////////////////////////////////////////////////////
317  /// Execute func in parallel, taking an element of an
318  /// std::initializer_list as an argument. Divides and groups the executions in nChunks with partial reduction.
319  /// If it doesn't make sense will reduce the number of chunks.\n
320  /// A vector containg partial reductions' results is returned.
321  template<class F, class T, class R, class Cond>
322  auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
323  std::vector<T> vargs(std::move(args));
324  const auto &reslist = Map(func, vargs, redfunc, nChunks);
325  return reslist;
326  }
327 /// \endcond
328 
329 
330  //////////////////////////////////////////////////////////////////////////
331  /// This method behaves just like Map, but an additional redfunc function
332  /// must be provided. redfunc is applied to the vector Map would return and
333  /// must return the same type as func. In practice, redfunc can be used to
334  /// "squash" the vector returned by Map into a single object by merging,
335  /// adding, mixing the elements of the vector.\n
336  /// The fourth argument indicates the number of chunks we want to divide our work in.
337  template<class F, class R, class Cond>
338  auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of<F()>::type {
339  return Reduce(Map(func, nTimes), redfunc);
340  }
341 
342  template<class F, class R, class Cond>
343  auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> typename std::result_of<F()>::type {
344  return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
345  }
346 
347  template<class F, class INTEGER, class R, class Cond>
348  auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(INTEGER)>::type {
349  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
350  }
351  /// \cond
352  template<class F, class T, class R, class Cond>
353  auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
354  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
355  }
356  /// \endcond
357 
358  template<class F, class T, class R, class Cond>
359  auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc) -> typename std::result_of<F(T)>::type {
360  return Reduce(Map(func, args), redfunc);
361  }
362 
363  template<class F, class T, class R, class Cond>
364  auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
365  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
366  }
367 
368  //////////////////////////////////////////////////////////////////////////
369  /// "Reduce" an std::vector into a single object in parallel by passing a
370  /// binary operator as the second argument to act on pairs of elements of the std::vector.
371  template<class T, class BINARYOP>
372  auto TThreadExecutor::Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
373  {
374  // check we can apply reduce to objs
375  static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value, "redfunc does not have the correct signature");
376  return ParallelReduce(objs, redfunc);
377  }
378 
379  //////////////////////////////////////////////////////////////////////////
380  /// "Reduce" an std::vector into a single object by passing a
381  /// function as the second argument defining the reduction operation.
382  template<class T, class R>
383  auto TThreadExecutor::Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
384  {
385  // check we can apply reduce to objs
386  static_assert(std::is_same<decltype(redfunc(objs)), T>::value, "redfunc does not have the correct signature");
387  return SeqReduce(objs, redfunc);
388  }
389 
390  template<class T, class R>
391  auto TThreadExecutor::SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
392  {
393  return redfunc(objs);
394  }
395 
396 } // namespace ROOT
397 
398 #endif // R__USE_IMT
399 #endif
void Foreach(F func, unsigned nTimes)
Execute func (with no arguments) nTimes in parallel.
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
double T(double x)
Definition: ChebyshevPol.h:34
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
#define f(i)
Definition: RSha256.hxx:104
This class defines an interface to execute the same task multiple times in parallel, possibly with different arguments every time.
Definition: TExecutor.hxx:61
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:146
auto Reduce(const std::vector< T > &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
"Reduce" an std::vector into a single object in parallel by passing a binary operator as the second a...
This class provides a simple interface to execute the same task multiple times in parallel...
#define F(x, y, z)
TThreadExecutor & operator=(TThreadExecutor &)=delete
auto * a
Definition: textangle.C:12
unsigned int UInt_t
Definition: RtypesCore.h:42
TThreadExecutor()
Class constructor.
T step() const
Definition: TSeq.hxx:184
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
int type
Definition: TGX11.cxx:120
iterator end() const
Definition: TSeq.hxx:166
auto MapReduce(F func, unsigned nTimes, R redfunc) -> typename std::result_of< F()>::type
This method behaves just like Map, but an additional redfunc function must be provided.
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
std::shared_ptr< ROOT::Internal::TPoolManager > fSched
iterator begin() const
Definition: TSeq.hxx:163