Logo ROOT   6.10/09
Reference Guide
TThreadExecutor.hxx
Go to the documentation of this file.
1 // @(#)root/thread:$Id$
2 // Author: Xavier Valls March 2016
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2006, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #ifndef ROOT_TThreadExecutor
13 #define ROOT_TThreadExecutor
14 
15 #include "RConfigure.h"
16 
17 // exclude in case ROOT does not have IMT support
18 #ifndef R__USE_IMT
19 // No need to error out for dictionaries.
20 # if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21 # error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
22 # endif
23 #else
24 
25 #include "ROOT/TExecutor.hxx"
26 #include "ROOT/TPoolManager.hxx"
27 #include "TROOT.h"
28 #include <functional>
29 #include <memory>
30 #include <numeric>
31 
32 namespace ROOT {
33 
34  class TThreadExecutor: public TExecutor<TThreadExecutor> {
35  public:
36  explicit TThreadExecutor();
37 
38  explicit TThreadExecutor(UInt_t nThreads);
39 
40  TThreadExecutor(TThreadExecutor &) = delete;
42 
43  template<class F>
44  void Foreach(F func, unsigned nTimes);
45  template<class F, class INTEGER>
46  void Foreach(F func, ROOT::TSeq<INTEGER> args);
47  /// \cond
48  template<class F, class T>
49  void Foreach(F func, std::initializer_list<T> args);
50  /// \endcond
51  template<class F, class T>
52  void Foreach(F func, std::vector<T> &args);
53 
55  template<class F, class Cond = noReferenceCond<F>>
56  auto Map(F func, unsigned nTimes) -> std::vector<typename std::result_of<F()>::type>;
57  template<class F, class INTEGER, class Cond = noReferenceCond<F, INTEGER>>
59  template<class F, class T, class Cond = noReferenceCond<F, T>>
60  auto Map(F func, std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type>;
61 
62  // // MapReduce
63  // // the late return types also check at compile-time whether redfunc is compatible with func,
64  // // other than checking that func is compatible with the type of arguments.
65  // // a static_assert check in TThreadExecutor::Reduce is used to check that redfunc is compatible with the type returned by func
67  template<class F, class R, class Cond = noReferenceCond<F>>
68  auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> typename std::result_of<F()>::type;
69  template<class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
70  auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(INTEGER)>::type;
71  /// \cond
72  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
73  auto MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
74  /// \endcond
75  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
76  auto MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type;
77 
79  template<class T, class BINARYOP> auto Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
80  template<class T, class R> auto Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
81 
82  protected:
83 
84  template<class F, class R, class Cond = noReferenceCond<F>>
85  auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F()>::type>;
86  template<class F, class INTEGER, class R, class Cond = noReferenceCond<F, INTEGER>>
87  auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(INTEGER)>::type>;
88  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
89  auto Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
90  template<class F, class T, class R, class Cond = noReferenceCond<F, T>>
91  auto Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
92 
93  private:
94  void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function<void(unsigned int i)> &f);
95  double ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc);
96  float ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc);
97  template<class T, class R>
98  auto SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
99 
100  std::shared_ptr<ROOT::Internal::TPoolManager> fSched = nullptr;
101  };
102 
103  /************ TEMPLATE METHODS IMPLEMENTATION ******************/
104 
105  //////////////////////////////////////////////////////////////////////////
106  /// Execute func (with no arguments) nTimes in parallel.
107  /// Functions that take more than zero arguments can be executed (with
108  /// fixed arguments) by wrapping them in a lambda or with std::bind.
109  template<class F>
110  void TThreadExecutor::Foreach(F func, unsigned nTimes) {
111  ParallelFor(0U, nTimes, 1, [&](unsigned int){func();});
112  }
113 
114  //////////////////////////////////////////////////////////////////////////
115  /// Execute func in parallel, taking an element of a
116  /// sequence as argument.
117  template<class F, class INTEGER>
119  ParallelFor(*args.begin(), *args.end(), args.step(), [&](unsigned int i){func(i);});
120  }
121 
122  /// \cond
123  //////////////////////////////////////////////////////////////////////////
124  /// Execute func in parallel, taking an element of a
125  /// initializer_list as argument.
126  template<class F, class T>
127  void TThreadExecutor::Foreach(F func, std::initializer_list<T> args) {
128  std::vector<T> vargs(std::move(args));
129  Foreach(func, vargs);
130  }
131  /// \endcond
132 
133  //////////////////////////////////////////////////////////////////////////
134  /// Execute func in parallel, taking an element of an
135  /// std::vector as argument.
136  template<class F, class T>
137  void TThreadExecutor::Foreach(F func, std::vector<T> &args) {
138  unsigned int nToProcess = args.size();
139  ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
140  }
141 
142  //////////////////////////////////////////////////////////////////////////
143  /// Execute func (with no arguments) nTimes in parallel.
144  /// A vector containg executions' results is returned.
145  /// Functions that take more than zero arguments can be executed (with
146  /// fixed arguments) by wrapping them in a lambda or with std::bind.
147  template<class F, class Cond>
149  using retType = decltype(func());
150  std::vector<retType> reslist(nTimes);
151  auto lambda = [&](unsigned int i)
152  {
153  reslist[i] = func();
154  };
155  ParallelFor(0U, nTimes, 1, lambda);
156 
157  return reslist;
158  }
159 
160  //////////////////////////////////////////////////////////////////////////
161  /// Execute func in parallel, taking an element of a
162  /// sequence as argument.
163  /// A vector containg executions' results is returned.
164  template<class F, class INTEGER, class Cond>
166  unsigned start = *args.begin();
167  unsigned end = *args.end();
168  unsigned seqStep = args.step();
169 
170  using retType = decltype(func(start));
171  std::vector<retType> reslist(end - start);
172  auto lambda = [&](unsigned int i)
173  {
174  reslist[i] = func(i);
175  };
176  ParallelFor(start, end, seqStep, lambda);
177 
178  return reslist;
179  }
180 
181  //////////////////////////////////////////////////////////////////////////
182  /// Execute func (with no arguments) nTimes in parallel.
183  ///Divides and groups the executions in nChunks with partial reduction;
184  /// A vector containg partial reductions' results is returned.
185  template<class F, class R, class Cond>
186  auto TThreadExecutor::Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F()>::type> {
187  if (nChunks == 0)
188  {
189  return Map(func, nTimes);
190  }
191 
192  using retType = decltype(func());
193  std::vector<retType> reslist(nChunks);
194  unsigned step = (nTimes + nChunks - 1) / nChunks;
195  auto lambda = [&](unsigned int i)
196  {
197  std::vector<retType> partialResults(step);
198  for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
199  partialResults[j] = func();
200  }
201  reslist[i / step] = redfunc(partialResults);
202  };
203  ParallelFor(0U, nTimes, step, lambda);
204 
205  return reslist;
206  }
207 
208  //////////////////////////////////////////////////////////////////////////
209  /// Execute func in parallel, taking an element of an
210  /// std::vector as argument.
211  /// A vector containg executions' results is returned.
212  // actual implementation of the Map method. all other calls with arguments eventually
213  // call this one
214  template<class F, class T, class Cond>
216  // //check whether func is callable
217  using retType = decltype(func(args.front()));
218 
219  unsigned int nToProcess = args.size();
220  std::vector<retType> reslist(nToProcess);
221 
222  auto lambda = [&](unsigned int i)
223  {
224  reslist[i] = func(args[i]);
225  };
226 
227  ParallelFor(0U, nToProcess, 1, lambda);
228 
229  return reslist;
230  }
231 
232  //////////////////////////////////////////////////////////////////////////
233  /// Execute func in parallel, taking an element of a
234  /// sequence as argument. Divides and groups the executions in nChunks with partial reduction;
235  /// A vector containg partial reductions' results is returned.
236  template<class F, class INTEGER, class R, class Cond>
238  if (nChunks == 0)
239  {
240  return Map(func, args);
241  }
242 
243  unsigned start = *args.begin();
244  unsigned end = *args.end();
245  unsigned seqStep = args.step();
246  unsigned step = (end - start + nChunks - 1) / nChunks; //ceiling the division
247 
248  using retType = decltype(func(start));
249  std::vector<retType> reslist(nChunks);
250  auto lambda = [&](unsigned int i)
251  {
252  std::vector<retType> partialResults(step);
253  for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
254  partialResults[j] = func(i + j);
255  }
256  reslist[i / step] = redfunc(partialResults);
257  };
258  ParallelFor(start, end, step, lambda);
259 
260  return reslist;
261  }
262 
263 /// \cond
264  //////////////////////////////////////////////////////////////////////////
265  /// Execute func in parallel, taking an element of an
266  /// std::vector as argument. Divides and groups the executions in nChunks with partial reduction;
267  /// A vector containg partial reductions' results is returned.
268  template<class F, class T, class R, class Cond>
269  auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
270  if (nChunks == 0)
271  {
272  return Map(func, args);
273  }
274  // //check whether func is callable
275  using retType = decltype(func(args.front()));
276 
277  unsigned int nToProcess = args.size();
278  std::vector<retType> reslist(nChunks);
279  unsigned step = (nToProcess + nChunks - 1) / nChunks; //ceiling the division
280 
281  auto lambda = [&](unsigned int i)
282  {
283  std::vector<T> partialResults(step);
284  for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
285  partialResults[j] = func(args[i + j]);
286  }
287  reslist[i / step] = redfunc(partialResults);
288  };
289 
290  ParallelFor(0U, nToProcess, step, lambda);
291 
292  return reslist;
293  }
294 
295  //////////////////////////////////////////////////////////////////////////
296  /// Execute func in parallel, taking an element of an
297  /// std::initializer_list as an argument. Divides and groups the executions in nChunks with partial reduction;
298  /// A vector containg partial reductions' results is returned.
299  template<class F, class T, class R, class Cond>
300  auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
301  std::vector<T> vargs(std::move(args));
302  const auto &reslist = Map(func, vargs, redfunc, nChunks);
303  return reslist;
304  }
305 /// \endcond
306 
307 
308  //////////////////////////////////////////////////////////////////////////
309  /// This method behaves just like Map, but an additional redfunc function
310  /// must be provided. redfunc is applied to the vector Map would return and
311  /// must return the same type as func. In practice, redfunc can be used to
312  /// "squash" the vector returned by Map into a single object by merging,
313  /// adding, mixing the elements of the vector.\n
314  /// The fourth argument indicates the number of chunks we want to divide our work in.
315  template<class F, class R, class Cond>
316  auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> typename std::result_of<F()>::type {
317  return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
318  }
319 
320  template<class F, class INTEGER, class R, class Cond>
321  auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(INTEGER)>::type {
322  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
323  }
324  /// \cond
325  template<class F, class T, class R, class Cond>
326  auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
327  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
328  }
329  /// \endcond
330  template<class F, class T, class R, class Cond>
331  auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> typename std::result_of<F(T)>::type {
332  return Reduce(Map(func, args, redfunc, nChunks), redfunc);
333  }
334 
335  //////////////////////////////////////////////////////////////////////////
336  /// "Reduce" an std::vector into a single object in parallel by passing a
337  /// binary operator as the second argument to act on pairs of elements of the std::vector.
338  template<class T, class BINARYOP>
339  auto TThreadExecutor::Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
340  {
341  // check we can apply reduce to objs
342  static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value, "redfunc does not have the correct signature");
343  return ParallelReduce(objs, redfunc);
344  }
345 
346  //////////////////////////////////////////////////////////////////////////
347  /// "Reduce" an std::vector into a single object by passing a
348  /// function as the second argument defining the reduction operation.
349  template<class T, class R>
350  auto TThreadExecutor::Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
351  {
352  // check we can apply reduce to objs
353  static_assert(std::is_same<decltype(redfunc(objs)), T>::value, "redfunc does not have the correct signature");
354  return SeqReduce(objs, redfunc);
355  }
356 
357  template<class T, class R>
358  auto TThreadExecutor::SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
359  {
360  return redfunc(objs);
361  }
362 
363 } // namespace ROOT
364 
365 #endif // R__USE_IMT
366 #endif
void Foreach(F func, unsigned nTimes)
Execute func (with no arguments) nTimes in parallel.
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
double T(double x)
Definition: ChebyshevPol.h:34
auto SeqReduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
TArc * a
Definition: textangle.C:12
This class defines an interface to execute the same task multiple times in parallel, possibly with different arguments every time.
Definition: TExecutor.hxx:61
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:146
auto Reduce(const std::vector< T > &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
"Reduce" an std::vector into a single object in parallel by passing a binary operator as the second a...
This class provides a simple interface to execute the same task multiple times in parallel...
#define F(x, y, z)
TThreadExecutor & operator=(TThreadExecutor &)=delete
unsigned int UInt_t
Definition: RtypesCore.h:42
TThreadExecutor()
Class constructor.
T step() const
Definition: TSeq.hxx:184
double f(double x)
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
int type
Definition: TGX11.cxx:120
double func(double *x, double *p)
Definition: stressTF1.cxx:213
iterator end() const
Definition: TSeq.hxx:166
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
std::shared_ptr< ROOT::Internal::TPoolManager > fSched
auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> typename std::result_of< F()>::type
This method behaves just like Map, but an additional redfunc function must be provided.
TRandom3 R
a TMatrixD.
Definition: testIO.cxx:28
iterator begin() const
Definition: TSeq.hxx:163