Logo ROOT   6.16/01
Reference Guide
TThreadExecutor.cxx
Go to the documentation of this file.
2
3#pragma GCC diagnostic push
4#pragma GCC diagnostic ignored "-Wshadow"
5
6#include "tbb/tbb.h"
7
8#pragma GCC diagnostic pop
9
10//////////////////////////////////////////////////////////////////////////
11///
12/// \class ROOT::TThreadExecutor
13/// \ingroup Parallelism
14/// \brief This class provides a simple interface to execute the same task
15/// multiple times in parallel, possibly with different arguments every
16/// time. This mimics the behaviour of python's pool.Map method.
17///
18/// ###ROOT::TThreadExecutor::Map
/// This class inherits its interface from ROOT::TExecutor.\n
20/// The two possible usages of the Map method are:\n
21/// * Map(F func, unsigned nTimes): func is executed nTimes with no arguments
22/// * Map(F func, T& args): func is executed on each element of the collection of arguments args
23///
/// For either signature, func is executed as many times as needed by a pool of
/// nThreads threads; nThreads defaults to the number of cores.\n
26/// A collection containing the result of each execution is returned.\n
27/// **Note:** the user is responsible for the deletion of any object that might
28/// be created upon execution of func, returned objects included: ROOT::TThreadExecutor never
29/// deletes what it returns, it simply forgets it.\n
30///
31/// \param func
32/// \parblock
33/// a lambda expression, an std::function, a loaded macro, a
34/// functor class or a function that takes zero arguments (for the first signature)
35/// or one (for the second signature).
36/// \endparblock
37/// \param args
38/// \parblock
39/// a standard vector, a ROOT::TSeq of integer type or an initializer list for the second signature.
40/// An integer only for the first.
41/// \endparblock
42/// **Note:** in cases where the function to be executed takes more than
43/// zero/one argument but all are fixed except zero/one, the function can be wrapped
44/// in a lambda or via std::bind to give it the right signature.\n
45///
46/// #### Return value:
47/// An std::vector. The elements in the container
48/// will be the objects returned by func.
49///
50///
51/// #### Examples:
52///
53/// ~~~{.cpp}
54/// root[] ROOT::TThreadExecutor pool; auto hists = pool.Map(CreateHisto, 10);
55/// root[] ROOT::TThreadExecutor pool(2); auto squares = pool.Map([](int a) { return a*a; }, {1,2,3});
56/// ~~~
57///
58/// ###ROOT::TThreadExecutor::MapReduce
59/// This set of methods behaves exactly like Map, but takes an additional
60/// function as a third argument. This function is applied to the set of
61/// objects returned by the corresponding Map execution to "squash" them
62/// to a single object. This function should be independent of the size of
63/// the vector returned by Map due to optimization of the number of chunks.
64///
65/// If this function is a binary operator, the "squashing" will be performed in parallel.
66/// This is exclusive to ROOT::TThreadExecutor and not any other ROOT::TExecutor-derived classes.\n
67/// An integer can be passed as the fourth argument indicating the number of chunks we want to divide our work in.
68/// This may be useful to avoid the overhead introduced when running really short tasks.
69///
70/// ####Examples:
71/// ~~~{.cpp}
72/// root[] ROOT::TThreadExecutor pool; auto ten = pool.MapReduce([]() { return 1; }, 10, [](std::vector<int> v) { return std::accumulate(v.begin(), v.end(), 0); })
73/// root[] ROOT::TThreadExecutor pool; auto hist = pool.MapReduce(CreateAndFillHists, 10, PoolUtils::ReduceObjects);
74/// ~~~
75///
76//////////////////////////////////////////////////////////////////////////
77
78/*
79VERY IMPORTANT NOTE ABOUT WORK ISOLATION
80
We enclose the parallel_for and parallel_reduce invocations in a
task_arena::isolate because we want to prevent a thread from starting to execute
an outer task while the task it is running has spawned subtasks, e.g. with a
parallel_for, and is waiting for those inner tasks to complete.
85
86While this change has a negligible performance impact, it has benefits for
87several applications, for example big parallelised HEP frameworks and
88RDataFrame analyses.
89- For HEP Frameworks, without work isolation, it can happen that a huge
90framework task is pulled by a yielding ROOT task.
This delays the processing of the event that is interrupted by the
long task.
93For example, work isolation avoids that during the wait due to the parallel
94flushing of baskets, a very long simulation task is pulled in by the idle task.
95- For RDataFrame analyses we want to guarantee that each entry is processed from
96the beginning to the end without TBB interrupting it to pull in other work items.
As a corollary, the usage of ROOT (or TBB in work isolation mode) in actions
and transformations guarantees that each entry is processed from the beginning to
the end without being interrupted by the processing of outer tasks.
100*/
101
102namespace ROOT {
103namespace Internal {
104
105/// A helper function to implement the TThreadExecutor::ParallelReduce methods
106template<typename T>
107static T ParallelReduceHelper(const std::vector<T> &objs, const std::function<T(T a, T b)> &redfunc)
108{
109 using BRange_t = tbb::blocked_range<decltype(objs.begin())>;
110
111 auto pred = [redfunc](BRange_t const & range, T init) {
112 return std::accumulate(range.begin(), range.end(), init, redfunc);
113 };
114
115 BRange_t objRange(objs.begin(), objs.end());
116
117 return tbb::this_task_arena::isolate([&]{
118 return tbb::parallel_reduce(objRange, T{}, pred, redfunc);
119 });
120
121}
122
123} // End NS Internal
124} // End NS ROOT
125
126namespace ROOT {
127
128 //////////////////////////////////////////////////////////////////////////
129 /// Class constructor.
130 /// If the scheduler is active, gets a pointer to it.
131 /// If not, initializes the pool of threads with the number of logical threads supported by the hardware.
132 TThreadExecutor::TThreadExecutor(): TThreadExecutor::TThreadExecutor(tbb::task_scheduler_init::default_num_threads()) {}
133 //////////////////////////////////////////////////////////////////////////
134 /// Class constructor.
135 /// nThreads is the number of threads that will be spawned. If the scheduler is active (ImplicitMT enabled, another TThreadExecutor instance),
136 /// it won't change the number of threads.
138 {
140 }
141
142 void TThreadExecutor::ParallelFor(unsigned int start, unsigned int end, unsigned step, const std::function<void(unsigned int i)> &f)
143 {
144 tbb::this_task_arena::isolate([&]{
145 tbb::parallel_for(start, end, step, f);
146 });
147 }
148
149 double TThreadExecutor::ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc)
150 {
151 return ROOT::Internal::ParallelReduceHelper<double>(objs, redfunc);
152 }
153
154 float TThreadExecutor::ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc)
155 {
156 return ROOT::Internal::ParallelReduceHelper<float>(objs, redfunc);
157 }
158
161 }
162
163}
#define b(i)
Definition: RSha256.hxx:100
#define f(i)
Definition: RSha256.hxx:104
static Int_t init()
unsigned int UInt_t
Definition: RtypesCore.h:42
static UInt_t GetPoolSize()
Returns the number of threads running when the scheduler has been instantiated within ROOT.
This class provides a simple interface to execute the same task multiple times in parallel,...
std::shared_ptr< ROOT::Internal::TPoolManager > fSched
void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function< void(unsigned int i)> &f)
TThreadExecutor()
Class constructor.
double ParallelReduce(const std::vector< double > &objs, const std::function< double(double a, double b)> &redfunc)
std::shared_ptr< TPoolManager > GetPoolManager(UInt_t nThreads=0)
Get a shared pointer to the manager.
static T ParallelReduceHelper(const std::vector< T > &objs, const std::function< T(T a, T b)> &redfunc)
A helper function to implement the TThreadExecutor::ParallelReduce methods.
double T(double x)
Definition: ChebyshevPol.h:34
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:151
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
auto * a
Definition: textangle.C:12