Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
Utils.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDFUTILS
12#define ROOT_RDFUTILS
13
14#include "ROOT/RSpan.hxx"
15#include <string_view>
16#include "ROOT/RVec.hxx"
17#include "ROOT/TypeTraits.hxx"
18#include "Rtypes.h"
19
20#include <array>
21#include <deque>
22#include <functional>
23#include <memory>
24#include <new> // std::hardware_destructive_interference_size
25#include <unordered_set>
26#include <shared_mutex>
27#include <string>
28#include <type_traits> // std::decay, std::false_type
29#include <vector>
30
31class TTree;
32class TTreeReader;
33
// Forward declaration of the dataset specification type used by the RDataFrame utilities.
namespace ROOT::RDF::Experimental {
class RDatasetSpec;
}
37namespace ROOT {
38namespace RDF {
39using ColumnNames_t = std::vector<std::string>;
40}
41
42class RLogChannel;
43
44namespace RDF {
45class RDataSource;
46}
47
48namespace Detail {
49namespace RDF {
50
52
54
55// fwd decl for ColumnName2ColumnTypeName
56class RDefineBase;
57
58// type used for tag dispatching
60};
61
62} // end ns RDF
63} // end ns Detail
64
65namespace Internal {
66namespace RDF {
67
68using namespace ROOT::TypeTraits;
69using namespace ROOT::Detail::RDF;
70using namespace ROOT::RDF;
71
72/// Obtain or set the number of threads that will share a clone of a thread-safe 3D histogram.
73/// Setting it to N will make N threads share a clone, setting it to 0 or 1 will use one clone
74/// per thread.
75/// Setting it to higher numbers reduces the RDF memory consumption, but might create contention
76/// on TH3Ds. When the RDF computation graph consists mostly of filling TH3Ds, lower values are better.
77/// \return A reference to the current divider.
78unsigned int &NThreadPerTH3();
79
/// Check for container traits.
///
/// `value` is `true` when the decayed `T` provides `begin()`/`end()` (for both const and non-const objects)
/// returning its nested `iterator`/`const_iterator` types, and dereferencing those iterators yields
/// `value_type &` and `value_type const &` respectively.
///
/// Note that for all uses in RDF we don't want to classify std::string as a container.
/// Template specializations of IsDataContainer make it return `true` for std::span<T>, std::vector<bool> and
/// RVec<bool>, which we do want to count as containers even though they do not satisfy all the traits tested by the
/// generic IsDataContainer<T>.
template <typename T>
struct IsDataContainer {
   using Test_t = std::decay_t<T>;

   // Preferred overload: it only takes part in overload resolution (SFINAE) when `A` has begin()/end()
   // and the nested iterator types. The pointer parameters are never dereferenced at run time; they only
   // exist so that the types of the corresponding expressions can be inspected.
   template <typename A>
   static constexpr bool Test(A *pt, A const *cpt = nullptr, decltype(pt->begin()) * = nullptr,
                              decltype(pt->end()) * = nullptr, decltype(cpt->begin()) * = nullptr,
                              decltype(cpt->end()) * = nullptr, typename A::iterator *pi = nullptr,
                              typename A::const_iterator *pci = nullptr)
   {
      using It_t = typename A::iterator;
      using CIt_t = typename A::const_iterator;
      using V_t = typename A::value_type;
      // NOTE(review): the std::string exclusion below compares the non-decayed `T`, so a cv- or
      // ref-qualified std::string is not excluded by it -- confirm this is intended.
      return std::is_same<decltype(pt->begin()), It_t>::value && std::is_same<decltype(pt->end()), It_t>::value &&
             std::is_same<decltype(cpt->begin()), CIt_t>::value && std::is_same<decltype(cpt->end()), CIt_t>::value &&
             std::is_same<decltype(**pi), V_t &>::value && std::is_same<decltype(**pci), V_t const &>::value &&
             !std::is_same<T, std::string>::value;
   }

   // Fallback overload, selected when the one above is not viable.
   template <typename A>
   static constexpr bool Test(...)
   {
      return false;
   }

   static constexpr bool value = Test<Test_t>(nullptr);
};

/// std::vector<bool> is counted as a container although it does not satisfy all the generic checks.
template <>
struct IsDataContainer<std::vector<bool>> {
   static constexpr bool value = true;
};
118
119template<>
121 static constexpr bool value = true;
122};
123
124template<typename T>
125struct IsDataContainer<std::span<T>> {
126 static constexpr bool value = true;
127};
128
/// Detect whether a type is an instantiation of vector<T,A>.
/// The primary template derives from std::false_type.
template <typename>
struct IsVector_t : public std::false_type {
};

/// Specialization matching std::vector<T, A> for any element type and allocator; derives from std::true_type.
template <typename T, typename A>
struct IsVector_t<std::vector<T, A>> : public std::true_type {
};
135
136std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName);
137
138const std::type_info &TypeName2TypeID(const std::string &name);
139
140std::string TypeID2TypeName(const std::type_info &id);
141
142std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &df, std::string_view colName, bool vector2RVec);
143std::string
144ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec = true);
145
146char TypeName2ROOTTypeName(const std::string &b);
147char TypeID2ROOTTypeName(const std::type_info &tid);
148
149unsigned int GetNSlots();
150
151/// `type` is TypeList if MustRemove is false, otherwise it is a TypeList with the first type removed
152template <bool MustRemove, typename TypeList>
156
157template <typename TypeList>
161
162template <bool MustRemove, typename TypeList>
164
165template <bool MustRemove, typename TypeList>
169
170template <typename TypeList>
175
176template <bool MustRemove, typename TypeList>
178
179// Check the value_type type of a type with a SFINAE to allow compilation in presence
180// fundamental types
181template <typename T,
182 bool IsDataContainer = IsDataContainer<std::decay_t<T>>::value || std::is_same<std::string, T>::value>
183struct ValueType {
184 using value_type = typename T::value_type;
185};
186
187template <typename T>
188struct ValueType<T, false> {
189 using value_type = T;
190};
191
192template <typename T>
194 using value_type = T;
195};
196
197std::vector<std::string> ReplaceDotWithUnderscore(const std::vector<std::string> &columnNames);
198
/// Erase every element equal to `that` from vector `v`.
///
/// \param[in] that Value to remove; elements are compared against it with `operator==`.
/// \param[in,out] v Vector modified in place. The relative order of the remaining elements is preserved.
///
/// \note `that` should not refer to an element of `v`: std::remove takes the value by reference while it
///       shifts elements, so an aliasing argument can change during the operation.
template <typename T>
void Erase(const T &that, std::vector<T> &v)
{
   // std::remove compacts the kept elements to the front and returns the start of the leftover tail.
   const auto firstRemoved = std::remove(v.begin(), v.end(), that);
   v.erase(firstRemoved, v.end());
}
205
206/// Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors
207void InterpreterDeclare(const std::string &code);
208
209/// Jit code in the interpreter with TInterpreter::Calc, throw in case of errors.
210/// The optional `context` parameter, if present, is mentioned in the error message.
211void InterpreterCalc(const std::string &code, const std::string &context = "");
212
213/// Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_
214bool IsInternalColumn(std::string_view colName);
215
216/// Get optimal column width for printing a table given the names and the desired minimal space between columns
217unsigned int GetColumnWidth(const std::vector<std::string>& names, const unsigned int minColumnSpace = 8u);
218
219/// Stepping through CacheLineStep<T> values in a vector<T> brings you to a new cache line.
220/// Useful to avoid false sharing.
221template <typename T>
222constexpr std::size_t CacheLineStep() {
223 constexpr std::size_t cacheLineSize = R__HARDWARE_INTERFERENCE_SIZE;
224 return (cacheLineSize + sizeof(T) - 1) / sizeof(T);
225}
226
227void CheckReaderTypeMatches(const std::type_info &colType, const std::type_info &requestedType,
228 const std::string &colName);
229
/// Return the index of the first element of `arr` that evaluates to `true`.
///
/// \param[in] arr Collection providing `size()` and `operator[]`, with elements usable as a condition.
/// \return The index of the first true element, or `arr.size()` if there is none.
// TODO in C++17 this could be a lambda within FillHelper::Exec
template <typename T>
constexpr std::size_t FindIdxTrue(const T &arr)
{
   for (std::size_t i = 0; i < arr.size(); ++i) {
      if (arr[i])
         return i;
   }
   return arr.size();
}
240
/// Return a reference to the N-th (zero-based) argument of the call.
///
/// The return type is decltype(auto) so that the reference returned by std::get is passed through
/// unchanged instead of being decayed to a copy. Since `args...` are named, and therefore lvalues, inside
/// this function, the result is always an lvalue reference to the selected argument.
///
/// \tparam N Zero-based index of the argument to select; must be smaller than the number of arguments.
template <std::size_t N, typename... Ts>
decltype(auto) GetNthElement(Ts &&...args)
{
   static_assert(N < sizeof...(Ts), "GetNthElement: index N is out of range for the given arguments");
   // Tuple of lvalue references to the arguments; no argument is copied.
   auto refs = std::forward_as_tuple(args...);
   return std::get<N>(refs);
}
248
// Logical OR of the traits `Ts...`: std::disjunction when compiling as C++17 or later, otherwise a local
// equivalent with the same structure.
#if __cplusplus >= 201703L
template <class... Ts>
using Disjunction = std::disjunction<Ts...>;
#else
// Empty list of traits: false.
template <class...>
struct Disjunction : std::false_type {
};
// Single trait: the result is that trait.
template <class B1>
struct Disjunction<B1> : B1 {
};
// General case: pick B1 if its value is true, otherwise recurse on the remaining traits.
template <class B1, class... Bn>
struct Disjunction<B1, Bn...> : std::conditional_t<bool(B1::value), B1, Disjunction<Bn...>> {
};
#endif
263
264bool IsStrInVec(const std::string &str, const std::vector<std::string> &vec);
265
/// Return a vector with all elements of v1 and v2 and duplicates removed.
/// Precondition: each of v1 and v2 must not have duplicate elements.
///
/// The result contains the elements of v1 in their original order, followed by the elements of v2 that
/// do not compare equal to any element of v1, in the order they appear in v2.
template <typename T>
std::vector<T> Union(const std::vector<T> &v1, const std::vector<T> &v2)
{
   std::vector<T> res = v1;

   // Append only the elements of v2 that are not already present in v1.
   // Searching v1 (and not res) is sufficient because of the no-duplicates precondition on v2.
   for (const auto &candidate : v2) {
      const bool alreadyInV1 = std::find(v1.begin(), v1.end(), candidate) != v1.end();
      if (!alreadyInV1)
         res.emplace_back(candidate);
   }

   return res;
}
280
281/**
282 * \brief A Thread-safe cache for strings.
283 *
284 * This is used to generically store strings that are created in the computation
285 * graph machinery, for example when adding a new node.
286 */
288 std::unordered_set<std::string> fStrings{};
289 std::shared_mutex fMutex{};
290
291public:
292 /**
293 * \brief Inserts the input string in the cache and returns an iterator to the cached string.
294 *
295 * The function implements the following strategy for thread-safety:
296 * 1. Take a shared lock and early return if the string is already in the cache.
297 * 2. Release the shared lock and take an exclusive lock.
298 * 3. Check again if another thread filled the cache meanwhile. If so, return the cached value.
299 * 4. Insert the new value in the cache and return.
300 */
301 auto Insert(const std::string &string) -> decltype(fStrings)::const_iterator;
302};
303
/**
 * \brief Struct to wrap the call to a function with a guaranteed order of
 *        execution of its arguments.
 * \tparam F Type of the callable.
 * \tparam Args Variadic types of the arguments to the callable.
 *
 * The execution order is guaranteed by calling the function in the constructor
 * thus enabling the exploitation of the list-initialization sequenced-before
 * feature (See rule 9 at https://en.cppreference.com/w/cpp/language/eval_order).
 *
 * The guarantee only holds when the object is created with braces, e.g.
 * `CallGuaranteedOrder{f, a(), b()}`; the return value of `f`, if any, is discarded.
 */
struct CallGuaranteedOrder {
   template <typename F, typename... Args>
   CallGuaranteedOrder(F &&f, Args &&...args)
   {
      f(std::forward<Args>(args)...);
   }
};
321
/// Wrap `rawPtr` in a std::shared_ptr<T> built with the aliasing constructor.
///
/// The result aliases a default-constructed (empty) shared_ptr, so `get()` returns `rawPtr` but the
/// returned object does not own it: the pointee is not deleted when the shared_ptr is destroyed and its
/// lifetime must be managed by the caller.
///
/// \param[in] rawPtr Pointer to expose through the shared_ptr interface.
template <typename T>
auto MakeAliasedSharedPtr(T *rawPtr)
{
   // Empty shared_ptr that is only used as the "owner" argument of the aliasing constructor.
   const static std::shared_ptr<T> fgRawPtrCtrlBlock;
   return std::shared_ptr<T>(fgRawPtrCtrlBlock, rawPtr);
}
328
329
330/**
331 * \brief Function to retrieve RDatasetSpec from JSON file provided
332 * \param[in] jsonFile Path to the dataset specification JSON file.
333 *
334 * This function allows us to have access to an RDatasetSpec which needs to
335 * be created when we use the FromSpec factory function.
336 */
338
339/**
340 * Tag to let data sources use the native data type when creating a column reader.
341 *
342 * See usage of this in RNTupleDS
343 */
345
346} // end NS RDF
347} // end NS Internal
348} // end NS ROOT
349
350#endif // RDFUTILS
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define e(i)
Definition RSha256.hxx:103
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
char name[80]
Definition TGX11.cxx:110
A Thread-safe cache for strings.
Definition Utils.hxx:287
auto Insert(const std::string &string) -> decltype(fStrings)::const_iterator
Inserts the input string in the cache and returns an iterator to the cached string.
Definition RDFUtils.cxx:528
std::unordered_set< std::string > fStrings
Definition Utils.hxx:288
The dataset specification for RDataFrame.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
A log configuration for a channel, e.g.
Definition RLogger.hxx:98
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1525
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:89
TPaveText * pt
#define F(x, y, z)
ROOT::RLogChannel & RDFLogChannel()
Definition RDFUtils.cxx:42
auto MakeAliasedSharedPtr(T *rawPtr)
Definition Utils.hxx:323
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition RDFUtils.cxx:397
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition RDFUtils.cxx:73
typename RemoveFirstTwoParametersIf< MustRemove, TypeList >::type RemoveFirstTwoParametersIf_t
Definition Utils.hxx:177
ROOT::RDF::Experimental::RDatasetSpec RetrieveSpecFromJson(const std::string &jsonFile)
Function to retrieve RDatasetSpec from JSON file provided.
Definition RDFUtils.cxx:545
unsigned int GetNSlots()
Definition RDFUtils.cxx:384
decltype(auto) GetNthElement(Ts &&...args)
Definition Utils.hxx:243
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition RDFUtils.cxx:342
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:178
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition RDFUtils.cxx:523
void Erase(const T &that, std::vector< T > &v)
Erase that element from vector v
Definition Utils.hxx:201
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
Definition RDFUtils.cxx:474
std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
Return the typename of object colName stored in t, if any.
Definition RDFUtils.cxx:255
constexpr std::size_t CacheLineStep()
Stepping through CacheLineStep<T> values in a vector<T> brings you to a new cache line.
Definition Utils.hxx:222
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:312
void InterpreterCalc(const std::string &code, const std::string &context="")
Jit code in the interpreter with TInterpreter::Calc, throw in case of errors.
Definition RDFUtils.cxx:428
void CheckReaderTypeMatches(const std::type_info &colType, const std::type_info &requestedType, const std::string &colName)
Definition RDFUtils.cxx:486
constexpr std::size_t FindIdxTrue(const T &arr)
Definition Utils.hxx:232
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
Definition Utils.hxx:269
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:465
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
Definition RDFUtils.cxx:627
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
Definition RDFUtils.cxx:416
unsigned int & NThreadPerTH3()
Obtain or set the number of threads that will share a clone of a thread-safe 3D histogram.
Definition RDFUtils.cxx:63
typename RemoveFirstParameterIf< MustRemove, TypeList >::type RemoveFirstParameterIf_t
Definition Utils.hxx:163
char TypeID2ROOTTypeName(const std::type_info &tid)
Definition RDFUtils.cxx:206
std::vector< std::string > ColumnNames_t
ROOT type_traits extensions.
Namespace for new ROOT classes and functions.
Struct to wrap the call to a function with a guaranteed order of execution of its arguments.
Definition Utils.hxx:314
CallGuaranteedOrder(F &&f, Args &&...args)
Definition Utils.hxx:316
Check for container traits.
Definition Utils.hxx:87
static constexpr bool Test(A *pt, A const *cpt=nullptr, decltype(pt->begin()) *=nullptr, decltype(pt->end()) *=nullptr, decltype(cpt->begin()) *=nullptr, decltype(cpt->end()) *=nullptr, typename A::iterator *pi=nullptr, typename A::const_iterator *pci=nullptr)
Definition Utils.hxx:91
static constexpr bool Test(...)
Definition Utils.hxx:106
static constexpr bool value
Definition Utils.hxx:111
Detect whether a type is an instantiation of vector<T,A>
Definition Utils.hxx:131
type is TypeList if MustRemove is false, otherwise it is a TypeList with the first type removed
Definition Utils.hxx:153
typename RemoveFirstParameterIf< true, typeTmp >::type type
Definition Utils.hxx:173
typename RemoveFirstParameterIf< true, TypeList >::type typeTmp
Definition Utils.hxx:172
Tag to let data sources use the native data type when creating a column reader.
Definition Utils.hxx:344
typename T::value_type value_type
Definition Utils.hxx:184
Lightweight storage for a collection of types.