Logo ROOT   6.16/01
Reference Guide
RColumnValue.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 09/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RCOLUMNVALUE
12#define ROOT_RCOLUMNVALUE
13
15#include <ROOT/RDF/Utils.hxx> // IsRVec_t, TypeID2TypeName
17#include <ROOT/RMakeUnique.hxx>
18#include <ROOT/RVec.hxx>
19#include <ROOT/TypeTraits.hxx> // TakeFirstParameter_t
20#include <RtypesCore.h>
21#include <TTreeReader.h>
22#include <TTreeReaderValue.h>
23#include <TTreeReaderArray.h>
24
25#include <cstring> // strcmp
26#include <initializer_list>
27#include <limits>
28#include <memory>
29#include <stdexcept>
30#include <string>
31#include <tuple>
32#include <type_traits>
33#include <vector>
34
35namespace ROOT {
36namespace Internal {
37namespace RDF {
38using namespace ROOT::VecOps;
39
40/**
41\class ROOT::Internal::RDF::RColumnValue
42\ingroup dataframe
43\brief Helper class that updates and returns TTree branches as well as RDataFrame temporary columns
44\tparam T The type of the column
45
46RDataFrame nodes must access two different types of values during the event loop:
47values of real branches, for which TTreeReader{Values,Arrays} act as proxies, or
48temporary columns whose values are generated on the fly. While the type of the
49value is known at compile time (or just-in-time), it is only at runtime that nodes
50can check whether a certain value is generated on the fly or not.
51
52RColumnValue abstracts this difference by providing the same interface for
53both cases and handling the reading or generation of new values transparently.
54Only one of the data members fTreeReader, fCustomValuePtr or fDSValuePtr is in use
55for a given RColumnValue, depending on whether the value comes from a real
56TTree branch, a custom (temporary) column or a data-source column respectively.
57
58RDataFrame nodes can store tuples of RColumnValues and retrieve an updated
59value for the column via the `Get` method.
60**/
61template <typename T>
62class R__CLING_PTRCHECK(off) RColumnValue {
63// R__CLING_PTRCHECK is disabled because all pointers are hand-crafted by RDF.
64
65 using MustUseRVec_t = IsRVec_t<T>;
66
67 // ColumnValue_t is the type of the column or the type of the elements of an array column
68 using ColumnValue_t = typename std::conditional<MustUseRVec_t::value, TakeFirstParameter_t<T>, T>::type;
69 using TreeReader_t = typename std::conditional<MustUseRVec_t::value, TTreeReaderArray<ColumnValue_t>,
71
72 /// RColumnValue has a slightly different behaviour whether the column comes from a TTreeReader, a RDataFrame Define
73 /// or a RDataSource. It stores which it is as an enum.
74 enum class EColumnKind { kTree, kCustomColumn, kDataSource, kInvalid };
75 // Set to the correct value by MakeProxy or SetTmpColumn
76 EColumnKind fColumnKind = EColumnKind::kInvalid;
77 /// The slot this value belongs to. Only needed when querying custom column values, it is set in `SetTmpColumn`.
78 unsigned int fSlot = std::numeric_limits<unsigned int>::max();
79
80 // Each element of the following stacks will be in use by a _single task_.
81 // Each task will push one element when it starts and pop it when it ends.
82 // Stacks will typically be very small (1-2 elements typically) and will only grow over size 1 in case of interleaved
83 // task execution i.e. when more than one task needs readers in this worker thread.
84
85 /// Owning ptrs to a TTreeReaderValue or TTreeReaderArray. Only used for Tree columns.
86 std::unique_ptr<TreeReader_t> fTreeReader;
87 /// Non-owning ptrs to the value of a custom column.
88 T *fCustomValuePtr;
89 /// Non-owning ptrs to the value of a data-source column.
90 T **fDSValuePtr;
91 /// Non-owning ptrs to the node responsible for the custom column. Needed when querying custom values.
92 RCustomColumnBase *fCustomColumn;
93 /// Enumerator for the different properties of the branch storage in memory
94 enum class EStorageType : char { kContiguous, kUnknown, kSparse };
95 /// Signal whether we ever checked that the branch we are reading with a TTreeReaderArray stores array elements
96 /// in contiguous memory. Only used when T == RVec<U>.
97 EStorageType fStorageType = EStorageType::kUnknown;
98 /// If MustUseRVec, i.e. we are reading an array, we return a reference to this RVec to clients
100 bool fCopyWarningPrinted = false;
101
102public:
/// Default constructor. fColumnKind starts as kInvalid and fSlot as the maximum unsigned value (see their
/// in-class initializers); the object becomes usable only after MakeProxy or SetTmpColumn has been called.
// NOTE(review): fCustomValuePtr, fDSValuePtr and fCustomColumn have no in-class initializer and the body is
// empty, so they hold indeterminate values until SetTmpColumn assigns them.
103 RColumnValue(){};
104
/// Bind this RColumnValue to a custom (Define'd) column or to a data-source column.
/// \param[in] slot The processing slot; forwarded to `customColumn->GetValuePtr` and stored in fSlot.
/// \param[in] customColumn The node that provides the column value; stored as a non-owning pointer.
/// \throws std::runtime_error if the name of typeid(T) differs from the name of the column's type id.
// NOTE(review): customColumn is dereferenced without a null check -- presumably callers always pass a valid
// node; confirm. Also note that fCustomColumn is assigned before the type check, so it stays set even if the
// check below throws.
105 void SetTmpColumn(unsigned int slot, RCustomColumnBase *customColumn)
106 {
107 fCustomColumn = customColumn;
108 // Here we compare names and not typeinfos since they may come from two different contexts: a compiled
109 // and a jitted one.
110 if (0 != strcmp(customColumn->GetTypeId().name(), typeid(T).name()))
111 throw std::runtime_error(
112 std::string("RColumnValue: type specified for column \"" + customColumn->GetName() + "\" is ") +
113 TypeID2TypeName(typeid(T)) + " but temporary column has type " +
114 TypeID2TypeName(customColumn->GetTypeId()));
115
116 if (customColumn->IsDataSourceColumn()) {
117 fColumnKind = EColumnKind::kDataSource;
118 fDSValuePtr = static_cast<T **>(customColumn->GetValuePtr(slot)); // pointer-to-pointer: Get dereferences it twice
119 } else {
120 fColumnKind = EColumnKind::kCustomColumn;
121 fCustomValuePtr = static_cast<T *>(customColumn->GetValuePtr(slot)); // direct pointer to the value
122 }
123 fSlot = slot; // needed later by Get to call fCustomColumn->Update(fSlot, entry)
124 }
125
/// Bind this RColumnValue to a TTree branch.
/// \param[in] r The TTreeReader the new TTreeReaderValue/TTreeReaderArray is constructed with.
/// \param[in] bn The branch name, passed to the reader proxy constructor as a C string.
/// Marks the column as kTree and stores a newly created TreeReader_t in fTreeReader; being a unique_ptr
/// assignment, any previously owned proxy is destroyed.
// NOTE(review): r is dereferenced without a null check -- presumably callers always pass a valid reader; confirm.
126 void MakeProxy(TTreeReader *r, const std::string &bn)
127 {
128 fColumnKind = EColumnKind::kTree;
129 fTreeReader = std::make_unique<TreeReader_t>(*r, bn.c_str());
130 }
131
132 /// This overload is used to return scalar quantities (i.e. types that are not read into a RVec).
/// \param[in] entry The entry number; only used for non-tree columns, where it is forwarded to
/// `fCustomColumn->Update` together with fSlot.
/// \return A reference to the value held by the tree reader proxy, the custom column or the data source.
133 // This method is executed inside the event-loop, many times per entry.
134 // If need be, the if statement can be avoided using thunks
135 // (have both branches inside functions and have a pointer to the branch to be executed).
// NOTE(review): any fColumnKind other than kTree, kInvalid included, takes the else branch and dereferences
// fCustomColumn -- presumably MakeProxy or SetTmpColumn is always called before Get; confirm.
136 template <typename U = T, typename std::enable_if<!RColumnValue<U>::MustUseRVec_t::value, int>::type = 0>
137 T &Get(Long64_t entry)
138 {
139 if (fColumnKind == EColumnKind::kTree) {
140 return *(fTreeReader->Get());
141 } else {
142 fCustomColumn->Update(fSlot, entry); // ask the column node to update its value for this slot and entry
143 return fColumnKind == EColumnKind::kCustomColumn ? *fCustomValuePtr : **fDSValuePtr;
144 }
145 }
146
147 /// This overload is used to return arrays (i.e. types that are read into a RVec).
148 /// In this case the returned T is always a RVec<ColumnValue_t>.
149 /// RVec<bool> is treated differently, in a separate overload.
/// \param[in] entry The entry number; only used for non-tree columns, where it is forwarded to
/// `fCustomColumn->Update` together with fSlot.
/// \return For tree columns a reference to fRVec, refilled on every call; otherwise a reference to the value
/// of the custom or data-source column.
150 template <typename U = T,
151 typename std::enable_if<RColumnValue<U>::MustUseRVec_t::value && !std::is_same<U, RVec<bool>>::value,
152 int>::type = 0>
153 T &Get(Long64_t entry)
154 {
155 if (fColumnKind == EColumnKind::kTree) {
156 auto &readerArray = *fTreeReader;
157 // We only use TTreeReaderArrays to read columns that users flagged as type `RVec`, so we need to check
158 // that the branch stores the array as contiguous memory that we can actually wrap in an `RVec`.
159 // Currently we need the first entry to have been loaded to perform the check
160 // TODO Move check to `MakeProxy` once Axel implements this kind of check in TTreeReaderArray using
161 // TBranchProxy
162
163 if (EStorageType::kUnknown == fStorageType && readerArray.GetSize() > 1) {
164 // We can decide since the array is long enough: the storage is considered contiguous if and only if
// elements 0 and 1 are adjacent in memory. The result is cached in fStorageType for later calls.
165 fStorageType =
166 (1 == (&readerArray[1] - &readerArray[0])) ? EStorageType::kContiguous : EStorageType::kSparse;
167 }
168
169 const auto readerArraySize = readerArray.GetSize();
// Arrays with fewer than 2 elements are handled by this branch too, while the storage type is still unknown.
170 if (EStorageType::kContiguous == fStorageType ||
171 (EStorageType::kUnknown == fStorageType && readerArray.GetSize() < 2)) {
172 if (readerArraySize > 0) {
173 // trigger loading of the contents of the TTreeReaderArray
174 // the address of the first element in the reader array is not necessarily equal to
175 // the address returned by the GetAddress method
176 auto readerArrayAddr = &readerArray.At(0);
177 T rvec(readerArrayAddr, readerArraySize); // built from address + size rather than from an iterator range
178 swap(fRVec, rvec);
179 } else {
180 T emptyVec{};
181 swap(fRVec, emptyVec);
182 }
183 } else {
184 // Reached when fStorageType is kSparse (an unknown storage type with 2 or more elements was resolved
// above): the storage is not contiguous, so we cannot but copy into the rvec.
// The warning below is compiled in only when NDEBUG is not defined and is printed at most once per
// RColumnValue.
185#ifndef NDEBUG
186 if (!fCopyWarningPrinted) {
187 Warning("RColumnValue::Get",
188 "Branch %s hangs from a non-split branch. A copy is being performed in order "
189 "to properly read the content.",
190 readerArray.GetBranchName());
191 fCopyWarningPrinted = true;
192 }
193#else
194 (void)fCopyWarningPrinted; // silence "unused member" diagnostics when the warning is compiled out
195#endif
196 if (readerArraySize > 0) {
197 T rvec(readerArray.begin(), readerArray.end()); // element-by-element copy
198 swap(fRVec, rvec);
199 } else {
200 T emptyVec{};
201 swap(fRVec, emptyVec);
202 }
203 }
204 return fRVec;
205
206 } else {
207 fCustomColumn->Update(fSlot, entry); // ask the column node to update its value for this slot and entry
208 return fColumnKind == EColumnKind::kCustomColumn ? *fCustomValuePtr : **fDSValuePtr;
209 }
210 }
211
212 /// This overload covers the RVec<bool> case. In this case we always copy the contents of TTreeReaderArray<bool>
213 /// into RVec<bool> (never take a view into the memory buffer) because the underlying memory buffer might be the
214 /// one of a std::vector<bool>, which is not a contiguous slab of bool values.
215 /// Note that this also penalizes the case in which the column type is actually bool[], but the possible performance
216 /// gain in this edge case is probably not worth the extra complication required to differentiate the two cases.
/// \param[in] entry The entry number; only used for non-tree columns, where it is forwarded to
/// `fCustomColumn->Update` together with fSlot.
/// \return For tree columns a reference to fRVec, refilled on every call; otherwise a reference to the value
/// of the custom or data-source column.
217 template <typename U = T,
218 typename std::enable_if<RColumnValue<U>::MustUseRVec_t::value && std::is_same<U, RVec<bool>>::value,
219 int>::type = 0>
220 T &Get(Long64_t entry)
221 {
222 if (fColumnKind == EColumnKind::kTree) {
223 auto &readerArray = *fTreeReader;
224 const auto readerArraySize = readerArray.GetSize();
225 if (readerArraySize > 0) {
226 // always perform a copy
227 T rvec(readerArray.begin(), readerArray.end());
228 swap(fRVec, rvec);
229 } else {
// empty array for this entry: make fRVec empty as well
230 T emptyVec{};
231 swap(fRVec, emptyVec);
232 }
233 return fRVec;
234 } else {
235 // business as usual: same handling of custom and data-source columns as in the other overloads
236 fCustomColumn->Update(fSlot, entry);
237 return fColumnKind == EColumnKind::kCustomColumn ? *fCustomValuePtr : **fDSValuePtr;
238 }
239 }
240
/// Destroy the owned TTreeReaderValue/TTreeReaderArray if this value reads a tree column; do nothing for
/// custom and data-source columns.
// NOTE(review): fColumnKind is left at kTree after the reader is destroyed, so a later Get would dereference
// a null fTreeReader -- presumably MakeProxy is always called again before the next Get; confirm.
241 void Reset()
242 {
243 // This method should by all means not be removed, together with all
244 // of its callers, otherwise a race condition takes place in which a
245 // TTreeReader and its TTreeReader{Value,Array}s could be deleted
246 // concurrently:
247 // - Thread #1) a task ends and pushes back processing slot
248 // - Thread #2) a task starts and overwrites thread-local TTreeReaderValues
249 // - Thread #1) first task deletes TTreeReader
250 // See https://github.com/root-project/root/commit/26e8ace6e47de6794ac9ec770c3bbff9b7f2e945
251 if (EColumnKind::kTree == fColumnKind) {
252 fTreeReader.reset();
253 }
254 }
255};
256
257// Some extern instantiations to speed up compilation/interpretation time
258// These are not active if c++17 is enabled because of a bug in our clang
259// See ROOT-9499.
260#if __cplusplus < 201703L
261extern template class RColumnValue<int>;
262extern template class RColumnValue<unsigned int>;
263extern template class RColumnValue<char>;
264extern template class RColumnValue<unsigned char>;
265extern template class RColumnValue<float>;
266extern template class RColumnValue<double>;
267extern template class RColumnValue<Long64_t>;
268extern template class RColumnValue<ULong64_t>;
269extern template class RColumnValue<std::vector<int>>;
270extern template class RColumnValue<std::vector<unsigned int>>;
271extern template class RColumnValue<std::vector<char>>;
272extern template class RColumnValue<std::vector<unsigned char>>;
273extern template class RColumnValue<std::vector<float>>;
274extern template class RColumnValue<std::vector<double>>;
275extern template class RColumnValue<std::vector<Long64_t>>;
276extern template class RColumnValue<std::vector<ULong64_t>>;
277#endif
278
279template <typename T>
281};
282
/// Specialization for a TypeList of column types: `type` is a std::tuple that holds one RColumnValue per
/// type in the list, in the same order.
283template <typename... BranchTypes>
284struct TRDFValueTuple<TypeList<BranchTypes...>> {
285 using type = std::tuple<RColumnValue<BranchTypes>...>;
286};
287
288template <typename BranchType>
290
291/// Clear the proxies of a tuple of RColumnValues
292template <typename ValueTuple, std::size_t... S>
294{
295 // hack to expand a parameter pack without c++17 fold expressions.
296 std::initializer_list<int> expander{(std::get<S>(values).Reset(), 0)...};
297 (void)expander; // avoid "unused variable" warnings
298}
299
300
301} // ns RDF
302} // ns Internal
303} // ns ROOT
304
305#endif // ROOT_RCOLUMNVALUE
ROOT::R::TRInterface & r
Definition: Object.C:4
long long Long64_t
Definition: RtypesCore.h:69
void Warning(const char *location, const char *msgfmt,...)
int type
Definition: TGX11.cxx:120
@ kUnknown
Definition: TStructNode.h:19
typedef void((*Func_t)())
@ kInvalid
Definition: TSystem.h:80
Helper class that updates and returns TTree branches as well as RDataFrame temporary columns.
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition: RVec.hxx:221
An interface for reading values stored in ROOT columnar datasets.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:44
void swap(TDirectoryEntry &e1, TDirectoryEntry &e2) noexcept
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:82
void ResetRDFValueTuple(std::vector< RTypeErasedColumnValue > &values, std::index_sequence< S... >, ROOT::TypeTraits::TypeList< ColTypes... >)
This overload is specialized to act on RTypeErasedColumnValues instead of RColumnValues.
Definition: RAction.hxx:90
typename TRDFValueTuple< BranchType >::type RDFValueTuple_t
double T(double x)
Definition: ChebyshevPol.h:34
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
RooArgSet S(const RooAbsArg &v1)
std::tuple< RColumnValue< BranchTypes >... > type
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:27