Logo ROOT  
Reference Guide
RColumnValue.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 09/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RCOLUMNVALUE
12#define ROOT_RCOLUMNVALUE
13
#include <ROOT/RDF/Utils.hxx> // IsRVec_t, TypeID2TypeName
#include <ROOT/RMakeUnique.hxx>
#include <ROOT/RVec.hxx>
#include <ROOT/TypeTraits.hxx> // TakeFirstParameter_t
#include <RtypesCore.h>
#include <TTreeReader.h>
#include <TTreeReaderValue.h>
#include <TTreeReaderArray.h>

#include <cstring> // strcmp
#include <initializer_list>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#include <typeinfo> // std::type_info
#include <vector>
34
35namespace ROOT {
36namespace Internal {
37namespace RDF {
38using namespace ROOT::VecOps;
39
40/**
41\class ROOT::Internal::RDF::RColumnValue
42\ingroup dataframe
43\brief Helper class that updates and returns TTree branches as well as RDataFrame temporary columns
44\tparam T The type of the column
45
46RDataFrame nodes must access two different types of values during the event loop:
47values of real branches, for which TTreeReader{Values,Arrays} act as proxies, or
48temporary columns whose values are generated on the fly. While the type of the
49value is known at compile time (or just-in-time), it is only at runtime that nodes
50can check whether a certain value is generated on the fly or not.
51
52RColumnValue abstracts this difference by providing the same interface for
53both cases and handling the reading or generation of new values transparently.
54Exactly one of the data members fTreeReader, fCustomValuePtr or fDSValuePtr is in use
55for a given RColumnValue, depending on whether the value comes from a real
56TTree branch, from a custom (Define'd) column or from a data-source column respectively.
57
58RDataFrame nodes can store tuples of RColumnValues and retrieve an updated
59value for the column via the `Get` method.
60**/
61template <typename T>
62class R__CLING_PTRCHECK(off) RColumnValue {
63// R__CLING_PTRCHECK is disabled because all pointers are hand-crafted by RDF.
64
65 using MustUseRVec_t = IsRVec_t<T>;
66
67 // ColumnValue_t is the type of the column or the type of the elements of an array column
68 using ColumnValue_t = typename std::conditional<MustUseRVec_t::value, TakeFirstParameter_t<T>, T>::type;
69 using TreeReader_t = typename std::conditional<MustUseRVec_t::value, TTreeReaderArray<ColumnValue_t>,
71
72 /// RColumnValue has a slightly different behaviour whether the column comes from a TTreeReader, a RDataFrame Define
73 /// or a RDataSource. It stores which it is as an enum.
74 enum class EColumnKind { kTree, kCustomColumn, kDataSource, kInvalid };
75 // Set to the correct value by MakeProxy or SetTmpColumn
77 /// The slot this value belongs to. Only needed when querying custom column values, it is set in `SetTmpColumn`.
78 unsigned int fSlot = std::numeric_limits<unsigned int>::max();
79
80 // Each element of the following stacks will be in use by a _single task_.
81 // Each task will push one element when it starts and pop it when it ends.
82 // Stacks will typically be very small (1-2 elements typically) and will only grow over size 1 in case of interleaved
83 // task execution i.e. when more than one task needs readers in this worker thread.
84
85 /// Owning ptrs to a TTreeReaderValue or TTreeReaderArray. Only used for Tree columns.
86 std::unique_ptr<TreeReader_t> fTreeReader;
87 /// Non-owning ptrs to the value of a custom column.
89 /// Non-owning ptrs to the value of a data-source column.
91 /// Non-owning ptrs to the node responsible for the custom column. Needed when querying custom values.
93 /// Enumerator for the different properties of the branch storage in memory
94 enum class EStorageType : char { kContiguous, kUnknown, kSparse };
95 /// Signal whether we ever checked that the branch we are reading with a TTreeReaderArray stores array elements
96 /// in contiguous memory. Only used when T == RVec<U>.
98 /// If MustUseRVec, i.e. we are reading an array, we return a reference to this RVec to clients
100 bool fCopyWarningPrinted = false;
101
102public:
104
105 void SetTmpColumn(unsigned int slot, RCustomColumnBase *customColumn)
106 {
107 fCustomColumn = customColumn;
108 // Here we compare names and not typeinfos since they may come from two different contexts: a compiled
109 // and a jitted one.
110 const auto diffTypes = (0 != std::strcmp(customColumn->GetTypeId().name(), typeid(T).name()));
111 auto inheritedType = [&](){
112 auto colTClass = TClass::GetClass(customColumn->GetTypeId());
113 return colTClass && colTClass->InheritsFrom(TClass::GetClass<T>());
114 };
115
116 if (diffTypes && !inheritedType()) {
117 const auto tName = TypeID2TypeName(typeid(T));
118 const auto colTypeName = TypeID2TypeName(customColumn->GetTypeId());
119 std::string errMsg = "RColumnValue: type specified for column \"" +
120 customColumn->GetName() + "\" is ";
121 if (tName.empty()) {
122 errMsg += typeid(T).name();
123 errMsg += " (extracted from type info)";
124 } else {
125 errMsg += tName;
126 }
127 errMsg += " but temporary column has type ";
128 if (colTypeName.empty()) {
129 auto &id = customColumn->GetTypeId();
130 errMsg += id.name();
131 errMsg += " (extracted from type info)";
132 } else {
133 errMsg += colTypeName;
134 }
135 throw std::runtime_error(errMsg);
136 }
137
138 if (customColumn->IsDataSourceColumn()) {
139 fColumnKind = EColumnKind::kDataSource;
140 fDSValuePtr = static_cast<T **>(customColumn->GetValuePtr(slot));
141 } else {
142 fColumnKind = EColumnKind::kCustomColumn;
143 fCustomValuePtr = static_cast<T *>(customColumn->GetValuePtr(slot));
144 }
145 fSlot = slot;
146 }
147
148 void MakeProxy(TTreeReader *r, const std::string &bn)
149 {
150 fColumnKind = EColumnKind::kTree;
151 fTreeReader = std::make_unique<TreeReader_t>(*r, bn.c_str());
152 }
153
154 /// This overload is used to return scalar quantities (i.e. types that are not read into a RVec)
155 // This method is executed inside the event-loop, many times per entry
156 // If need be, the if statement can be avoided using thunks
157 // (have both branches inside functions and have a pointer to the branch to be executed)
158 template <typename U = T, typename std::enable_if<!RColumnValue<U>::MustUseRVec_t::value, int>::type = 0>
159 T &Get(Long64_t entry)
160 {
161 if (fColumnKind == EColumnKind::kTree) {
162 return *(fTreeReader->Get());
163 } else {
164 fCustomColumn->Update(fSlot, entry);
165 return fColumnKind == EColumnKind::kCustomColumn ? *fCustomValuePtr : **fDSValuePtr;
166 }
167 }
168
169 /// This overload is used to return arrays (i.e. types that are read into a RVec).
170 /// In this case the returned T is always a RVec<ColumnValue_t>.
171 /// RVec<bool> is treated differently, in a separate overload.
172 template <typename U = T,
173 typename std::enable_if<RColumnValue<U>::MustUseRVec_t::value && !std::is_same<U, RVec<bool>>::value,
174 int>::type = 0>
175 T &Get(Long64_t entry)
176 {
177 if (fColumnKind == EColumnKind::kTree) {
178 auto &readerArray = *fTreeReader;
179 // We only use TTreeReaderArrays to read columns that users flagged as type `RVec`, so we need to check
180 // that the branch stores the array as contiguous memory that we can actually wrap in an `RVec`.
181 // Currently we need the first entry to have been loaded to perform the check
182 // TODO Move check to `MakeProxy` once Axel implements this kind of check in TTreeReaderArray using
183 // TBranchProxy
184
185 const auto arrSize = readerArray.GetSize();
186 if (EStorageType::kUnknown == fStorageType && arrSize > 1) {
187 // We can decide since the array is long enough
188 fStorageType = EStorageType::kContiguous;
189 for (auto i = 0u; i < arrSize - 1; ++i) {
190 if ((char *)&readerArray[i + 1] - (char *)&readerArray[i] != sizeof(typename U::value_type)) {
191 fStorageType = EStorageType::kSparse;
192 break;
193 }
194 }
195 }
196
197 const auto readerArraySize = readerArray.GetSize();
198 if (EStorageType::kContiguous == fStorageType ||
199 (EStorageType::kUnknown == fStorageType && readerArray.GetSize() < 2)) {
200 if (readerArraySize > 0) {
201 // trigger loading of the contents of the TTreeReaderArray
202 // the address of the first element in the reader array is not necessarily equal to
203 // the address returned by the GetAddress method
204 auto readerArrayAddr = &readerArray.At(0);
205 T rvec(readerArrayAddr, readerArraySize);
206 std::swap(fRVec, rvec);
207 } else {
208 T emptyVec{};
209 std::swap(fRVec, emptyVec);
210 }
211 } else {
212 // The storage is not contiguous or we don't know yet: we cannot but copy into the rvec
213#ifndef NDEBUG
214 if (!fCopyWarningPrinted) {
215 Warning("RColumnValue::Get",
216 "Branch %s hangs from a non-split branch. A copy is being performed in order "
217 "to properly read the content.",
218 readerArray.GetBranchName());
219 fCopyWarningPrinted = true;
220 }
221#else
222 (void)fCopyWarningPrinted;
223#endif
224 if (readerArraySize > 0) {
225 T rvec(readerArray.begin(), readerArray.end());
226 std::swap(fRVec, rvec);
227 } else {
228 T emptyVec{};
229 std::swap(fRVec, emptyVec);
230 }
231 }
232 return fRVec;
233
234 } else {
235 fCustomColumn->Update(fSlot, entry);
236 return fColumnKind == EColumnKind::kCustomColumn ? *fCustomValuePtr : **fDSValuePtr;
237 }
238 }
239
240 /// This overload covers the RVec<bool> case. In this case we always copy the contents of TTreeReaderArray<bool>
241 /// into RVec<bool> (never take a view into the memory buffer) because the underlying memory buffer might be the
242 /// one of a std::vector<bool>, which is not a contiguous slab of bool values.
243 /// Note that this also penalizes the case in which the column type is actually bool[], but the possible performance
244 /// gains in this edge case is probably not worth the extra complication required to differentiate the two cases.
245 template <typename U = T,
246 typename std::enable_if<RColumnValue<U>::MustUseRVec_t::value && std::is_same<U, RVec<bool>>::value,
247 int>::type = 0>
248 T &Get(Long64_t entry)
249 {
250 if (fColumnKind == EColumnKind::kTree) {
251 auto &readerArray = *fTreeReader;
252 const auto readerArraySize = readerArray.GetSize();
253 if (readerArraySize > 0) {
254 // always perform a copy
255 T rvec(readerArray.begin(), readerArray.end());
256 std::swap(fRVec, rvec);
257 } else {
258 T emptyVec{};
259 std::swap(fRVec, emptyVec);
260 }
261 return fRVec;
262 } else {
263 // business as usual
264 fCustomColumn->Update(fSlot, entry);
265 return fColumnKind == EColumnKind::kCustomColumn ? *fCustomValuePtr : **fDSValuePtr;
266 }
267 }
268
269 void Reset()
270 {
271 // This method should by all means not be removed, together with all
272 // of its callers, otherwise a race condition takes place in which a
273 // TTreeReader and its TTreeReader{Value,Array}s could be deleted
274 // concurrently:
275 // - Thread #1) a task ends and pushes back processing slot
276 // - Thread #2) a task starts and overwrites thread-local TTreeReaderValues
277 // - Thread #1) first task deletes TTreeReader
278 // See https://github.com/root-project/root/commit/26e8ace6e47de6794ac9ec770c3bbff9b7f2e945
279 if (EColumnKind::kTree == fColumnKind) {
280 fTreeReader.reset();
281 }
282 }
283};
284
// Explicit instantiation declarations for the most common column types: translation units that include
// this header will not instantiate these specialisations themselves, which speeds up
// compilation/interpretation time.
// These are not active if c++17 is enabled because of a bug in our clang
// See ROOT-9499.
#if __cplusplus < 201703L
extern template class RColumnValue<int>;
extern template class RColumnValue<unsigned int>;
extern template class RColumnValue<char>;
extern template class RColumnValue<unsigned char>;
extern template class RColumnValue<float>;
extern template class RColumnValue<double>;
extern template class RColumnValue<Long64_t>;
extern template class RColumnValue<ULong64_t>;
extern template class RColumnValue<std::vector<int>>;
extern template class RColumnValue<std::vector<unsigned int>>;
extern template class RColumnValue<std::vector<char>>;
extern template class RColumnValue<std::vector<unsigned char>>;
extern template class RColumnValue<std::vector<float>>;
extern template class RColumnValue<std::vector<double>>;
extern template class RColumnValue<std::vector<Long64_t>>;
extern template class RColumnValue<std::vector<ULong64_t>>;
#endif
306
307template <typename T>
309};
310
311template <typename... BranchTypes>
312struct TRDFValueTuple<TypeList<BranchTypes...>> {
313 using type = std::tuple<RColumnValue<BranchTypes>...>;
314};
315
316template <typename BranchType>
318
/// Clear the proxies of a tuple of RColumnValues
/// \param values The tuple whose elements are reset.
/// \param S (deduced from the index_sequence argument) Indices of the elements on which `Reset()` is called,
///        in order.
template <typename ValueTuple, std::size_t... S>
void ResetRDFValueTuple(ValueTuple &values, std::index_sequence<S...>)
{
   // hack to expand a parameter pack without c++17 fold expressions.
   // The cast to void keeps an overloaded comma operator on Reset()'s return type from being picked up.
   std::initializer_list<int> expander{(static_cast<void>(std::get<S>(values).Reset()), 0)...};
   (void)expander; // avoid "unused variable" warnings
}
327
328
329} // ns RDF
330} // ns Internal
331} // ns ROOT
332
333#endif // ROOT_RCOLUMNVALUE
ROOT::R::TRInterface & r
Definition: Object.C:4
long long Long64_t
Definition: RtypesCore.h:71
void Warning(const char *location, const char *msgfmt,...)
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
@ kUnknown
Definition: TStructNode.h:19
typedef void((*Func_t)())
@ kInvalid
Definition: TSystem.h:78
virtual void Update(unsigned int slot, Long64_t entry)=0
virtual void * GetValuePtr(unsigned int slot)=0
virtual const std::type_info & GetTypeId() const =0
Helper class that updates and returns TTree branches as well as RDataFrame temporary columns.
T * fCustomValuePtr
Non-owning ptrs to the value of a custom column.
T ** fDSValuePtr
Non-owning ptrs to the value of a data-source column.
void MakeProxy(TTreeReader *r, const std::string &bn)
std::unique_ptr< TreeReader_t > fTreeReader
Owning ptrs to a TTreeReaderValue or TTreeReaderArray. Only used for Tree columns.
typename std::conditional< MustUseRVec_t::value, TakeFirstParameter_t< T >, T >::type ColumnValue_t
EColumnKind
RColumnValue has a slightly different behaviour whether the column comes from a TTreeReader,...
T & Get(Long64_t entry)
This overload is used to return scalar quantities (i.e. types that are not read into a RVec)
typename std::conditional< MustUseRVec_t::value, TTreeReaderArray< ColumnValue_t >, TTreeReaderValue< ColumnValue_t > >::type TreeReader_t
EStorageType
Enumerator for the different properties of the branch storage in memory.
void SetTmpColumn(unsigned int slot, RCustomColumnBase *customColumn)
RCustomColumnBase * fCustomColumn
Non-owning ptrs to the node responsible for the custom column. Needed when querying custom values.
RVec< ColumnValue_t > fRVec
If MustUseRVec, i.e. we are reading an array, we return a reference to this RVec to clients.
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2948
An interface for reading values stored in ROOT columnar datasets.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:43
void swap(RDirectoryEntry &e1, RDirectoryEntry &e2) noexcept
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:84
void ResetRDFValueTuple(std::vector< RTypeErasedColumnValue > &values, std::index_sequence< S... >, ROOT::TypeTraits::TypeList< ColTypes... >)
This overload is specialized to act on RTypeErasedColumnValues instead of RColumnValues.
Definition: RAction.hxx:88
typename TRDFValueTuple< BranchType >::type RDFValueTuple_t
double T(double x)
Definition: ChebyshevPol.h:34
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: StringConv.hxx:21
RooArgSet S(const RooAbsArg &v1)
std::tuple< RColumnValue< BranchTypes >... > type
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:25