Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleProcessor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleProcessor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Florine de Geus <florine.de.geus@cern.ch>
4/// \date 2024-03-26
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleProcessor
17#define ROOT7_RNTupleProcessor
18
19#include <ROOT/REntry.hxx>
20#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleIndex.hxx>
23#include <ROOT/RNTupleModel.hxx>
24#include <ROOT/RNTupleUtil.hxx>
25#include <ROOT/RPageStorage.hxx>
26
27#include <memory>
28#include <string>
29#include <string_view>
30#include <vector>
31
32namespace ROOT {
33namespace Experimental {
34
35// clang-format off
36/**
37\class ROOT::Experimental::RNTupleProcessor
38\ingroup NTuple
39\brief Interface for iterating over entries of RNTuples and vertically concatenated RNTuples (chains).
40
41Example usage (see ntpl012_processor.C for a full example):
42
43~~~{.cpp}
44#include <ROOT/RNTupleProcessor.hxx>
45using ROOT::Experimental::RNTupleProcessor;
46using ROOT::Experimental::RNTupleOpenSpec;
47
48std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
49auto processor = RNTupleProcessor::CreateChain(ntuples);
50
51for (const auto &entry : processor) {
52 std::cout << "pt = " << *entry.GetPtr<float>("pt") << std::endl;
53}
54~~~
55
56An RNTupleProcessor is created by providing one or more RNTupleOpenSpecs, each of which contains the name and storage
57location of a single RNTuple. The RNTuples are processed in the order in which they were provided.
58
59The RNTupleProcessor constructor also (optionally) accepts an RNTupleModel, which determines which fields should be
60read. If no model is provided, a default model based on the descriptor of the first specified RNTuple will be used.
61If a field that was present in the first RNTuple is not found in a subsequent one, an error will be thrown.
62
63The RNTupleProcessor provides an iterator which gives access to the REntry containing the field data for the current
64entry. Additional bookkeeping information can be obtained through the RNTupleProcessor itself.
65*/
66// clang-format on
68protected:
69 // clang-format off
70 /**
71 \class ROOT::Experimental::RNTupleProcessor::RFieldContext
72 \ingroup NTuple
73 \brief Manager for a field as part of the RNTupleProcessor.
74
75 An RFieldContext contains two fields: a proto-field which is not connected to any page source but serves as the
76 blueprint for this particular field, and a concrete field that is connected to the page source currently connected
77 to the RNTupleProcessor for reading. When a new page source is connected, the current concrete field gets reset. A
78 new concrete field that is connected to this new page source is subsequently created from the proto-field.
79 */
80 // clang-format on
82 friend class RNTupleProcessor;
86
87 private:
88 std::unique_ptr<RFieldBase> fProtoField;
89 std::unique_ptr<RFieldBase> fConcreteField;
91 // Which RNTuple the field belongs to, in case the field belongs to an auxiliary RNTuple, according to the order
92 // in which it was specified. For chained RNTuples, this value will always be 0.
93 std::size_t fNTupleIdx;
94
95 public:
96 RFieldContext(std::unique_ptr<RFieldBase> protoField, REntry::RFieldToken token, std::size_t ntupleIdx = 0)
97 : fProtoField(std::move(protoField)), fToken(token), fNTupleIdx(ntupleIdx)
98 { // fConcreteField is not set by this constructor and therefore starts out null; see SetConcreteField().
99 }
100
101 const RFieldBase &GetProtoField() const { return *fProtoField; }
102 /// Concrete fields need to be reset explicitly before the page source they belong to is destroyed.
104 void SetConcreteField() { fConcreteField = fProtoField->Clone(fProtoField->GetFieldName()); } // clone of the proto-field, created under the proto-field's own name
105 bool IsAuxiliary() const { return fNTupleIdx > 0; } // only indices above 0 denote an auxiliary RNTuple (see fNTupleIdx)
106 };
107
108 std::vector<RNTupleOpenSpec> fNTuples;
109 std::unique_ptr<REntry> fEntry;
110 std::unique_ptr<Internal::RPageSource> fPageSource;
111 // Maps the (qualified) field name to its corresponding field context.
112 std::unordered_map<std::string, RFieldContext> fFieldContexts;
113
114 NTupleSize_t fNEntriesProcessed; //< Total number of entries processed so far
115 std::size_t fCurrentNTupleNumber; //< Index of the currently open RNTuple
116 NTupleSize_t fLocalEntryNumber; //< Entry number within the current ntuple
117
118 /////////////////////////////////////////////////////////////////////////////
119 /// \brief Creates and connects a concrete field to the current page source, based on its proto field.
120 void ConnectField(RFieldContext &fieldContext, Internal::RPageSource &pageSource, REntry &entry);
121
122 //////////////////////////////////////////////////////////////////////////
123 /// \brief Advance the processor to the next available entry.
124 ///
125 /// \return The number of the entry loaded after advancing, or kInvalidNTupleIndex if there was no entry to advance
126 /// to.
127 ///
128 /// Checks if the end of the currently connected RNTuple is reached. If this is the case, either the next RNTuple
129 /// is connected or the iterator has reached the end.
130 virtual NTupleSize_t Advance() = 0;
131
132 /////////////////////////////////////////////////////////////////////////////
133 /// \brief Fill the entry with values belonging to the current entry number.
134 virtual void LoadEntry() = 0;
135
136 /////////////////////////////////////////////////////////////////////////////
137 /// \brief Set the local (i.e. relative to the page source currently opened) entry number. Used by
138 /// `RNTupleProcessor::RIterator`.
139 ///
140 /// \param[in] entryNumber The value to store as the local entry number.
141 void SetLocalEntryNumber(NTupleSize_t entryNumber) { fLocalEntryNumber = entryNumber; }
142
143 RNTupleProcessor(const std::vector<RNTupleOpenSpec> &ntuples)
145 {
146 }
147
148public:
153 virtual ~RNTupleProcessor() = default;
154
155 /////////////////////////////////////////////////////////////////////////////
156 /// \brief Get the total number of entries processed so far.
157 ///
158 /// When only one RNTuple is present in the processor chain, the return value is equal to GetLocalEntryNumber.
160
161 /////////////////////////////////////////////////////////////////////////////
162 /// \brief Get the index to the RNTuple currently being processed, according to the sources specified upon creation.
163 std::size_t GetCurrentNTupleNumber() const { return fCurrentNTupleNumber; }
164
165 /////////////////////////////////////////////////////////////////////////////
166 /// \brief Get the entry number local to the RNTuple that is currently being processed.
167 ///
168 /// When only one RNTuple is present in the processor chain, the return value is equal to GetNEntriesProcessed.
170
171 /////////////////////////////////////////////////////////////////////////////
172 /// \brief Returns a reference to the entry used by the processor.
173 ///
174 /// \return A reference to the entry used by the processor.
175 ///
176 const REntry &GetEntry() const { return *fEntry; }
177
178 // clang-format off
179 /**
180 \class ROOT::Experimental::RNTupleProcessor::RIterator
181 \ingroup NTuple
182 \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof.
183 */
184 // clang-format on
185 class RIterator {
186 private:
189
190 public:
191 using iterator_category = std::forward_iterator_tag;
194 using difference_type = std::ptrdiff_t;
195 using pointer = REntry *;
196 using reference = const REntry &;
197
198 RIterator(RNTupleProcessor &processor, NTupleSize_t entryNumber)
199 : fProcessor(processor), fCurrentEntryNumber(entryNumber)
200 {
201 // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already
202 // know there is nothing to advance to.
206 }
207 }
208
210 {
213 return *this;
214 }
215
217 {
218 auto obj = *this;
219 ++(*this);
220 return obj;
221 }
222
224 {
226 return *fProcessor.fEntry;
227 }
228
229 friend bool operator!=(const iterator &lh, const iterator &rh)
230 {
232 }
233 friend bool operator==(const iterator &lh, const iterator &rh)
234 {
236 }
237 };
238
239 RIterator begin() { return RIterator(*this, 0); } // iteration starts at entry number 0
241
242 static std::unique_ptr<RNTupleProcessor> Create(const RNTupleOpenSpec &ntuple);
243 static std::unique_ptr<RNTupleProcessor> Create(const RNTupleOpenSpec &ntuple, RNTupleModel &model);
244
245 /////////////////////////////////////////////////////////////////////////////
246 /// \brief Create a new RNTuple processor chain for vertical concatenation of RNTuples.
247 ///
248 /// \param[in] ntuples A list specifying the names and locations of the ntuples to process.
249 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
250 /// one will be created based on the descriptor of the first ntuple specified.
251 ///
252 /// \return A pointer to the newly created RNTupleProcessor.
253 static std::unique_ptr<RNTupleProcessor>
254 CreateChain(const std::vector<RNTupleOpenSpec> &ntuples, std::unique_ptr<RNTupleModel> model = nullptr);
255
256 /////////////////////////////////////////////////////////////////////////////
257 /// \brief Create a new RNTuple processor for horizontally concatenated RNTuples.
258 ///
259 /// \param[in] ntuples A list specifying the names and locations of the ntuples to process. The first ntuple in the
260 /// list will be considered the primary ntuple and drives the processor iteration loop. Subsequent ntuples are
261 /// considered auxiliary, whose entries to be read are determined by the primary ntuple (which does not necessarily
262 /// have to be sequential).
263 /// \param[in] joinFields The names of the fields on which to join, in case the specified ntuples are unaligned.
264 /// The join is made based on the combined join field values, and therefore each field has to be present in each
265 /// specified RNTuple. If an empty list is provided, it is assumed that the specified ntuples are fully aligned, and
266 /// `RNTupleIndex` will not be used.
267 /// \param[in] models A list of models for the ntuples. This list must either contain a model for each ntuple in
268 /// `ntuples` (following the specification order), or be empty. When the list is empty, the default model (i.e.
269 /// containing all fields) will be used for each ntuple.
270 ///
271 /// \return A pointer to the newly created RNTupleProcessor.
272 static std::unique_ptr<RNTupleProcessor> CreateJoin(const std::vector<RNTupleOpenSpec> &ntuples,
273 const std::vector<std::string> &joinFields,
274 std::vector<std::unique_ptr<RNTupleModel>> models = {});
275};
276
277// clang-format off
278/**
279\class ROOT::Experimental::RNTupleSingleProcessor
280\ingroup NTuple
281\brief Processor specialization for processing a single RNTuple.
282*/
283// clang-format on
285 friend class RNTupleProcessor;
286
287private:
288 /////////////////////////////////////////////////////////////////////////////
289 /// \brief Constructs a new RNTupleProcessor for processing a single RNTuple.
290 ///
291 /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process.
292 /// \param[in] model The model that specifies which fields should be read by the processor.
294
295 NTupleSize_t Advance() final;
296
297public:
299};
300
301// clang-format off
302/**
303\class ROOT::Experimental::RNTupleChainProcessor
304\ingroup NTuple
305\brief Processor specialization for vertically concatenated RNTuples (chains).
306*/
307// clang-format on
309 friend class RNTupleProcessor;
310
311private:
312 NTupleSize_t Advance() final;
313 void LoadEntry() final { fEntry->Read(fLocalEntryNumber); } // reads the entry at the current local entry number into fEntry
314
315 /////////////////////////////////////////////////////////////////////////////
316 /// \brief Connect an RNTuple for processing.
317 ///
318 /// \param[in] ntuple The RNTupleOpenSpec describing the RNTuple to connect.
319 ///
320 /// \return The number of entries in the newly-connected RNTuple.
321 ///
322 /// Creates and attaches new page source for the specified RNTuple, and connects the fields that are known by
323 /// the processor to it.
325
326 /////////////////////////////////////////////////////////////////////////////
327 /// \brief Constructs a new RNTupleChainProcessor.
328 ///
329 /// \param[in] ntuples The source specification (name and storage location) for each RNTuple to process.
330 /// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by
331 /// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is
332 /// specified, it is created from the descriptor of the first RNTuple specified in `ntuples`.
333 ///
334 /// RNTuples are processed in the order in which they are specified.
335 RNTupleChainProcessor(const std::vector<RNTupleOpenSpec> &ntuples, std::unique_ptr<RNTupleModel> model = nullptr);
336};
337
338// clang-format off
339/**
340\class ROOT::Experimental::RNTupleJoinProcessor
341\ingroup NTuple
342\brief Processor specialization for horizontally concatenated RNTuples (joins).
343*/
344// clang-format on
346 friend class RNTupleProcessor;
347
348private:
349 std::unique_ptr<RNTupleModel> fJoinModel;
350 std::vector<std::unique_ptr<Internal::RPageSource>> fAuxiliaryPageSources;
351 /// Tokens representing the join fields present in the main RNTuple
352 std::vector<REntry::RFieldToken> fJoinFieldTokens;
353 std::vector<std::unique_ptr<Internal::RNTupleIndex>> fJoinIndices;
354
355 bool IsUsingIndex() const { return fJoinIndices.size() > 0; } // true when at least one join index is held in fJoinIndices
356
357 NTupleSize_t Advance() final;
358
359 /////////////////////////////////////////////////////////////////////////////
360 /// \brief Fill the entry with values belonging to the current entry number of the primary RNTuple.
361 void LoadEntry() final;
362
363 /////////////////////////////////////////////////////////////////////////////
364 /// \brief Constructs a new RNTupleJoinProcessor.
365 ///
366 /// \param[in] mainNTuple The source specification (name and storage location) of the primary RNTuple.
367 /// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by
368 /// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is
369 /// specified, it is created from the RNTuple's descriptor.
370 RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple, std::unique_ptr<RNTupleModel> model = nullptr);
371
372 /////////////////////////////////////////////////////////////////////////////
373 /// \brief Add an auxiliary RNTuple to the processor.
374 ///
375 /// \param[in] auxNTuple The source specification (name and storage location) of the auxiliary RNTuple.
376 /// \param[in] joinFields The names of the fields used in the join.
377 /// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by
378 /// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is
379 /// specified, it is created from the RNTuple's descriptor.
380 void AddAuxiliary(const RNTupleOpenSpec &auxNTuple, const std::vector<std::string> &joinFields,
381 std::unique_ptr<RNTupleModel> model = nullptr);
382 void ConnectFields();
383
384 /////////////////////////////////////////////////////////////////////////////
385 /// \brief Populate fJoinFieldTokens with tokens for join fields belonging to the main RNTuple in the join model.
386 ///
387 /// \param[in] joinFields The names of the fields used in the join.
388 void SetJoinFieldTokens(const std::vector<std::string> &joinFields)
389 {
390 fJoinFieldTokens.reserve(joinFields.size()); // one token is appended per join field name
391 for (const auto &fieldName : joinFields) {
392 fJoinFieldTokens.emplace_back(fEntry->GetToken(fieldName)); // token is looked up by field name in fEntry
393 }
394 }
395
396public:
402 {
403 for (auto &[_, fieldContext] : fFieldContexts) {
404 fieldContext.ResetConcreteField();
405 }
406 }
407};
408
409} // namespace Experimental
410} // namespace ROOT
411
412#endif // ROOT7_RNTupleProcessor
#define _(A, B)
Definition cfortran.h:108
Abstract interface to read data from an ntuple.
The field token identifies a (sub)field in this entry.
Definition REntry.hxx:63
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:51
A field translates read and write calls from/to underlying columns to/from tree values.
Processor specialization for vertically concatenated RNTuples (chains).
void LoadEntry() final
Fill the entry with values belonging to the current entry number.
NTupleSize_t Advance() final
Advance the processor to the next available entry.
NTupleSize_t ConnectNTuple(const RNTupleOpenSpec &ntuple)
Connect an RNTuple for processing.
Processor specialization for horizontally concatenated RNTuples (joins).
RNTupleJoinProcessor(const RNTupleJoinProcessor &)=delete
RNTupleJoinProcessor(RNTupleJoinProcessor &&)=delete
void SetJoinFieldTokens(const std::vector< std::string > &joinFields)
Populate fJoinFieldTokens with tokens for join fields belonging to the main RNTuple in the join model...
std::vector< std::unique_ptr< Internal::RPageSource > > fAuxiliaryPageSources
std::unique_ptr< RNTupleModel > fJoinModel
void AddAuxiliary(const RNTupleOpenSpec &auxNTuple, const std::vector< std::string > &joinFields, std::unique_ptr< RNTupleModel > model=nullptr)
Add an auxiliary RNTuple to the processor.
NTupleSize_t Advance() final
Advance the processor to the next available entry.
void LoadEntry() final
Fill the entry with values belonging to the current entry number of the primary RNTuple.
std::vector< std::unique_ptr< Internal::RNTupleIndex > > fJoinIndices
RNTupleJoinProcessor operator=(const RNTupleJoinProcessor &)=delete
RNTupleJoinProcessor operator=(RNTupleJoinProcessor &&)=delete
std::vector< REntry::RFieldToken > fJoinFieldTokens
Tokens representing the join fields present in the main RNTuple.
The RNTupleModel encapsulates the schema of an ntuple.
Manager for a field as part of the RNTupleProcessor.
RFieldContext(std::unique_ptr< RFieldBase > protoField, REntry::RFieldToken token, std::size_t ntupleIdx=0)
void ResetConcreteField()
Concrete fields need to be reset explicitly before the page source they belong to is destroyed.
Iterator over the entries of an RNTuple, or vertical concatenation thereof.
RIterator(RNTupleProcessor &processor, NTupleSize_t entryNumber)
friend bool operator==(const iterator &lh, const iterator &rh)
friend bool operator!=(const iterator &lh, const iterator &rh)
Interface for iterating over entries of RNTuples and vertically concatenated RNTuples (chains).
const REntry & GetEntry() const
Returns a reference to the entry used by the processor.
RNTupleProcessor(const std::vector< RNTupleOpenSpec > &ntuples)
virtual void LoadEntry()=0
Fill the entry with values belonging to the current entry number.
NTupleSize_t GetNEntriesProcessed() const
Get the total number of entries processed so far.
RNTupleProcessor(RNTupleProcessor &&)=delete
std::size_t GetCurrentNTupleNumber() const
Get the index to the RNTuple currently being processed, according to the sources specified upon creat...
static std::unique_ptr< RNTupleProcessor > CreateChain(const std::vector< RNTupleOpenSpec > &ntuples, std::unique_ptr< RNTupleModel > model=nullptr)
Create a new RNTuple processor chain for vertical concatenation of RNTuples.
std::unordered_map< std::string, RFieldContext > fFieldContexts
void SetLocalEntryNumber(NTupleSize_t entryNumber)
Set the local (i.e.
static std::unique_ptr< RNTupleProcessor > CreateJoin(const std::vector< RNTupleOpenSpec > &ntuples, const std::vector< std::string > &joinFields, std::vector< std::unique_ptr< RNTupleModel > > models={})
Create a new RNTuple processor for horizontally concatenated RNTuples.
static std::unique_ptr< RNTupleProcessor > Create(const RNTupleOpenSpec &ntuple)
RNTupleProcessor(const RNTupleProcessor &)=delete
RNTupleProcessor & operator=(RNTupleProcessor &&)=delete
void ConnectField(RFieldContext &fieldContext, Internal::RPageSource &pageSource, REntry &entry)
Creates and connects a concrete field to the current page source, based on its proto field.
virtual NTupleSize_t Advance()=0
Advance the processor to the next available entry.
std::unique_ptr< Internal::RPageSource > fPageSource
NTupleSize_t GetLocalEntryNumber() const
Get the entry number local to the RNTuple that is currently being processed.
std::vector< RNTupleOpenSpec > fNTuples
RNTupleProcessor & operator=(const RNTupleProcessor &)=delete
Processor specialization for processing a single RNTuple.
void LoadEntry()
Fill the entry with values belonging to the current entry number.
NTupleSize_t Advance() final
Advance the processor to the next available entry.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr NTupleSize_t kInvalidNTupleIndex
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Used to specify the underlying RNTuples in RNTupleProcessor.