Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleProcessor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleProcessor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Florine de Geus <florine.de.geus@cern.ch>
4/// \date 2024-03-26
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleProcessor
17#define ROOT7_RNTupleProcessor
18
19#include <ROOT/REntry.hxx>
20#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleModel.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RPageStorage.hxx>
25#include <ROOT/RSpan.hxx>
26
27#include <memory>
28#include <string>
29#include <string_view>
30#include <vector>
31
32namespace ROOT {
33namespace Experimental {
34
35// clang-format off
36/**
37\class ROOT::Experimental::RNTupleProcessor
38\ingroup NTuple
39\brief Interface for iterating over entries of RNTuples and vertically concatenated RNTuples (chains).
40
41Example usage (see ntpl012_processor.C for a full example):
42
43~~~{.cpp}
44#include <ROOT/RNTupleProcessor.hxx>
45using ROOT::Experimental::RNTupleProcessor;
46using ROOT::Experimental::RNTupleOpenSpec;
47
48std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
49auto processor = RNTupleProcessor::CreateChain(ntuples);
50
51for (const auto &entry : processor) {
52 std::cout << "pt = " << *entry.GetPtr<float>("pt") << std::endl;
53}
54~~~
55
56An RNTupleProcessor is created by providing one or more RNTupleOpenSpecs, each of which contains the name and storage
57location of a single RNTuple. The RNTuples are processed in the order in which they were provided.
58
59RNTupleProcessor::CreateChain also (optionally) accepts an RNTupleModel, which determines which fields should be
60read. If no model is provided, a default model based on the descriptor of the first specified RNTuple will be used.
61If a field that was present in the first RNTuple is not found in a subsequent one, an error will be thrown.
62
63The RNTupleProcessor provides an iterator which gives access to the REntry containing the field data for the current
64entry. Additional bookkeeping information can be obtained through the RNTupleProcessor itself.
65*/
66// clang-format on
68protected:
69 // clang-format off
70 /**
71 \class ROOT::Experimental::RNTupleProcessor::RFieldContext
72 \ingroup NTuple
73 \brief Manager for a field as part of the RNTupleProcessor.
74
75 An RFieldContext contains two fields: a proto-field which is not connected to any page source but serves as the
76 blueprint for this particular field, and a concrete field that is connected to the page source currently connected
77 to the RNTupleProcessor for reading. When a new page source is connected, the current concrete field gets reset. A
78 new concrete field that is connected to this new page source is subsequently created from the proto-field.
79 */
80 // clang-format on
82 friend class RNTupleProcessor;
84
85 private:
86 std::unique_ptr<RFieldBase> fProtoField;
87 std::unique_ptr<RFieldBase> fConcreteField;
89
90 public:
91 RFieldContext(std::unique_ptr<RFieldBase> protoField, REntry::RFieldToken token)
93 {
94 }
95
96 const RFieldBase &GetProtoField() const { return *fProtoField; }
97 /// We need to disconnect the concrete fields before swapping the page sources
99 void SetConcreteField() { fConcreteField = fProtoField->Clone(fProtoField->GetFieldName()); }
100 };
101
102 std::vector<RNTupleOpenSpec> fNTuples;
103 std::unique_ptr<REntry> fEntry;
104 std::unique_ptr<Internal::RPageSource> fPageSource;
105 std::vector<RFieldContext> fFieldContexts;
106
107 NTupleSize_t fNEntriesProcessed; ///< Total number of entries processed so far
108 std::size_t fCurrentNTupleNumber; ///< Index of the currently open RNTuple
109 NTupleSize_t fLocalEntryNumber; ///< Entry number within the current ntuple
110
111 /////////////////////////////////////////////////////////////////////////////
112 /// \brief Connect an RNTuple for processing.
113 ///
114 /// \param[in] ntuple The RNTupleOpenSpec describing the RNTuple to connect.
115 ///
116 /// \return The number of entries in the newly-connected RNTuple.
117 ///
118 /// Creates and attaches a new page source for the specified RNTuple, and connects the fields that are known by
119 /// the processor to it.
121
122 /////////////////////////////////////////////////////////////////////////////
123 /// \brief Creates and connects concrete fields to the current page source, based on the proto-fields.
124 virtual void ConnectFields() = 0;
125
126 //////////////////////////////////////////////////////////////////////////
127 /// \brief Advance the processor to the next available entry.
128 ///
129 /// \return The new (global) entry number after advancing, or kInvalidNTupleIndex if the last entry has been
130 /// processed.
131 ///
132 /// Checks if the end of the currently connected RNTuple is reached. If this is the case, either the next RNTuple
133 /// is connected or the iterator has reached the end.
134 virtual NTupleSize_t Advance() = 0;
135
136 RNTupleProcessor(const std::vector<RNTupleOpenSpec> &ntuples)
138 {
139 }
140
141public:
146 virtual ~RNTupleProcessor() = default;
147
148 /////////////////////////////////////////////////////////////////////////////
149 /// \brief Get the total number of entries processed so far.
150 ///
151 /// When only one RNTuple is present in the processor chain, the return value is equal to GetLocalEntryNumber.
153
154 /////////////////////////////////////////////////////////////////////////////
155 /// \brief Get the index to the RNTuple currently being processed, according to the sources specified upon creation.
156 std::size_t GetCurrentNTupleNumber() const { return fCurrentNTupleNumber; }
157
158 /////////////////////////////////////////////////////////////////////////////
159 /// \brief Get the entry number local to the RNTuple that is currently being processed.
160 ///
161 /// When only one RNTuple is present in the processor chain, the return value is equal to GetNEntriesProcessed.
163
164 /////////////////////////////////////////////////////////////////////////////
165 /// \brief Returns a reference to the entry used by the processor.
166 ///
167 /// \return A reference to the entry used by the processor.
168 ///
169 const REntry &GetEntry() const { return *fEntry; }
170
171 // clang-format off
172 /**
173 \class ROOT::Experimental::RNTupleProcessor::RIterator
174 \ingroup NTuple
175 \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof.
176 */
177 // clang-format on
178 class RIterator {
179 private:
182
183 public:
184 using iterator_category = std::forward_iterator_tag;
187 using difference_type = std::ptrdiff_t;
188 using pointer = REntry *;
189 using reference = const REntry &;
190
195
197 {
199 return *this;
200 }
201
203 {
204 auto obj = *this;
206 return obj;
207 }
208
214
215 friend bool operator!=(const iterator &lh, const iterator &rh)
216 {
217 return lh.fNEntriesProcessed != rh.fNEntriesProcessed;
218 }
219 friend bool operator==(const iterator &lh, const iterator &rh)
220 {
221 return lh.fNEntriesProcessed == rh.fNEntriesProcessed;
222 }
223 };
224
225 RIterator begin() { return RIterator(*this, 0); }
227
228 /////////////////////////////////////////////////////////////////////////////
229 /// \brief Create a new RNTuple processor chain for vertical concatenation of RNTuples.
230 ///
231 /// \param[in] ntuples A list specifying the names and locations of the ntuples to process.
232 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
233 /// one will be created based on the descriptor of the first ntuple specified.
234 ///
235 /// \return A pointer to the newly created RNTupleProcessor.
236 static std::unique_ptr<RNTupleProcessor>
237 CreateChain(const std::vector<RNTupleOpenSpec> &ntuples, std::unique_ptr<RNTupleModel> model = nullptr);
238};
239
240// clang-format off
241/**
242\class ROOT::Experimental::RNTupleChainProcessor
243\ingroup NTuple
244\brief Processor specialization for vertically concatenated RNTuples (chains).
245*/
246// clang-format on
248 friend class RNTupleProcessor;
249
250private:
252 void ConnectFields() final;
254
255 /////////////////////////////////////////////////////////////////////////////
256 /// \brief Constructs a new RNTupleChainProcessor.
257 ///
258 /// \param[in] ntuples The source specification (name and storage location) for each RNTuple to process.
259 /// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by
260 /// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is
261 /// specified, it is created from the descriptor of the first RNTuple specified in `ntuples`.
262 ///
263 /// RNTuples are processed in the order in which they are specified.
264 RNTupleChainProcessor(const std::vector<RNTupleOpenSpec> &ntuples, std::unique_ptr<RNTupleModel> model = nullptr);
265};
266
267} // namespace Experimental
268} // namespace ROOT
269
270#endif // ROOT7_RNTupleProcessor
The field token identifies a (sub)field in this entry.
Definition REntry.hxx:61
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:51
A field translates read and write calls from/to underlying columns to/from tree values.
Processor specialization for vertically concatenated RNTuples (chains).
void ConnectFields() final
Creates and connects concrete fields to the current page source, based on the proto-fields.
NTupleSize_t Advance() final
Advance the processor to the next available entry.
NTupleSize_t ConnectNTuple(const RNTupleOpenSpec &ntuple) final
Connect an RNTuple for processing.
The RNTupleModel encapsulates the schema of an ntuple.
Manager for a field as part of the RNTupleProcessor.
void ResetConcreteField()
We need to disconnect the concrete fields before swapping the page sources.
RFieldContext(std::unique_ptr< RFieldBase > protoField, REntry::RFieldToken token)
Iterator over the entries of an RNTuple, or vertical concatenation thereof.
RIterator(RNTupleProcessor &processor, NTupleSize_t globalEntryNumber)
friend bool operator==(const iterator &lh, const iterator &rh)
friend bool operator!=(const iterator &lh, const iterator &rh)
Interface for iterating over entries of RNTuples and vertically concatenated RNTuples (chains).
virtual NTupleSize_t ConnectNTuple(const RNTupleOpenSpec &ntuple)=0
Connect an RNTuple for processing.
const REntry & GetEntry() const
Returns a reference to the entry used by the processor.
RNTupleProcessor(const std::vector< RNTupleOpenSpec > &ntuples)
NTupleSize_t GetNEntriesProcessed() const
Get the total number of entries processed so far.
RNTupleProcessor(RNTupleProcessor &&)=delete
std::vector< RFieldContext > fFieldContexts
std::size_t GetCurrentNTupleNumber() const
Get the index to the RNTuple currently being processed, according to the sources specified upon creat...
static std::unique_ptr< RNTupleProcessor > CreateChain(const std::vector< RNTupleOpenSpec > &ntuples, std::unique_ptr< RNTupleModel > model=nullptr)
Create a new RNTuple processor chain for vertical concatenation of RNTuples.
virtual void ConnectFields()=0
Creates and connects concrete fields to the current page source, based on the proto-fields.
RNTupleProcessor(const RNTupleProcessor &)=delete
RNTupleProcessor & operator=(RNTupleProcessor &&)=delete
virtual NTupleSize_t Advance()=0
Advance the processor to the next available entry.
std::unique_ptr< Internal::RPageSource > fPageSource
NTupleSize_t GetLocalEntryNumber() const
Get the entry number local to the RNTuple that is currently being processed.
std::vector< RNTupleOpenSpec > fNTuples
RNTupleProcessor & operator=(const RNTupleProcessor &)=delete
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Used to specify the underlying RNTuples in RNTupleProcessor and RNTupleReader::OpenFriends()