Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleProcessor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleProcessor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Florine de Geus <florine.de.geus@cern.ch>
4/// \date 2024-03-26
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleProcessor
17#define ROOT7_RNTupleProcessor
18
19#include <ROOT/REntry.hxx>
20#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleModel.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RPageStorage.hxx>
25#include <ROOT/RSpan.hxx>
26
27#include <memory>
28#include <string>
29#include <string_view>
30#include <vector>
31
32namespace ROOT {
33namespace Experimental {
34namespace Internal {
35
36/// Helper type representing the name and storage location of an RNTuple.
38 std::string fName;
39 std::string fLocation;
40
41 RNTupleSourceSpec() = default;
42 RNTupleSourceSpec(std::string_view n, std::string_view s) : fName(n), fLocation(s) {}
43};
44
45// clang-format off
46/**
47\class ROOT::Experimental::RNTupleProcessor
48\ingroup NTuple
49\brief Interface for iterating over entries of RNTuples and vertically concatenated RNTuples (chains).
50
51Example usage (see ntpl012_processor.C for a full example):
52
53~~~{.cpp}
54#include <ROOT/RNTupleProcessor.hxx>
55using ROOT::Experimental::Internal::RNTupleProcessor;
56using ROOT::Experimental::Internal::RNTupleSourceSpec;
57
58std::vector<RNTupleSourceSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
59RNTupleProcessor processor(ntuples);
60auto ptrPt = processor.GetEntry().GetPtr<float>("pt");
61
62for (const auto &entry : processor) {
63 std::cout << "pt = " << *ptrPt << std::endl;
64}
65~~~
66
67An RNTupleProcessor is created by providing one or more RNTupleSourceSpecs, each of which contains the name and storage
68location of a single RNTuple. The RNTuples are processed in the order in which they were provided.
69
70The RNTupleProcessor constructor also (optionally) accepts an RNTupleModel, which determines which fields should be
71read. If no model is provided, a default model based on the descriptor of the first specified RNTuple will be used.
72If a field that was present in the first RNTuple is not found in a subsequent one, an error will be thrown.
73
74The object returned by the RNTupleProcessor iterator is a view on the current state of the processor, and provides
75access to the global entry index (i.e., the entry index taking into account all processed ntuples), local entry index
76(i.e., the entry index for only the currently processed ntuple), the index of the ntuple currently being processed (with
77respect to the order of provided RNTupleSourceSpecs) and the actual REntry containing the values for the current entry.
78*/
79// clang-format on
81private:
82 // clang-format off
83 /**
84 \class ROOT::Experimental::RNTupleProcessor::RFieldContext
85 \ingroup NTuple
86 \brief Manager for a field as part of the RNTupleProcessor.
87
88 An RFieldContext contains two fields: a proto-field which is not connected to any page source but serves as the
89 blueprint for this particular field, and a concrete field that is connected to the page source currently connected
90 to the RNTupleProcessor for reading. When a new page source is connected, the current concrete field gets reset. A
91 new concrete field that is connected to this new page source is subsequently created from the proto-field.
92 */
93 // clang-format on
95 friend class RNTupleProcessor;
96
97 private:
98 std::unique_ptr<RFieldBase> fProtoField;
99 std::unique_ptr<RFieldBase> fConcreteField;
101
102 public:
103 RFieldContext(std::unique_ptr<RFieldBase> protoField, REntry::RFieldToken token)
104 : fProtoField(std::move(protoField)), fToken(token)
105 {
106 }
107
108 const RFieldBase &GetProtoField() const { return *fProtoField; }
109 /// We need to disconnect the concrete fields before swapping the page sources
111 void SetConcreteField() { fConcreteField = fProtoField->Clone(fProtoField->GetFieldName()); }
112 };
113
114 std::vector<RNTupleSourceSpec> fNTuples;
115 std::unique_ptr<REntry> fEntry;
116 std::unique_ptr<Internal::RPageSource> fPageSource;
117 std::vector<RFieldContext> fFieldContexts;
118
119 /////////////////////////////////////////////////////////////////////////////
120 /// \brief Connect an RNTuple for processing.
121 ///
122 /// \param[in] ntuple The RNTupleSourceSpec describing the RNTuple to connect.
123 ///
124 /// \return The number of entries in the newly-connected RNTuple.
125 ///
126 /// Creates and attaches a new page source for the specified RNTuple, and connects the fields that are known by
127 /// the processor to it.
129
130 /////////////////////////////////////////////////////////////////////////////
131 /// \brief Creates and connects concrete fields to the current page source, based on the proto-fields.
132 void ConnectFields();
133
134public:
135 /////////////////////////////////////////////////////////////////////////////
136 /// \brief Returns a reference to the entry used by the processor.
137 ///
138 /// \return A reference to the entry used by the processor.
139 ///
140 const REntry &GetEntry() const { return *fEntry; }
141
142 // clang-format off
143 /**
144 \class ROOT::Experimental::RNTupleProcessor::RIterator
145 \ingroup NTuple
146 \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof.
147 */
148 // clang-format on
149 class RIterator {
150 public:
151 // clang-format off
152 /**
153 \class ROOT::Experimental::RNTupleProcessor::RIterator::RProcessorState
154 \ingroup NTuple
155 \brief View on the RNTupleProcessor iterator state.
156 */
157 // clang-format on
159 friend class RIterator;
160
161 private:
165 /// Index of the currently open RNTuple in the chain of ntuples
166 std::size_t fNTupleIndex;
167
168 public:
169 RProcessorState(const REntry &entry, NTupleSize_t globalEntryIndex, NTupleSize_t localEntryIndex,
170 std::size_t ntupleIndex)
171 : fEntry(entry),
172 fGlobalEntryIndex(globalEntryIndex),
173 fLocalEntryIndex(localEntryIndex),
174 fNTupleIndex(ntupleIndex)
175 {
176 }
177
178 const REntry *operator->() const { return &fEntry; }
179 const REntry &GetEntry() const { return fEntry; }
182 std::size_t GetNTupleIndex() const { return fNTupleIndex; }
183 };
184
185 private:
188
189 public:
190 using iterator_category = std::forward_iterator_tag;
193 using difference_type = std::ptrdiff_t;
195 using reference = const RProcessorState &;
196
197 RIterator(RNTupleProcessor &processor, std::size_t ntupleIndex, NTupleSize_t globalEntryIndex)
198 : fProcessor(processor), fState(processor.GetEntry(), globalEntryIndex, 0, ntupleIndex)
199 {
200 }
201
202 //////////////////////////////////////////////////////////////////////////
203 /// \brief Increments the entry index.
204 ///
205 /// Checks if the end of the currently connected RNTuple is reached. If this is the case, either the next RNTuple
206 /// is connected or the iterator has reached the end.
207 void Advance()
208 {
210
211 if (++fState.fLocalEntryIndex >= fProcessor.fPageSource->GetNEntries()) {
212 do {
213 if (++fState.fNTupleIndex >= fProcessor.fNTuples.size()) {
215 return;
216 }
217 // Skip over empty ntuples we might encounter.
219
221 }
223 }
224
226 {
227 Advance();
228 return *this;
229 }
230
232 {
233 auto obj = *this;
234 Advance();
235 return obj;
236 }
237
239 {
241 return fState;
242 }
243
244 bool operator!=(const iterator &rh) const { return fState.fGlobalEntryIndex != rh.fState.fGlobalEntryIndex; }
245 bool operator==(const iterator &rh) const { return fState.fGlobalEntryIndex == rh.fState.fGlobalEntryIndex; }
246 };
247
248 /////////////////////////////////////////////////////////////////////////////
249 /// \brief Constructs a new RNTupleProcessor.
250 ///
251 /// \param[in] ntuples The source specification (name and storage location) for each RNTuple to process.
252 /// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by
253 /// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is
254 /// specified, it is created from the descriptor of the first RNTuple specified in `ntuples`.
255 ///
256 /// RNTuples are processed in the order in which they are specified.
257 RNTupleProcessor(const std::vector<RNTupleSourceSpec> &ntuples, std::unique_ptr<RNTupleModel> model = nullptr);
258
259 RIterator begin() { return RIterator(*this, 0, 0); }
260 RIterator end() { return RIterator(*this, fNTuples.size(), kInvalidNTupleIndex); }
261};
262
263} // namespace Internal
264} // namespace Experimental
265} // namespace ROOT
266
267#endif // ROOT7_RNTupleProcessor
RFieldContext(std::unique_ptr< RFieldBase > protoField, REntry::RFieldToken token)
void ResetConcreteField()
We need to disconnect the concrete fields before swapping the page sources.
std::size_t fNTupleIndex
Index of the currently open RNTuple in the chain of ntuples.
RProcessorState(const REntry &entry, NTupleSize_t globalEntryIndex, NTupleSize_t localEntryIndex, std::size_t ntupleIndex)
RIterator(RNTupleProcessor &processor, std::size_t ntupleIndex, NTupleSize_t globalEntryIndex)
std::unique_ptr< Internal::RPageSource > fPageSource
NTupleSize_t ConnectNTuple(const RNTupleSourceSpec &ntuple)
Connect an RNTuple for processing.
void ConnectFields()
Creates and connects concrete fields to the current page source, based on the proto-fields.
const REntry & GetEntry() const
Returns a reference to the entry used by the processor.
The field token identifies a top-level field in this entry.
Definition REntry.hxx:59
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:50
A field translates read and write calls from/to underlying columns to/from tree values.
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr NTupleSize_t kInvalidNTupleIndex
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Helper type representing the name and storage location of an RNTuple.
RNTupleSourceSpec(std::string_view n, std::string_view s)