Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleReader.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleReader.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-20
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleReader
17#define ROOT7_RNTupleReader
18
19#include <ROOT/RConfig.hxx> // for R__unlikely
20#include <ROOT/RError.hxx>
23#include <ROOT/RNTupleModel.hxx>
25#include <ROOT/RNTupleUtil.hxx>
26#include <ROOT/RNTupleView.hxx>
27#include <ROOT/RPageStorage.hxx>
28#include <ROOT/RSpan.hxx>
29
30#include <iostream>
31#include <iterator>
32#include <memory>
33#include <string>
34#include <string_view>
35
36namespace ROOT {
37namespace Experimental {
38
39class REntry;
40class RNTuple;
41
42/// Listing of the different kinds of information that can be printed by RNTupleReader::PrintInfo()
43enum class ENTupleInfo {
44 kSummary, // The ntuple name, description, number of entries
45 kStorageDetails, // size on storage, page sizes, compression factor, etc.
46 kMetrics, // internal performance counters; requires that EnableMetrics() was called
47};
48
49// clang-format off
50/**
51\class ROOT::Experimental::RNTupleReader
52\ingroup NTuple
53\brief An RNTuple that is used to read data from storage
54
55An input ntuple provides data from storage as C++ objects. The ntuple model can be created from the data on storage
56or it can be imposed by the user. The latter case allows users to read into a specialized ntuple model that covers
57only a subset of the fields in the ntuple. The ntuple model is used when reading complete entries.
58Individual fields can be read as well by instantiating a tree view.
59
60~~~ {.cpp}
61#include <ROOT/RNTupleReader.hxx>
62using ROOT::Experimental::RNTupleReader;
63
64#include <iostream>
65
66auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
67std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
68~~~
69*/
70// clang-format on
72private:
73 /// Set as the page source's scheduler for parallel page decompression if IMT is on
74 /// Needs to be destructed after the page source is destructed (and thus be declared before it)
75 std::unique_ptr<Internal::RPageStorage::RTaskScheduler> fUnzipTasks;
76
77 std::unique_ptr<Internal::RPageSource> fSource;
78 /// Needs to be destructed before fSource
79 std::unique_ptr<RNTupleModel> fModel;
80 /// We use a dedicated on-demand reader for Show() and Scan(). Printing data uses all the fields
81 /// from the full model even if the analysis code uses only a subset of fields. The display reader
82 /// is a clone of the original reader.
83 std::unique_ptr<RNTupleReader> fDisplayReader;
84 /// The ntuple descriptor in the page source is protected by a read-write lock. We don't expose that to the
85 /// users of RNTupleReader::GetDescriptor(). Instead, if descriptor information is needed, we clone the
86 /// descriptor. Using the descriptor's generation number, we know if the cached descriptor is stale.
87 /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes,
88 /// not on a hot code path.
89 std::unique_ptr<RNTupleDescriptor> fCachedDescriptor;
91
92 RNTupleReader(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Internal::RPageSource> source);
93 /// The model is generated from the ntuple metadata on storage.
94 explicit RNTupleReader(std::unique_ptr<Internal::RPageSource> source);
95
96 void ConnectModel(RNTupleModel &model);
98 void InitPageSource();
99
100 DescriptorId_t RetrieveFieldId(std::string_view fieldName) const;
101
102public:
103 // Browse through the entries
104 class RIterator {
105 private:
107
108 public:
110 using iterator_category = std::forward_iterator_tag;
115
116 RIterator() = default;
118 ~RIterator() = default;
119
120 iterator operator++(int) /* postfix */
121 {
122 auto r = *this;
123 fIndex++;
124 return r;
125 }
126 iterator &operator++() /* prefix */
127 {
128 ++fIndex;
129 return *this;
130 }
132 pointer operator->() { return &fIndex; }
133 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
134 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
135 };
136
137 /// Used to specify the underlying RNTuples in OpenFriends()
138 struct ROpenSpec {
139 std::string fNTupleName;
140 std::string fStorage;
142
143 ROpenSpec() = default;
144 ROpenSpec(std::string_view n, std::string_view s) : fNTupleName(n), fStorage(s) {}
145 };
146
147 /// Open an RNTuple for reading.
148 ///
149 /// Throws an RException if there is no RNTuple with the given name.
150 ///
151 /// **Example: open an RNTuple and print the number of entries**
152 /// ~~~ {.cpp}
153 /// #include <ROOT/RNTupleReader.hxx>
154 /// using ROOT::Experimental::RNTupleReader;
155 ///
156 /// #include <iostream>
157 ///
158 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
159 /// std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
160 /// ~~~
161 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage,
162 const RNTupleReadOptions &options = RNTupleReadOptions());
163 static std::unique_ptr<RNTupleReader>
164 Open(RNTuple *ntuple, const RNTupleReadOptions &options = RNTupleReadOptions());
165 /// The caller imposes a model, which must be compatible with the model found in the data on storage.
166 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model, std::string_view ntupleName,
167 std::string_view storage,
168 const RNTupleReadOptions &options = RNTupleReadOptions());
169 static std::unique_ptr<RNTupleReader>
170 Open(std::unique_ptr<RNTupleModel> model, RNTuple *ntuple, const RNTupleReadOptions &options = RNTupleReadOptions());
171 /// Open RNTuples as one virtual, horizontally combined ntuple. The underlying RNTuples must
172 /// have an identical number of entries. Fields in the combined RNTuple are named with the ntuple name
173 /// as a prefix, e.g. myNTuple1.px and myNTuple2.pt (see tutorial ntpl006_friends)
174 static std::unique_ptr<RNTupleReader> OpenFriends(std::span<ROpenSpec> ntuples);
/// Returns a new reader constructed from a clone of this reader's page source.
/// Only the page source is handed to the new reader (source-only constructor), so the model held by
/// this reader is not copied; presumably the clone derives its model from the on-storage metadata.
175 std::unique_ptr<RNTupleReader> Clone()
176 {
177 return std::unique_ptr<RNTupleReader>(new RNTupleReader(fSource->Clone()));
178 }
180
/// Returns the number of entries as reported by the underlying page source.
181 NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); }
182 const RNTupleModel &GetModel();
183
184 /// Returns a cached copy of the page source descriptor. The returned pointer remains valid until the next call
185 /// to LoadEntry or to any of the views returned from the reader.
187
188 /// Prints a detailed summary of the ntuple, including a list of fields.
189 ///
190 /// **Example: print summary information to stdout**
191 /// ~~~ {.cpp}
192 /// #include <ROOT/RNTupleReader.hxx>
193 /// using ROOT::Experimental::ENTupleInfo;
194 /// using ROOT::Experimental::RNTupleReader;
195 ///
196 /// #include <iostream>
197 ///
198 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
199 /// ntuple->PrintInfo();
200 /// // or, equivalently:
201 /// ntuple->PrintInfo(ENTupleInfo::kSummary, std::cout);
202 /// ~~~
203 /// **Example: print detailed column storage data to stderr**
204 /// ~~~ {.cpp}
205 /// #include <ROOT/RNTupleReader.hxx>
206 /// using ROOT::Experimental::ENTupleInfo;
207 /// using ROOT::Experimental::RNTupleReader;
208 ///
209 /// #include <iostream>
210 ///
211 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
212 /// ntuple->PrintInfo(ENTupleInfo::kStorageDetails, std::cerr);
213 /// ~~~
214 ///
215 /// For use of ENTupleInfo::kMetrics, see #EnableMetrics.
216 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout);
217
218 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
219 /// prints the output in JSON format.
220 /// Uses the visitor pattern to traverse through each field of the given entry.
221 void Show(NTupleSize_t index, std::ostream &output = std::cout);
222
223 /// Analogous to Fill(), fills the default entry of the model. Does not return a value.
224 /// On I/O errors, raises an exception.
226 {
227 // TODO(jblomer): can be templated depending on the factory method / constructor
228 if (R__unlikely(!fModel)) {
229 fModel = fSource->GetSharedDescriptorGuard()->CreateModel();
231 }
232 LoadEntry(index, fModel->GetDefaultEntry());
233 }
234 /// Fills a user provided entry by calling REntry::Read() for the given index. The entry is expected to have
/// been instantiated from the ntuple model. NOTE(review): no such check is visible in this inline body --
/// confirm whether REntry::Read() performs it.
235 void LoadEntry(NTupleSize_t index, REntry &entry) { entry.Read(index); }
236
237 /// Returns an iterator over the entry indices of the RNTuple.
238 ///
239 /// **Example: iterate over all entries and print each entry in JSON format**
240 /// ~~~ {.cpp}
241 /// #include <ROOT/RNTupleReader.hxx>
242 /// using ROOT::Experimental::ENTupleShowFormat;
243 /// using ROOT::Experimental::RNTupleReader;
244 ///
245 /// #include <iostream>
246 ///
247 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
248 /// for (auto i : ntuple->GetEntryRange()) {
249 /// ntuple->Show(i);
250 /// }
251 /// ~~~
253
254 /// Provides access to an individual field that can contain either a scalar value or a collection, e.g.
255 /// GetView<double>("particles.pt") or GetView<std::vector<double>>("particle"). It can as well be the index
256 /// field of a collection itself, like GetView<NTupleSize_t>("particle").
257 ///
258 /// Raises an exception if there is no field with the given name.
259 ///
260 /// **Example: iterate over a field named "pt" of type `float`**
261 /// ~~~ {.cpp}
262 /// #include <ROOT/RNTupleReader.hxx>
263 /// using ROOT::Experimental::RNTupleReader;
264 ///
265 /// #include <iostream>
266 ///
267 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
268 /// auto pt = ntuple->GetView<float>("pt");
269 ///
270 /// for (auto i : ntuple->GetEntryRange()) {
271 /// std::cout << i << ": " << pt(i) << "\n";
272 /// }
273 /// ~~~
274 template <typename T>
275 RNTupleView<T, false> GetView(std::string_view fieldName)
276 {
// Resolve the field name to its descriptor ID, then delegate to the ID-based overload.
277 return GetView<T>(RetrieveFieldId(fieldName));
278 }
279
/// Name-based variant of GetView() that additionally takes a caller-provided shared object.
/// The field name is resolved with RetrieveFieldId() and objPtr is forwarded to the ID-based overload.
/// NOTE(review): presumably objPtr is the object the view reads values into -- confirm against RNTupleView.
280 template <typename T>
281 RNTupleView<T, true> GetView(std::string_view fieldName, std::shared_ptr<T> objPtr)
282 {
283 return GetView<T>(RetrieveFieldId(fieldName), objPtr);
284 }
285
286 template <typename T>
288 {
289 return RNTupleView<T, false>(fieldId, fSource.get());
290 }
291
/// Creates an RNTupleView<T, true> for the field with the given descriptor ID. The view is constructed
/// from the field ID, the reader's page source, and objPtr.
/// NOTE(review): presumably objPtr is the object the view reads values into -- confirm against RNTupleView.
292 template <typename T>
293 RNTupleView<T, true> GetView(DescriptorId_t fieldId, std::shared_ptr<T> objPtr)
294 {
295 return RNTupleView<T, true>(fieldId, fSource.get(), objPtr);
296 }
297
/// Returns a collection view for the field with the given name. The name is looked up in the page
/// source's descriptor and the resulting field ID is passed to the ID-based overload.
298 /// Raises an exception if:
299 /// * there is no field with the given name or,
300 /// * the field is not a collection
301 RNTupleCollectionView GetCollectionView(std::string_view fieldName)
302 {
303 auto fieldId = fSource->GetSharedDescriptorGuard()->FindFieldId(fieldName);
304 if (fieldId == kInvalidDescriptorId) {
// Unknown field name: report it together with the ntuple name taken from the descriptor.
305 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '" +
306 fSource->GetSharedDescriptorGuard()->GetName() + "'"));
307 }
308 return GetCollectionView(fieldId);
309 }
310
312 {
313 return RNTupleCollectionView(fieldId, fSource.get());
314 }
315
316 RIterator begin() { return RIterator(0); }
318
319 /// Enable performance measurements (decompression time, bytes read from storage, etc.)
320 ///
321 /// **Example: inspect the reader metrics after loading every entry**
322 /// ~~~ {.cpp}
323 /// #include <ROOT/RNTupleReader.hxx>
324 /// using ROOT::Experimental::ENTupleInfo;
325 /// using ROOT::Experimental::RNTupleReader;
326 ///
327 /// #include <iostream>
328 ///
329 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
330 /// // metrics must be turned on beforehand
331 /// ntuple->EnableMetrics();
332 ///
333 /// for (auto i : ntuple->GetEntryRange()) {
334 /// ntuple->LoadEntry(i);
335 /// }
336 /// ntuple->PrintInfo(ENTupleInfo::kMetrics);
337 /// ~~~
/// Returns a const reference to this reader's metrics object (fMetrics).
339 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
340}; // class RNTupleReader
341
342} // namespace Experimental
343} // namespace ROOT
344
345#endif // ROOT7_RNTupleReader
#define R__unlikely(expr)
Definition RConfig.hxx:578
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
A collection of Counter objects with a name, a unit, and a description.
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:46
void Read(NTupleSize_t index)
Definition REntry.hxx:93
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
A view for a collection, that can itself generate new ntuple views for its nested fields.
The on-storage meta-data of an ntuple.
Used to loop over indexes (entries or collections) between start and end.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
bool operator!=(const iterator &rh) const
bool operator==(const iterator &rh) const
An RNTuple that is used to read data from storage.
static std::unique_ptr< RNTupleReader > OpenFriends(std::span< ROpenSpec > ntuples)
Open RNTuples as one virtual, horizontally combined ntuple.
DescriptorId_t RetrieveFieldId(std::string_view fieldName) const
void Show(NTupleSize_t index, std::ostream &output=std::cout)
Shows the values of the i-th entry/row, starting with 0 for the first entry.
std::unique_ptr< RNTupleReader > Clone()
Detail::RNTupleMetrics fMetrics
std::unique_ptr< RNTupleReader > fDisplayReader
We use a dedicated on-demand reader for Show() and Scan().
void EnableMetrics()
Enable performance measurements (decompression time, bytes read from storage, etc....
const Detail::RNTupleMetrics & GetMetrics() const
RNTupleView< T, false > GetView(std::string_view fieldName)
Provides access to an individual field that can contain either a scalar value or a collection,...
const RNTupleDescriptor & GetDescriptor()
Returns a cached copy of the page source descriptor.
std::unique_ptr< RNTupleDescriptor > fCachedDescriptor
The ntuple descriptor in the page source is protected by a read-write lock.
RNTupleCollectionView GetCollectionView(std::string_view fieldName)
Raises an exception if:
std::unique_ptr< Internal::RPageSource > fSource
RNTupleCollectionView GetCollectionView(DescriptorId_t fieldId)
static std::unique_ptr< RNTupleReader > Open(std::string_view ntupleName, std::string_view storage, const RNTupleReadOptions &options=RNTupleReadOptions())
Open an RNTuple for reading.
RNTupleGlobalRange GetEntryRange()
Returns an iterator over the entry indices of the RNTuple.
std::unique_ptr< Internal::RPageStorage::RTaskScheduler > fUnzipTasks
Set as the page source's scheduler for parallel page decompression if IMT is on Needs to be destructe...
RNTupleView< T, true > GetView(DescriptorId_t fieldId, std::shared_ptr< T > objPtr)
RNTupleView< T, true > GetView(std::string_view fieldName, std::shared_ptr< T > objPtr)
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
void PrintInfo(const ENTupleInfo what=ENTupleInfo::kSummary, std::ostream &output=std::cout)
Prints a detailed summary of the ntuple, including a list of fields.
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
void LoadEntry(NTupleSize_t index, REntry &entry)
Fills a user provided entry after checking that the entry has been instantiated from the ntuple model...
RNTupleView< T, false > GetView(DescriptorId_t fieldId)
void ConnectModel(RNTupleModel &model)
An RNTupleView provides read-only access to a single field of the ntuple.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:61
const Int_t n
Definition legend1.C:16
ENTupleInfo
Listing of the different kinds of information that can be printed by RNTupleReader::PrintInfo()
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
static const char * what
Definition stlLoader.cc:5
Used to specify the underlying RNTuples in OpenFriends()
ROpenSpec(std::string_view n, std::string_view s)
static void output()