Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleReader.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleReader.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-20
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleReader
17#define ROOT7_RNTupleReader
18
19#include <ROOT/RConfig.hxx> // for R__unlikely
20#include <ROOT/RError.hxx>
23#include <ROOT/RNTupleModel.hxx>
25#include <ROOT/RNTupleUtil.hxx>
26#include <ROOT/RNTupleView.hxx>
27#include <ROOT/RPageStorage.hxx>
28#include <ROOT/RSpan.hxx>
29
30#include <iostream>
31#include <iterator>
32#include <memory>
33#include <string>
34#include <string_view>
35
36namespace ROOT {
37class RNTuple;
38
39namespace Experimental {
40class REntry;
41
42/// Listing of the different options that can be printed by RNTupleReader::PrintInfo()
43enum class ENTupleInfo {
44 kSummary, // The ntuple name, description, number of entries
45 kStorageDetails, // size on storage, page sizes, compression factor, etc.
46 kMetrics, // internal performance counters, requires that EnableMetrics() was called
47};
48
49// clang-format off
50/**
51\class ROOT::Experimental::RNTupleReader
52\ingroup NTuple
53\brief An RNTuple that is used to read data from storage
54
55An input ntuple provides data from storage as C++ objects. The ntuple model can be created from the data on storage
56or it can be imposed by the user. The latter case allows users to read into a specialized ntuple model that covers
57only a subset of the fields in the ntuple. The ntuple model is used when reading complete entries.
58Individual fields can be read as well by instantiating a tree view.
59
60~~~ {.cpp}
61#include <ROOT/RNTupleReader.hxx>
62using ROOT::Experimental::RNTupleReader;
63
64#include <iostream>
65
66auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
67std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
68~~~
69*/
70// clang-format on
72private:
73 /// Set as the page source's scheduler for parallel page decompression if IMT is on
74 /// Needs to be destructed after the page source is destructed (and thus be declared before)
75 std::unique_ptr<Internal::RPageStorage::RTaskScheduler> fUnzipTasks;
76
77 std::unique_ptr<Internal::RPageSource> fSource;
78 /// Needs to be destructed before fSource
79 std::unique_ptr<RNTupleModel> fModel;
80 /// We use a dedicated on-demand reader for Show() and Scan(). Printing data uses all the fields
81 /// from the full model even if the analysis code uses only a subset of fields. The display reader
82 /// is a clone of the original reader.
83 std::unique_ptr<RNTupleReader> fDisplayReader;
84 /// The ntuple descriptor in the page source is protected by a read-write lock. We don't expose that to the
85 /// users of RNTupleReader::GetDescriptor(). Instead, if descriptor information is needed, we clone the
86 /// descriptor. Using the descriptor's generation number, we know if the cached descriptor is stale.
87 /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes,
88 /// not on a hot code path.
89 std::unique_ptr<RNTupleDescriptor> fCachedDescriptor;
91 /// If not nullopt, these will be used when creating the model
92 std::optional<RNTupleDescriptor::RCreateModelOptions> fCreateModelOptions;
93
94 RNTupleReader(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Internal::RPageSource> source,
95 const RNTupleReadOptions &options);
96 /// The model is generated from the ntuple metadata on storage.
97 explicit RNTupleReader(std::unique_ptr<Internal::RPageSource> source, const RNTupleReadOptions &options);
98
99 void ConnectModel(RNTupleModel &model);
101 void InitPageSource(bool enableMetrics);
102
103 DescriptorId_t RetrieveFieldId(std::string_view fieldName) const;
104
105public:
106 /// Iterator used to browse through the entries. Its position is the index stored in fIndex.
107 class RIterator {
108 private:
110
111 public:
 // Advertise the iterator as a forward iterator.
113 using iterator_category = std::forward_iterator_tag;
118
119 RIterator() = default;
121 ~RIterator() = default;
122
 /// Post-increment: advances fIndex and returns a copy of the iterator taken before the advance.
123 iterator operator++(int) /* postfix */
124 {
125 auto r = *this;
126 fIndex++;
127 return r;
128 }
 /// Pre-increment: advances fIndex and returns this iterator.
129 iterator &operator++() /* prefix */
130 {
131 ++fIndex;
132 return *this;
133 }
 /// Gives pointer access to the stored index.
135 pointer operator->() { return &fIndex; }
 /// Two iterators compare equal exactly when their fIndex values are equal.
136 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
137 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
138 };
139
140 /// Open an RNTuple for reading.
141 ///
142 /// Throws an RException if there is no RNTuple with the given name.
143 ///
144 /// **Example: open an RNTuple and print the number of entries**
145 /// ~~~ {.cpp}
146 /// #include <ROOT/RNTupleReader.hxx>
147 /// using ROOT::Experimental::RNTupleReader;
148 ///
149 /// #include <iostream>
150 ///
151 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
152 /// std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
153 /// ~~~
154 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage,
155 const RNTupleReadOptions &options = RNTupleReadOptions());
156 static std::unique_ptr<RNTupleReader>
157 Open(const RNTuple &ntuple, const RNTupleReadOptions &options = RNTupleReadOptions());
158
159 /// The caller imposes a model, which must be compatible with the model found in the data on storage.
160 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model, std::string_view ntupleName,
161 std::string_view storage,
162 const RNTupleReadOptions &options = RNTupleReadOptions());
163 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model, const RNTuple &ntuple,
164 const RNTupleReadOptions &options = RNTupleReadOptions());
165
166 /// The caller imposes the way the model is reconstructed
167 static std::unique_ptr<RNTupleReader> Open(const RNTupleDescriptor::RCreateModelOptions &createModelOpts,
168 std::string_view ntupleName, std::string_view storage,
169 const RNTupleReadOptions &options = RNTupleReadOptions());
170 static std::unique_ptr<RNTupleReader> Open(const RNTupleDescriptor::RCreateModelOptions &createModelOpts,
171 const RNTuple &ntuple,
172 const RNTupleReadOptions &options = RNTupleReadOptions());
 /// Creates a new reader on top of a clone of this reader's page source (fSource->Clone()).
 /// No model is handed over: the constructor taking only a page source and read options is used.
173 std::unique_ptr<RNTupleReader> Clone()
174 {
175 auto options = RNTupleReadOptions{};
 // Plain `new` wrapped in a unique_ptr, presumably because the RNTupleReader constructors are private.
177 return std::unique_ptr<RNTupleReader>(new RNTupleReader(fSource->Clone(), options));
178 }
180
 /// Number of entries, as reported by the page source.
181 NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); }
182 const RNTupleModel &GetModel();
183
184 /// Returns a cached copy of the page source descriptor. The returned reference remains valid until the next call
185 /// to LoadEntry or to any of the views returned from the reader.
187
188 /// Prints a detailed summary of the ntuple, including a list of fields.
189 ///
190 /// **Example: print summary information to stdout**
191 /// ~~~ {.cpp}
192 /// #include <ROOT/RNTupleReader.hxx>
193 /// using ROOT::Experimental::ENTupleInfo;
194 /// using ROOT::Experimental::RNTupleReader;
195 ///
196 /// #include <iostream>
197 ///
198 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
199 /// ntuple->PrintInfo();
200 /// // or, equivalently:
201 /// ntuple->PrintInfo(ENTupleInfo::kSummary, std::cout);
202 /// ~~~
203 /// **Example: print detailed column storage data to stderr**
204 /// ~~~ {.cpp}
205 /// #include <ROOT/RNTupleReader.hxx>
206 /// using ROOT::Experimental::ENTupleInfo;
207 /// using ROOT::Experimental::RNTupleReader;
208 ///
209 /// #include <iostream>
210 ///
211 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
212 /// ntuple->PrintInfo(ENTupleInfo::kStorageDetails, std::cerr);
213 /// ~~~
214 ///
215 /// For use of ENTupleInfo::kMetrics, see #EnableMetrics.
216 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout) const;
217
218 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
219 /// prints the output in JSON format.
220 /// Uses the visitor pattern to traverse through each field of the given entry.
221 void Show(NTupleSize_t index, std::ostream &output = std::cout);
222
223 /// Analogous to Fill(), fills the default entry of the model. The method returns void, so reaching the end of
 /// the ntuple is not reported through a return value.
224 /// On I/O errors, raises an exception.
226 {
227 // TODO(jblomer): can be templated depending on the factory method / constructor
228 if (R__unlikely(!fModel)) {
229 fModel = fSource->GetSharedDescriptorGuard()->CreateModel(
232 }
233 LoadEntry(index, fModel->GetDefaultEntry());
234 }
235 /// Fills a user provided entry by calling REntry::Read() for the given index. This method itself does not
 /// check that the entry has been instantiated from the ntuple model.
 /// NOTE(review): confirm whether REntry::Read() performs that check.
236 void LoadEntry(NTupleSize_t index, REntry &entry) { entry.Read(index); }
237
238 /// Returns an iterator over the entry indices of the RNTuple.
239 ///
240 /// **Example: iterate over all entries and print each entry in JSON format**
241 /// ~~~ {.cpp}
242 /// #include <ROOT/RNTupleReader.hxx>
243 /// using ROOT::Experimental::ENTupleShowFormat;
244 /// using ROOT::Experimental::RNTupleReader;
245 ///
246 /// #include <iostream>
247 ///
248 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
249 /// for (auto i : ntuple->GetEntryRange()) {
250 /// ntuple->Show(i);
251 /// }
252 /// ~~~
254
255 /// Provides access to an individual field that can contain either a scalar value or a collection, e.g.
256 /// GetView<double>("particles.pt") or GetView<std::vector<double>>("particle"). It can as well be the index
257 /// field of a collection itself, like GetView<NTupleSize_t>("particle").
258 ///
259 /// Raises an exception if there is no field with the given name.
260 ///
261 /// **Example: iterate over a field named "pt" of type `float`**
262 /// ~~~ {.cpp}
263 /// #include <ROOT/RNTupleReader.hxx>
264 /// using ROOT::Experimental::RNTupleReader;
265 ///
266 /// #include <iostream>
267 ///
268 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
269 /// auto pt = ntuple->GetView<float>("pt");
270 ///
271 /// for (auto i : ntuple->GetEntryRange()) {
272 /// std::cout << i << ": " << pt(i) << "\n";
273 /// }
274 /// ~~~
275 template <typename T>
276 RNTupleView<T> GetView(std::string_view fieldName)
277 {
 // Resolve the field name to its descriptor ID, then delegate to the ID-based overload.
278 return GetView<T>(RetrieveFieldId(fieldName));
279 }
280
 /// Name-based overload that additionally takes a shared pointer: resolves fieldName to a descriptor ID and
 /// forwards the ID together with objPtr to the ID-based overload.
281 template <typename T>
282 RNTupleView<T> GetView(std::string_view fieldName, std::shared_ptr<T> objPtr)
283 {
284 return GetView<T>(RetrieveFieldId(fieldName), objPtr);
285 }
286
 /// Name-based overload that additionally takes a raw pointer: resolves fieldName to a descriptor ID and
 /// forwards the ID together with rawPtr to the ID-based overload.
287 template <typename T>
288 RNTupleView<T> GetView(std::string_view fieldName, T *rawPtr)
289 {
290 return GetView<T>(RetrieveFieldId(fieldName), rawPtr);
291 }
292
293 template <typename T>
295 {
296 auto field = RNTupleView<T>::CreateField(fieldId, *fSource);
297 auto range = Internal::GetFieldRange(*field, *fSource);
298 return RNTupleView<T>(std::move(field), range);
299 }
300
 /// ID-based overload taking a shared pointer: creates the field for fieldId on the page source, obtains its
 /// range from Internal::GetFieldRange(), and builds the view from the field, the range and objPtr.
301 template <typename T>
302 RNTupleView<T> GetView(DescriptorId_t fieldId, std::shared_ptr<T> objPtr)
303 {
304 auto field = RNTupleView<T>::CreateField(fieldId, *fSource);
 // The range is computed before the field is moved into the view.
305 auto range = Internal::GetFieldRange(*field, *fSource);
306 return RNTupleView<T>(std::move(field), range, objPtr);
307 }
308
309 template <typename T>
311 {
312 auto field = RNTupleView<T>::CreateField(fieldId, *fSource);
313 auto range = Internal::GetFieldRange(*field, *fSource);
314 return RNTupleView<T>(std::move(field), range, rawPtr);
315 }
316
317 template <typename T>
319 {
320 return GetDirectAccessView<T>(RetrieveFieldId(fieldName));
321 }
322
323 template <typename T>
325 {
326 auto field = RNTupleDirectAccessView<T>::CreateField(fieldId, *fSource);
327 auto range = Internal::GetFieldRange(field, *fSource);
328 return RNTupleDirectAccessView<T>(std::move(field), range);
329 }
330
331 /// Raises an exception if:
332 /// * there is no field with the given name or,
333 /// * the field is not a collection
334 RNTupleCollectionView GetCollectionView(std::string_view fieldName)
335 {
 // Look up the field ID by name in the page source's descriptor.
336 auto fieldId = fSource->GetSharedDescriptorGuard()->FindFieldId(fieldName);
337 if (fieldId == kInvalidDescriptorId) {
 // Unknown field name: report it together with the name of the RNTuple.
338 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '" +
339 fSource->GetSharedDescriptorGuard()->GetName() + "'"));
340 }
 // NOTE(review): the GetView() overloads resolve names through RetrieveFieldId(); presumably this lookup
 // could use the same helper -- confirm that it raises the same error before changing it.
341 return GetCollectionView(fieldId);
342 }
343
345 {
346 return RNTupleCollectionView::Create(fieldId, fSource.get());
347 }
348
 /// Returns an RIterator constructed with 0, i.e. the start of the iteration.
349 RIterator begin() { return RIterator(0); }
351
352 /// Enable performance measurements (decompression time, bytes read from storage, etc.)
353 ///
354 /// **Example: inspect the reader metrics after loading every entry**
355 /// ~~~ {.cpp}
356 /// #include <ROOT/RNTupleReader.hxx>
357 /// using ROOT::Experimental::ENTupleInfo;
358 /// using ROOT::Experimental::RNTupleReader;
359 ///
360 /// #include <iostream>
361 ///
362 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
363 /// // metrics must be turned on beforehand
364 /// ntuple->EnableMetrics();
365 ///
366 /// for (auto i : ntuple->GetEntryRange()) {
367 /// ntuple->LoadEntry(i);
368 /// }
369 /// ntuple->PrintInfo(ENTupleInfo::kMetrics);
370 /// ~~~
 /// Returns the reader's metrics object (fMetrics) by const reference.
372 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
373}; // class RNTupleReader
374
375} // namespace Experimental
376} // namespace ROOT
377
378#endif // ROOT7_RNTupleReader
#define R__unlikely(expr)
Definition RConfig.hxx:594
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
A collection of Counter objects with a name, a unit, and a description.
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:51
void Read(NTupleSize_t index)
Definition REntry.hxx:119
A view for a collection, that can itself generate new ntuple views for its nested fields.
static RNTupleCollectionView Create(DescriptorId_t fieldId, Internal::RPageSource *source)
The on-storage meta-data of an ntuple.
A view variant that provides direct access to the I/O buffers.
static RField< T > CreateField(DescriptorId_t fieldId, Internal::RPageSource &pageSource)
Used to loop over indexes (entries or collections) between start and end.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
bool operator!=(const iterator &rh) const
bool operator==(const iterator &rh) const
An RNTuple that is used to read data from storage.
RNTupleView< T > GetView(std::string_view fieldName, T *rawPtr)
DescriptorId_t RetrieveFieldId(std::string_view fieldName) const
void Show(NTupleSize_t index, std::ostream &output=std::cout)
Shows the values of the i-th entry/row, starting with 0 for the first entry.
RNTupleDirectAccessView< T > GetDirectAccessView(DescriptorId_t fieldId)
std::unique_ptr< RNTupleReader > Clone()
Detail::RNTupleMetrics fMetrics
std::unique_ptr< RNTupleReader > fDisplayReader
We use a dedicated on-demand reader for Show() and Scan().
void EnableMetrics()
Enable performance measurements (decompression time, bytes read from storage, etc....
const Detail::RNTupleMetrics & GetMetrics() const
RNTupleView< T > GetView(DescriptorId_t fieldId, std::shared_ptr< T > objPtr)
RNTupleView< T > GetView(std::string_view fieldName)
Provides access to an individual field that can contain either a scalar value or a collection,...
RNTupleView< T > GetView(std::string_view fieldName, std::shared_ptr< T > objPtr)
const RNTupleDescriptor & GetDescriptor()
Returns a cached copy of the page source descriptor.
std::optional< RNTupleDescriptor::RCreateModelOptions > fCreateModelOptions
If not nullopt, these will be used when creating the model.
std::unique_ptr< RNTupleDescriptor > fCachedDescriptor
The ntuple descriptor in the page source is protected by a read-write lock.
RNTupleCollectionView GetCollectionView(std::string_view fieldName)
Raises an exception if:
std::unique_ptr< Internal::RPageSource > fSource
RNTupleDirectAccessView< T > GetDirectAccessView(std::string_view fieldName)
RNTupleCollectionView GetCollectionView(DescriptorId_t fieldId)
static std::unique_ptr< RNTupleReader > Open(std::string_view ntupleName, std::string_view storage, const RNTupleReadOptions &options=RNTupleReadOptions())
Open an RNTuple for reading.
RNTupleGlobalRange GetEntryRange()
Returns an iterator over the entry indices of the RNTuple.
std::unique_ptr< Internal::RPageStorage::RTaskScheduler > fUnzipTasks
Set as the page source's scheduler for parallel page decompression if IMT is on Needs to be destructe...
void InitPageSource(bool enableMetrics)
RNTupleView< T > GetView(DescriptorId_t fieldId)
void PrintInfo(const ENTupleInfo what=ENTupleInfo::kSummary, std::ostream &output=std::cout) const
Prints a detailed summary of the ntuple, including a list of fields.
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
void LoadEntry(NTupleSize_t index, REntry &entry)
Fills a user provided entry after checking that the entry has been instantiated from the ntuple model...
void ConnectModel(RNTupleModel &model)
RNTupleView< T > GetView(DescriptorId_t fieldId, T *rawPtr)
static std::unique_ptr< RFieldBase > CreateField(DescriptorId_t fieldId, Internal::RPageSource &pageSource)
An RNTupleView for a known type.
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:69
RNTupleGlobalRange GetFieldRange(const RFieldBase &field, const RPageSource &pageSource)
Helper to get the iteration space of the given field that needs to be connected to the given page sou...
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
static const char * what
Definition stlLoader.cc:5
static void output()