Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleReader.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleReader.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-20
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleReader
17#define ROOT7_RNTupleReader
18
19#include <ROOT/RConfig.hxx> // for R__unlikely
20#include <ROOT/RError.hxx>
23#include <ROOT/RNTupleModel.hxx>
25#include <ROOT/RNTupleUtil.hxx>
26#include <ROOT/RNTupleView.hxx>
27#include <ROOT/RPageStorage.hxx>
28#include <ROOT/RSpan.hxx>
29
30#include <iostream>
31#include <iterator>
32#include <memory>
33#include <string>
34#include <string_view>
35
36namespace ROOT {
37class RNTuple;
38
39namespace Experimental {
40class REntry;
41
42/// Listing of the different options that can be printed by RNTupleReader::PrintInfo()
43enum class ENTupleInfo {
44 kSummary, // The ntuple name, description, number of entries
45 kStorageDetails, // size on storage, page sizes, compression factor, etc.
46 kMetrics, // internal performance counters, requires that EnableMetrics() was called
47};
48
49// clang-format off
50/**
51\class ROOT::Experimental::RNTupleReader
52\ingroup NTuple
53\brief An RNTuple that is used to read data from storage
54
55An input ntuple provides data from storage as C++ objects. The ntuple model can be created from the data on storage
56or it can be imposed by the user. The latter case allows users to read into a specialized ntuple model that covers
57only a subset of the fields in the ntuple. The ntuple model is used when reading complete entries.
58Individual fields can be read as well by instantiating a tree view.
59
60~~~ {.cpp}
61#include <ROOT/RNTupleReader.hxx>
62using ROOT::Experimental::RNTupleReader;
63
64#include <iostream>
65
66auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
67std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
68~~~
69*/
70// clang-format on
72private:
73 /// Set as the page source's scheduler for parallel page decompression if IMT is on
74 /// Needs to be destructed after the page source is destructed (and thus be declared before)
75 std::unique_ptr<Internal::RPageStorage::RTaskScheduler> fUnzipTasks;
76
77 std::unique_ptr<Internal::RPageSource> fSource;
78 /// Needs to be destructed before fSource
79 std::unique_ptr<RNTupleModel> fModel;
80 /// We use a dedicated on-demand reader for Show() and Scan(). Printing data uses all the fields
81 /// from the full model even if the analysis code uses only a subset of fields. The display reader
82 /// is a clone of the original reader.
83 std::unique_ptr<RNTupleReader> fDisplayReader;
84 /// The ntuple descriptor in the page source is protected by a read-write lock. We don't expose that to the
85 /// users of RNTupleReader::GetDescriptor(). Instead, if descriptor information is needed, we clone the
86 /// descriptor. Using the descriptor's generation number, we know if the cached descriptor is stale.
87 /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes,
88 /// not on a hot code path.
89 std::unique_ptr<RNTupleDescriptor> fCachedDescriptor;
91
92 RNTupleReader(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Internal::RPageSource> source,
93 const RNTupleReadOptions &options);
94 /// The model is generated from the ntuple metadata on storage.
95 explicit RNTupleReader(std::unique_ptr<Internal::RPageSource> source, const RNTupleReadOptions &options);
96
97 void ConnectModel(RNTupleModel &model);
99 void InitPageSource(bool enableMetrics);
100
101 DescriptorId_t RetrieveFieldId(std::string_view fieldName) const;
102
103public:
104 // Browse through the entries
105 class RIterator {
106 private:
108
109 public:
111 using iterator_category = std::forward_iterator_tag;
116
117 RIterator() = default;
119 ~RIterator() = default;
120
/// Postfix increment: advances the stored index (fIndex) by one and returns a copy of the
/// iterator as it was before the increment.
121 iterator operator++(int) /* postfix */
122 {
123 auto r = *this; // snapshot of the pre-increment state, handed back to the caller
124 fIndex++;
125 return r;
126 }
/// Prefix increment: advances the stored index (fIndex) by one and returns a reference to
/// this (already advanced) iterator.
127 iterator &operator++() /* prefix */
128 {
129 ++fIndex;
130 return *this;
131 }
/// Member access: yields the address of the stored index (fIndex).
133 pointer operator->() { return &fIndex; }
/// Two iterators compare equal exactly when their stored indices are equal.
134 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
/// Counterpart of operator==: true when the stored indices differ.
135 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
136 };
137
138 /// Open an RNTuple for reading.
139 ///
140 /// Throws an RException if there is no RNTuple with the given name.
141 ///
142 /// **Example: open an RNTuple and print the number of entries**
143 /// ~~~ {.cpp}
144 /// #include <ROOT/RNTupleReader.hxx>
145 /// using ROOT::Experimental::RNTupleReader;
146 ///
147 /// #include <iostream>
148 ///
149 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
150 /// std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
151 /// ~~~
152 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage,
153 const RNTupleReadOptions &options = RNTupleReadOptions());
154 static std::unique_ptr<RNTupleReader>
155 Open(const RNTuple &ntuple, const RNTupleReadOptions &options = RNTupleReadOptions());
156 /// The caller imposes a model, which must be compatible with the model found in the data on storage.
157 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model, std::string_view ntupleName,
158 std::string_view storage,
159 const RNTupleReadOptions &options = RNTupleReadOptions());
160 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model, const RNTuple &ntuple,
161 const RNTupleReadOptions &options = RNTupleReadOptions());
162 /// Open RNTuples as one virtual, horizontally combined ntuple. The underlying RNTuples must
163 /// have an identical number of entries. Fields in the combined RNTuple are named with the ntuple name
164 /// as a prefix, e.g. myNTuple1.px and myNTuple2.pt (see tutorial ntpl006_friends)
165 static std::unique_ptr<RNTupleReader>
166 OpenFriends(std::span<RNTupleOpenSpec> ntuples, const RNTupleReadOptions &options = RNTupleReadOptions());
167 std::unique_ptr<RNTupleReader> Clone()
168 {
169 auto options = RNTupleReadOptions{};
171 return std::unique_ptr<RNTupleReader>(new RNTupleReader(fSource->Clone(), options));
172 }
174
/// Number of entries in the ntuple, as reported by the underlying page source (fSource).
175 NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); }
176 const RNTupleModel &GetModel();
177
178 /// Returns a cached copy of the page source descriptor. The returned reference remains valid until the next call
179 /// to LoadEntry or to any of the views returned from the reader.
181
182 /// Prints a detailed summary of the ntuple, including a list of fields.
183 ///
184 /// **Example: print summary information to stdout**
185 /// ~~~ {.cpp}
186 /// #include <ROOT/RNTupleReader.hxx>
187 /// using ROOT::Experimental::ENTupleInfo;
188 /// using ROOT::Experimental::RNTupleReader;
189 ///
190 /// #include <iostream>
191 ///
192 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
193 /// ntuple->PrintInfo();
194 /// // or, equivalently:
195 /// ntuple->PrintInfo(ENTupleInfo::kSummary, std::cout);
196 /// ~~~
197 /// **Example: print detailed column storage data to stderr**
198 /// ~~~ {.cpp}
199 /// #include <ROOT/RNTupleReader.hxx>
200 /// using ROOT::Experimental::ENTupleInfo;
201 /// using ROOT::Experimental::RNTupleReader;
202 ///
203 /// #include <iostream>
204 ///
205 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
206 /// ntuple->PrintInfo(ENTupleInfo::kStorageDetails, std::cerr);
207 /// ~~~
208 ///
209 /// For use of ENTupleInfo::kMetrics, see #EnableMetrics.
210 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout) const;
211
212 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
213 /// prints the output in JSON format.
214 /// Uses the visitor pattern to traverse through each field of the given entry.
215 void Show(NTupleSize_t index, std::ostream &output = std::cout);
216
217 /// Analogous to Fill(), fills the default entry of the model. This overload returns void, so reaching the end of
/// the ntuple is not signalled through a return value.
218 /// On I/O errors, raises an exception.
220 {
221 // TODO(jblomer): can be templated depending on the factory method / constructor
222 if (R__unlikely(!fModel)) {
223 fModel = fSource->GetSharedDescriptorGuard()->CreateModel();
225 }
226 LoadEntry(index, fModel->GetDefaultEntry());
227 }
228 /// Fills a user provided entry after checking that the entry has been instantiated from the ntuple model
/// \param index Entry number, forwarded unchanged to REntry::Read().
/// \param entry The entry whose Read() is invoked for the given index.
/// NOTE(review): no model-compatibility check is visible in this body; presumably any such validation
/// happens inside REntry::Read() -- confirm.
229 void LoadEntry(NTupleSize_t index, REntry &entry) { entry.Read(index); }
230
231 /// Returns an iterator over the entry indices of the RNTuple.
232 ///
233 /// **Example: iterate over all entries and print each entry in JSON format**
234 /// ~~~ {.cpp}
235 /// #include <ROOT/RNTupleReader.hxx>
236 /// using ROOT::Experimental::ENTupleShowFormat;
237 /// using ROOT::Experimental::RNTupleReader;
238 ///
239 /// #include <iostream>
240 ///
241 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
242 /// for (auto i : ntuple->GetEntryRange()) {
243 /// ntuple->Show(i);
244 /// }
245 /// ~~~
247
248 /// Provides access to an individual field that can contain either a scalar value or a collection, e.g.
249 /// GetView<double>("particles.pt") or GetView<std::vector<double>>("particle"). It can as well be the index
250 /// field of a collection itself, like GetView<NTupleSize_t>("particle").
251 ///
252 /// Raises an exception if there is no field with the given name.
253 ///
254 /// **Example: iterate over a field named "pt" of type `float`**
255 /// ~~~ {.cpp}
256 /// #include <ROOT/RNTupleReader.hxx>
257 /// using ROOT::Experimental::RNTupleReader;
258 ///
259 /// #include <iostream>
260 ///
261 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
262 /// auto pt = ntuple->GetView<float>("pt");
263 ///
264 /// for (auto i : ntuple->GetEntryRange()) {
265 /// std::cout << i << ": " << pt(i) << "\n";
266 /// }
267 /// ~~~
268 template <typename T>
269 RNTupleView<T> GetView(std::string_view fieldName)
270 {
// Resolve the field name to its descriptor id, then delegate to the id-based overload.
271 return GetView<T>(RetrieveFieldId(fieldName));
272 }
273
/// Name-based overload of GetView() that additionally takes a shared object pointer. The field name is
/// resolved via RetrieveFieldId() and objPtr is passed on unchanged to the id-based overload.
/// NOTE(review): presumably the view reads values into the object behind objPtr -- confirm against RNTupleView.
274 template <typename T>
275 RNTupleView<T> GetView(std::string_view fieldName, std::shared_ptr<T> objPtr)
276 {
277 return GetView<T>(RetrieveFieldId(fieldName), objPtr);
278 }
279
/// Name-based overload of GetView() that additionally takes a raw object pointer. The field name is
/// resolved via RetrieveFieldId() and rawPtr is passed on unchanged to the id-based overload.
/// NOTE(review): presumably the caller keeps ownership of the object behind rawPtr -- confirm against RNTupleView.
280 template <typename T>
281 RNTupleView<T> GetView(std::string_view fieldName, T *rawPtr)
282 {
283 return GetView<T>(RetrieveFieldId(fieldName), rawPtr);
284 }
285
286 template <typename T>
288 {
289 auto field = RNTupleView<T>::CreateField(fieldId, *fSource);
290 auto range = Internal::GetFieldRange(*field, *fSource);
291 return RNTupleView<T>(std::move(field), range);
292 }
293
/// Id-based overload of GetView() taking a shared object pointer: creates the field for fieldId against the
/// page source, determines its range and constructs the view from field, range and objPtr.
294 template <typename T>
295 RNTupleView<T> GetView(DescriptorId_t fieldId, std::shared_ptr<T> objPtr)
296 {
297 auto field = RNTupleView<T>::CreateField(fieldId, *fSource); // field object for fieldId, created against fSource
298 auto range = Internal::GetFieldRange(*field, *fSource); // iteration space of that field for this page source
299 return RNTupleView<T>(std::move(field), range, objPtr); // the view takes over the field object
300 }
301
302 template <typename T>
304 {
305 auto field = RNTupleView<T>::CreateField(fieldId, *fSource);
306 auto range = Internal::GetFieldRange(*field, *fSource);
307 return RNTupleView<T>(std::move(field), range, rawPtr);
308 }
309
310 template <typename T>
312 {
313 return GetDirectAccessView<T>(RetrieveFieldId(fieldName));
314 }
315
316 template <typename T>
318 {
319 auto field = RNTupleDirectAccessView<T>::CreateField(fieldId, *fSource);
320 auto range = Internal::GetFieldRange(field, *fSource);
321 return RNTupleDirectAccessView<T>(std::move(field), range);
322 }
323
324 /// Raises an exception if:
325 /// * there is no field with the given name or,
326 /// * the field is not a collection
327 RNTupleCollectionView GetCollectionView(std::string_view fieldName)
328 {
// Look the name up in the page source's descriptor, accessed through a shared descriptor guard.
329 auto fieldId = fSource->GetSharedDescriptorGuard()->FindFieldId(fieldName);
330 if (fieldId == kInvalidDescriptorId) {
// Unknown field name: report it together with the name of the ntuple.
331 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '" +
332 fSource->GetSharedDescriptorGuard()->GetName() + "'"));
333 }
// NOTE(review): the "not a collection" case is not checked in this body; presumably it is detected
// further down in the id-based overload -- confirm.
334 return GetCollectionView(fieldId);
335 }
336
338 {
339 return RNTupleCollectionView::Create(fieldId, fSource.get());
340 }
341
/// Start of the entry iteration: returns an RIterator constructed from index 0.
342 RIterator begin() { return RIterator(0); }
344
345 /// Enable performance measurements (decompression time, bytes read from storage, etc.)
346 ///
347 /// **Example: inspect the reader metrics after loading every entry**
348 /// ~~~ {.cpp}
349 /// #include <ROOT/RNTupleReader.hxx>
350 /// using ROOT::Experimental::ENTupleInfo;
351 /// using ROOT::Experimental::RNTupleReader;
352 ///
353 /// #include <iostream>
354 ///
355 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
356 /// // metrics must be turned on beforehand
357 /// ntuple->EnableMetrics();
358 ///
359 /// for (auto i : ntuple->GetEntryRange()) {
360 /// ntuple->LoadEntry(i);
361 /// }
362 /// ntuple->PrintInfo(ENTupleInfo::kMetrics);
363 /// ~~~
/// Read-only access to the reader's metrics object (fMetrics).
365 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
366}; // class RNTupleReader
367
368} // namespace Experimental
369} // namespace ROOT
370
371#endif // ROOT7_RNTupleReader
#define R__unlikely(expr)
Definition RConfig.hxx:586
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
A collection of Counter objects with a name, a unit, and a description.
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:51
void Read(NTupleSize_t index)
Definition REntry.hxx:109
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
A view for a collection, that can itself generate new ntuple views for its nested fields.
static RNTupleCollectionView Create(DescriptorId_t fieldId, Internal::RPageSource *source)
The on-storage meta-data of an ntuple.
A view variant that provides direct access to the I/O buffers.
static RField< T > CreateField(DescriptorId_t fieldId, Internal::RPageSource &pageSource)
Used to loop over indexes (entries or collections) between start and end.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
bool operator!=(const iterator &rh) const
bool operator==(const iterator &rh) const
An RNTuple that is used to read data from storage.
RNTupleView< T > GetView(std::string_view fieldName, T *rawPtr)
DescriptorId_t RetrieveFieldId(std::string_view fieldName) const
void Show(NTupleSize_t index, std::ostream &output=std::cout)
Shows the values of the i-th entry/row, starting with 0 for the first entry.
RNTupleDirectAccessView< T > GetDirectAccessView(DescriptorId_t fieldId)
std::unique_ptr< RNTupleReader > Clone()
Detail::RNTupleMetrics fMetrics
std::unique_ptr< RNTupleReader > fDisplayReader
We use a dedicated on-demand reader for Show() and Scan().
void EnableMetrics()
Enable performance measurements (decompression time, bytes read from storage, etc....
const Detail::RNTupleMetrics & GetMetrics() const
RNTupleView< T > GetView(DescriptorId_t fieldId, std::shared_ptr< T > objPtr)
RNTupleView< T > GetView(std::string_view fieldName)
Provides access to an individual field that can contain either a scalar value or a collection,...
RNTupleView< T > GetView(std::string_view fieldName, std::shared_ptr< T > objPtr)
const RNTupleDescriptor & GetDescriptor()
Returns a cached copy of the page source descriptor.
std::unique_ptr< RNTupleDescriptor > fCachedDescriptor
The ntuple descriptor in the page source is protected by a read-write lock.
static std::unique_ptr< RNTupleReader > OpenFriends(std::span< RNTupleOpenSpec > ntuples, const RNTupleReadOptions &options=RNTupleReadOptions())
Open RNTuples as one virtual, horizontally combined ntuple.
RNTupleCollectionView GetCollectionView(std::string_view fieldName)
Raises an exception if:
std::unique_ptr< Internal::RPageSource > fSource
RNTupleDirectAccessView< T > GetDirectAccessView(std::string_view fieldName)
RNTupleCollectionView GetCollectionView(DescriptorId_t fieldId)
static std::unique_ptr< RNTupleReader > Open(std::string_view ntupleName, std::string_view storage, const RNTupleReadOptions &options=RNTupleReadOptions())
Open an RNTuple for reading.
RNTupleGlobalRange GetEntryRange()
Returns an iterator over the entry indices of the RNTuple.
std::unique_ptr< Internal::RPageStorage::RTaskScheduler > fUnzipTasks
Set as the page source's scheduler for parallel page decompression if IMT is on Needs to be destructe...
void InitPageSource(bool enableMetrics)
RNTupleView< T > GetView(DescriptorId_t fieldId)
void PrintInfo(const ENTupleInfo what=ENTupleInfo::kSummary, std::ostream &output=std::cout) const
Prints a detailed summary of the ntuple, including a list of fields.
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
void LoadEntry(NTupleSize_t index, REntry &entry)
Fills a user provided entry after checking that the entry has been instantiated from the ntuple model...
void ConnectModel(RNTupleModel &model)
RNTupleView< T > GetView(DescriptorId_t fieldId, T *rawPtr)
static std::unique_ptr< RFieldBase > CreateField(DescriptorId_t fieldId, Internal::RPageSource &pageSource)
An RNTupleView for a known type.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:69
RNTupleGlobalRange GetFieldRange(const RFieldBase &field, const RPageSource &pageSource)
Helper to get the iteration space of the given field that needs to be connected to the given page sou...
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
static const char * what
Definition stlLoader.cc:5
static void output()