Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RNTupleReader.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleReader.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2024-02-20
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleReader
17#define ROOT7_RNTupleReader
18
19#include <ROOT/RConfig.hxx> // for R__unlikely
20#include <ROOT/REntry.hxx>
21#include <ROOT/RError.hxx>
24#include <ROOT/RNTupleModel.hxx>
26#include <ROOT/RNTupleUtil.hxx>
27#include <ROOT/RNTupleView.hxx>
28#include <ROOT/RPageStorage.hxx>
29#include <ROOT/RSpan.hxx>
30
31#include <iostream>
32#include <iterator>
33#include <memory>
34#include <string>
35#include <string_view>
36
37namespace ROOT {
38class RNTuple;
39
40namespace Experimental {
41
42/// Listing of the different options that can be printed by RNTupleReader::PrintInfo()
43enum class ENTupleInfo {
44 kSummary, // The RNTuple name, description, number of entries
45 kStorageDetails, // size on storage, page sizes, compression factor, etc.
46 kMetrics, // internal performance counters, requires that EnableMetrics() was called
47};
48
49// clang-format off
50/**
51\class ROOT::Experimental::RNTupleReader
52\ingroup NTuple
53\brief Reads RNTuple data from storage
54
55The RNTupleReader provides access to data stored in the RNTuple binary format as C++ objects, using an RNTupleModel.
56It infers this model from the RNTuple's on-disk metadata, or uses a model imposed by the user.
57The latter case allows users to read into a specialized RNTuple model that covers
58only a subset of the fields in the RNTuple. The RNTuple model is used when reading complete entries through LoadEntry().
59Individual fields can be read as well by instantiating a view (see GetView()).
60
61~~~ {.cpp}
62#include <ROOT/RNTupleReader.hxx>
63using ROOT::Experimental::RNTupleReader;
64
65#include <iostream>
66
67auto reader = RNTupleReader::Open("myNTuple", "some/file.root");
68std::cout << "myNTuple has " << reader->GetNEntries() << " entries\n";
69~~~
70*/
71// clang-format on
73private:
74 /// Set as the page source's scheduler for parallel page decompression if implicit multi-threading (IMT) is on.
75 /// Needs to be destructed after the page source is destructed (and thus be declared before)
76 std::unique_ptr<Internal::RPageStorage::RTaskScheduler> fUnzipTasks;
77
78 std::unique_ptr<Internal::RPageSource> fSource;
79 /// Needs to be destructed before fSource
80 std::unique_ptr<ROOT::RNTupleModel> fModel;
81 /// We use a dedicated on-demand reader for Show(). Printing data uses all the fields
82 /// from the full model even if the analysis code uses only a subset of fields. The display reader
83 /// is a clone of the original reader.
84 std::unique_ptr<RNTupleReader> fDisplayReader;
85 /// The RNTuple descriptor in the page source is protected by a read-write lock. We don't expose that to the
86 /// users of RNTupleReader::GetDescriptor(). Instead, if descriptor information is needed, we clone the
87 /// descriptor. Using the descriptor's generation number, we know if the cached descriptor is stale.
88 /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes,
89 /// not on a hot code path.
90 std::optional<ROOT::RNTupleDescriptor> fCachedDescriptor;
92 /// If not nullopt, these will be used when creating the model
93 std::optional<ROOT::RNTupleDescriptor::RCreateModelOptions> fCreateModelOptions;
94
95 RNTupleReader(std::unique_ptr<ROOT::RNTupleModel> model, std::unique_ptr<Internal::RPageSource> source,
96 const ROOT::RNTupleReadOptions &options);
97 /// The model is generated from the RNTuple metadata on storage.
98 explicit RNTupleReader(std::unique_ptr<Internal::RPageSource> source, const ROOT::RNTupleReadOptions &options);
99
100 void ConnectModel(ROOT::RNTupleModel &model);
102 void InitPageSource(bool enableMetrics);
103
104 ROOT::DescriptorId_t RetrieveFieldId(std::string_view fieldName) const;
105
106public:
107 // Browse through the entries
108 class RIterator {
109 private:
111
112 public:
114 using iterator_category = std::forward_iterator_tag;
119
120 RIterator() = default;
122 ~RIterator() = default;
123
124 iterator operator++(int) /* postfix */
125 {
126 auto r = *this;
127 fIndex++;
128 return r;
129 }
130 iterator &operator++() /* prefix */
131 {
132 ++fIndex;
133 return *this;
134 }
136 pointer operator->() { return &fIndex; }
137 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
138 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
139 };
140
141 /// Open an RNTuple for reading.
142 ///
143 /// Throws an RException if there is no RNTuple with the given name.
144 ///
145 /// **Example: open an RNTuple and print the number of entries**
146 /// ~~~ {.cpp}
147 /// #include <ROOT/RNTupleReader.hxx>
148 /// using ROOT::Experimental::RNTupleReader;
149 ///
150 /// #include <iostream>
151 ///
152 /// auto reader = RNTupleReader::Open("myNTuple", "some/file.root");
153 /// std::cout << "myNTuple has " << reader->GetNEntries() << " entries\n";
154 /// ~~~
155 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage,
157 static std::unique_ptr<RNTupleReader>
159
160 /// The caller imposes a model, which must be compatible with the model found in the data on storage.
161 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<ROOT::RNTupleModel> model, std::string_view ntupleName,
162 std::string_view storage,
164 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<ROOT::RNTupleModel> model, const RNTuple &ntuple,
166
167 /// The caller imposes the way the model is reconstructed
168 static std::unique_ptr<RNTupleReader> Open(const ROOT::RNTupleDescriptor::RCreateModelOptions &createModelOpts,
169 std::string_view ntupleName, std::string_view storage,
171 static std::unique_ptr<RNTupleReader> Open(const ROOT::RNTupleDescriptor::RCreateModelOptions &createModelOpts,
172 const RNTuple &ntuple,
174 std::unique_ptr<RNTupleReader> Clone()
175 {
176 auto options = ROOT::RNTupleReadOptions{};
178 return std::unique_ptr<RNTupleReader>(new RNTupleReader(fSource->Clone(), options));
179 }
181
182 ROOT::NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); }
184 std::unique_ptr<ROOT::REntry> CreateEntry();
185
186 /// Returns a cached copy of the page source descriptor. The returned reference remains valid until the next call
187 /// to LoadEntry() or to any of the views returned from the reader.
189
190 /// Prints a detailed summary of the RNTuple, including a list of fields.
191 ///
192 /// **Example: print summary information to stdout**
193 /// ~~~ {.cpp}
194 /// #include <ROOT/RNTupleReader.hxx>
195 /// using ROOT::Experimental::ENTupleInfo;
196 /// using ROOT::Experimental::RNTupleReader;
197 ///
198 /// #include <iostream>
199 ///
200 /// auto reader = RNTupleReader::Open("myNTuple", "some/file.root");
201 /// reader->PrintInfo();
202 /// // or, equivalently:
203 /// reader->PrintInfo(ENTupleInfo::kSummary, std::cout);
204 /// ~~~
205 /// **Example: print detailed column storage data to stderr**
206 /// ~~~ {.cpp}
207 /// #include <ROOT/RNTupleReader.hxx>
208 /// using ROOT::Experimental::ENTupleInfo;
209 /// using ROOT::Experimental::RNTupleReader;
210 ///
211 /// #include <iostream>
212 ///
213 /// auto reader = RNTupleReader::Open("myNTuple", "some/file.root");
214 /// reader->PrintInfo(ENTupleInfo::kStorageDetails, std::cerr);
215 /// ~~~
216 ///
217 /// For use of ENTupleInfo::kMetrics, see #EnableMetrics.
218 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout) const;
219
220 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
221 /// prints the output in JSON format.
222 /// Uses the visitor pattern to traverse through each field of the given entry.
223 void Show(ROOT::NTupleSize_t index, std::ostream &output = std::cout);
224
225 /// Fills the default entry of the model.
226 /// Raises an exception when `index` is greater than the number of entries present in the RNTuple
228 {
229 // TODO(jblomer): can be templated depending on the factory method / constructor
230 if (R__unlikely(!fModel)) {
231 fModel = fSource->GetSharedDescriptorGuard()->CreateModel(
234 }
235 LoadEntry(index, fModel->GetDefaultEntry());
236 }
237 /// Fills a user provided entry after checking that the entry has been instantiated from the RNTuple model
239 {
240 if (R__unlikely(entry.GetModelId() != fModel->GetModelId()))
241 throw RException(R__FAIL("mismatch between entry and model"));
242
243 entry.Read(index);
244 }
245
246 /// Returns an iterator over the entry indices of the RNTuple.
247 ///
248 /// **Example: iterate over all entries and print each entry in JSON format**
249 /// ~~~ {.cpp}
250 /// #include <ROOT/RNTupleReader.hxx>
251 /// using ROOT::Experimental::ENTupleShowFormat;
252 /// using ROOT::Experimental::RNTupleReader;
253 ///
254 /// #include <iostream>
255 ///
256 /// auto reader = RNTupleReader::Open("myNTuple", "some/file.root");
257 /// for (auto i : reader->GetEntryRange()) {
258 /// reader->Show(i);
259 /// }
260 /// ~~~
262
263 /// Provides access to an individual (sub)field,
264 /// e.g. `GetView<Particle>("particle")`, `GetView<double>("particle.pt")` or `GetView<std::vector<Particle>>("particles")`.
265 /// It is possible to directly get the size of a collection (without reading the collection itself)
266 /// using RNTupleCardinality: `GetView<ROOT::RNTupleCardinality<std::uint64_t>>("particles")`.
267 ///
268 /// Raises an exception if there is no field with the given name.
269 ///
270 /// **Example: iterate over a field named "pt" of type `float`**
271 /// ~~~ {.cpp}
272 /// #include <ROOT/RNTupleReader.hxx>
273 /// using ROOT::Experimental::RNTupleReader;
274 ///
275 /// #include <iostream>
276 ///
277 /// auto reader = RNTupleReader::Open("myNTuple", "some/file.root");
278 /// auto pt = reader->GetView<float>("pt");
279 ///
280 /// for (auto i : reader->GetEntryRange()) {
281 /// std::cout << i << ": " << pt(i) << "\n";
282 /// }
283 /// ~~~
284 template <typename T>
286 {
288 }
289
290 template <typename T>
291 RNTupleView<T> GetView(std::string_view fieldName, std::shared_ptr<T> objPtr)
292 {
294 }
295
296 template <typename T>
298 {
300 }
301
302 template <typename T>
309
310 template <typename T>
317
318 template <typename T>
325
326 template <typename T>
331
332 template <typename T>
339
340 /// Raises an exception if:
341 /// * there is no field with the given name or,
342 /// * the field is not a collection
344 {
345 auto fieldId = fSource->GetSharedDescriptorGuard()->FindFieldId(fieldName);
347 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '" +
348 fSource->GetSharedDescriptorGuard()->GetName() + "'"));
349 }
351 }
352
357
358 RIterator begin() { return RIterator(0); }
360
361 /// Enable performance measurements (decompression time, bytes read from storage, etc.)
362 ///
363 /// **Example: inspect the reader metrics after loading every entry**
364 /// ~~~ {.cpp}
365 /// #include <ROOT/RNTupleReader.hxx>
366 /// using ROOT::Experimental::ENTupleInfo;
367 /// using ROOT::Experimental::RNTupleReader;
368 ///
369 /// #include <iostream>
370 ///
371 /// auto reader = RNTupleReader::Open("myNTuple", "some/file.root");
372 /// // metrics must be turned on beforehand
373 /// reader->EnableMetrics();
374 ///
375 /// for (auto i : reader->GetEntryRange()) {
376 /// reader->LoadEntry(i);
377 /// }
378 /// reader->PrintInfo(ENTupleInfo::kMetrics);
379 /// ~~~
381 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
382}; // class RNTupleReader
383
384} // namespace Experimental
385} // namespace ROOT
386
387#endif // ROOT7_RNTupleReader
#define R__unlikely(expr)
Definition RConfig.hxx:602
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
A collection of Counter objects with a name, a unit, and a description.
A view for a collection, that can itself generate new ntuple views for its nested fields.
static RNTupleCollectionView Create(ROOT::DescriptorId_t fieldId, Internal::RPageSource *source)
static ROOT::RField< T > CreateField(ROOT::DescriptorId_t fieldId, Internal::RPageSource &pageSource)
bool operator!=(const iterator &rh) const
bool operator==(const iterator &rh) const
Reads RNTuple data from storage.
ROOT::NTupleSize_t GetNEntries() const
RNTupleReader(std::unique_ptr< ROOT::RNTupleModel > model, std::unique_ptr< Internal::RPageSource > source, const ROOT::RNTupleReadOptions &options)
ROOT::DescriptorId_t RetrieveFieldId(std::string_view fieldName) const
std::optional< ROOT::RNTupleDescriptor::RCreateModelOptions > fCreateModelOptions
If not nullopt, these will be used when creating the model.
RNTupleView< T > GetView(std::string_view fieldName, T *rawPtr)
RNTupleView< T > GetView(ROOT::DescriptorId_t fieldId, std::shared_ptr< T > objPtr)
std::unique_ptr< RNTupleReader > Clone()
Detail::RNTupleMetrics fMetrics
std::unique_ptr< RNTupleReader > fDisplayReader
We use a dedicated on-demand reader for Show().
void EnableMetrics()
Enable performance measurements (decompression time, bytes read from storage, etc....
const Detail::RNTupleMetrics & GetMetrics() const
RNTupleDirectAccessView< T > GetDirectAccessView(ROOT::DescriptorId_t fieldId)
RNTupleCollectionView GetCollectionView(ROOT::DescriptorId_t fieldId)
std::optional< ROOT::RNTupleDescriptor > fCachedDescriptor
The RNTuple descriptor in the page source is protected by a read-write lock.
const ROOT::RNTupleModel & GetModel()
RNTupleView< T > GetView(std::string_view fieldName)
Provides access to an individual (sub)field, e.g.
RNTupleView< T > GetView(std::string_view fieldName, std::shared_ptr< T > objPtr)
RNTupleView< T > GetView(ROOT::DescriptorId_t fieldId)
const ROOT::RNTupleDescriptor & GetDescriptor()
Returns a cached copy of the page source descriptor.
RNTupleCollectionView GetCollectionView(std::string_view fieldName)
Raises an exception if:
std::unique_ptr< Internal::RPageSource > fSource
RNTupleDirectAccessView< T > GetDirectAccessView(std::string_view fieldName)
RNTupleView< T > GetView(ROOT::DescriptorId_t fieldId, T *rawPtr)
std::unique_ptr< Internal::RPageStorage::RTaskScheduler > fUnzipTasks
Set as the page source's scheduler for parallel page decompression if implicit multi-threading (IMT) ...
std::unique_ptr< ROOT::RNTupleModel > fModel
Needs to be destructed before fSource.
static std::unique_ptr< RNTupleReader > Open(std::string_view ntupleName, std::string_view storage, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Open an RNTuple for reading.
void InitPageSource(bool enableMetrics)
void PrintInfo(const ENTupleInfo what=ENTupleInfo::kSummary, std::ostream &output=std::cout) const
Prints a detailed summary of the RNTuple, including a list of fields.
std::unique_ptr< ROOT::REntry > CreateEntry()
void LoadEntry(ROOT::NTupleSize_t index)
Fills the default entry of the model.
void Show(ROOT::NTupleSize_t index, std::ostream &output=std::cout)
Shows the values of the i-th entry/row, starting with 0 for the first entry.
ROOT::RNTupleGlobalRange GetEntryRange()
Returns an iterator over the entry indices of the RNTuple.
void LoadEntry(ROOT::NTupleSize_t index, ROOT::REntry &entry)
Fills a user provided entry after checking that the entry has been instantiated from the RNTuple mode...
void ConnectModel(ROOT::RNTupleModel &model)
static std::unique_ptr< ROOT::RFieldBase > CreateField(ROOT::DescriptorId_t fieldId, Internal::RPageSource &pageSource)
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:54
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
The on-storage metadata of an RNTuple.
Used to loop over indexes (entries or collections) between start and end.
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading RNTuples.
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:69
ROOT::RNTupleGlobalRange GetFieldRange(const ROOT::RFieldBase &field, const RPageSource &pageSource)
Helper to get the iteration space of the given field that needs to be connected to the given page sou...
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
static const char * what
Definition stlLoader.cc:5
static void output()