Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTuple.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTuple.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTuple
17#define ROOT7_RNTuple
18
19#include <ROOT/RConfig.hxx> // for R__unlikely
21#include <ROOT/RNTupleModel.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RNTupleView.hxx>
25#include <ROOT/RPageStorage.hxx>
26#include <ROOT/RSpan.hxx>
27#include <ROOT/RStringView.hxx>
28
29#include <iterator>
30#include <memory>
31#include <sstream>
32#include <utility>
33
34class TFile;
35
36namespace ROOT {
37namespace Experimental {
38
39class REntry;
40class RNTupleModel;
41
42namespace Detail {
43class RPageSink;
44class RPageSource;
45}
46
47
48/**
49 * Selects which kind of information RNTupleReader::PrintInfo() writes to its output stream (kSummary is its default)
50 */
51enum class ENTupleInfo {
52 kSummary, // The ntuple name, description, number of entries
53 kStorageDetails, // Size on storage, page sizes, compression factor, etc.
54 kMetrics, // Internal performance counters; requires that EnableMetrics() was called beforehand
55};
56
57/**
58 * Listing of the different entry output formats of RNTupleReader::Show()
59 */
61 kCurrentModelJSON, // prints a single entry/row with the current active model in JSON format.
62 kCompleteJSON, // prints a single entry/row with all the fields in JSON format.
63};
64
65
66#ifdef R__USE_IMT
67class TTaskGroup;
69private:
70 std::unique_ptr<TTaskGroup> fTaskGroup;
71public:
73 virtual ~RNTupleImtTaskScheduler() = default;
74 void Reset() final;
75 void AddTask(const std::function<void(void)> &taskFunc) final;
76 void Wait() final;
77};
78#endif
79
80// clang-format off
81/**
82\class ROOT::Experimental::RNTupleReader
83\ingroup NTuple
84\brief An RNTuple that is used to read data from storage
85
86An input ntuple provides data from storage as C++ objects. The ntuple model can be created from the data on storage
87or it can be imposed by the user. The latter case allows users to read into a specialized ntuple model that covers
88only a subset of the fields in the ntuple. The ntuple model is used when reading complete entries.
89Individual fields can be read as well by instantiating a tree view.
90
91~~~ {.cpp}
92#include <ROOT/RNTuple.hxx>
93using ROOT::Experimental::RNTupleReader;
94
95#include <iostream>
96
97auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
98std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
99~~~
100*/
101// clang-format on
103private:
104 /// Set as the page source's scheduler for parallel page decompression if IMT is on
105 /// Needs to be destructed after the page source is destructed (and thus be declared before)
106 std::unique_ptr<Detail::RPageStorage::RTaskScheduler> fUnzipTasks;
107
108 std::unique_ptr<Detail::RPageSource> fSource;
109 /// Needs to be destructed before fSource
110 std::unique_ptr<RNTupleModel> fModel;
111 /// We use a dedicated on-demand reader for Show() and Scan(). Printing data uses all the fields
112 /// from the full model even if the analysis code uses only a subset of fields. The display reader
113 /// is a clone of the original reader.
114 std::unique_ptr<RNTupleReader> fDisplayReader;
116
117 void ConnectModel(const RNTupleModel &model);
118 RNTupleReader *GetDisplayReader();
119 void InitPageSource();
120
121public:
122 // Forward iterator over entry indices: dereferencing yields the current index, ++ advances it by one
123 class RIterator {
124 private:
126 public:
128 using iterator_category = std::forward_iterator_tag;
133
134 RIterator() = default;
135 explicit RIterator(NTupleSize_t index) : fIndex(index) {} // start iterating at the given entry index
136 ~RIterator() = default;
137
138 iterator operator++(int) /* postfix: returns a copy taken before the increment */ { auto r = *this; fIndex++; return r; }
139 iterator& operator++() /* prefix: increments, then returns this iterator */ { ++fIndex; return *this; }
140 reference operator* () { return fIndex; }
141 pointer operator->() { return &fIndex; }
142 bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; } // iterators compare by index only
143 bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
144 };
145
146 /// Describes one underlying RNTuple for OpenFriends(): its name (fNTupleName) and its storage (fStorage)
147 struct ROpenSpec {
148 std::string fNTupleName;
149 std::string fStorage; // NOTE(review): presumably the same kind of storage string that Open() takes -- confirm
151
152 ROpenSpec() = default;
153 ROpenSpec(std::string_view n, std::string_view s) : fNTupleName(n), fStorage(s) {} // n: ntuple name, s: storage
154 };
155
156 /// Throws an exception if the model is null.
157 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model,
158 std::string_view ntupleName,
159 std::string_view storage,
160 const RNTupleReadOptions &options = RNTupleReadOptions());
161 /// Open an RNTuple for reading.
162 ///
163 /// Throws an RException if there is no RNTuple with the given name.
164 ///
165 /// **Example: open an RNTuple and print the number of entries**
166 /// ~~~ {.cpp}
167 /// #include <ROOT/RNTuple.hxx>
168 /// using ROOT::Experimental::RNTupleReader;
169 ///
170 /// #include <iostream>
171 ///
172 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
173 /// std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n";
174 /// ~~~
175 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName,
176 std::string_view storage,
177 const RNTupleReadOptions &options = RNTupleReadOptions());
178 /// Open RNTuples as one virtual, horizontally combined ntuple. The underlying RNTuples must
179 /// have an identical number of entries. Fields in the combined RNTuple are named with the ntuple name
180 /// as a prefix, e.g. myNTuple1.px and myNTuple2.pt (see tutorial ntpl006_friends)
181 static std::unique_ptr<RNTupleReader> OpenFriends(std::span<ROpenSpec> ntuples);
182
183 /// The user imposes an ntuple model, which must be compatible with the model found in the data on
184 /// storage.
185 ///
186 /// Throws an exception if the model or the source is null.
187 RNTupleReader(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Detail::RPageSource> source);
188 /// The model is generated from the ntuple metadata on storage
189 ///
190 /// Throws an exception if the source is null.
191 explicit RNTupleReader(std::unique_ptr<Detail::RPageSource> source);
192 std::unique_ptr<RNTupleReader> Clone() { return std::make_unique<RNTupleReader>(fSource->Clone()); } // New reader built from a clone of the page source only; fModel is not passed on
194
195 RNTupleModel *GetModel();
196 NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); } // number of entries as reported by the page source
197 const RNTupleDescriptor &GetDescriptor() const { return fSource->GetDescriptor(); } // descriptor held by the page source, returned by const reference
198
199 /// Prints a detailed summary of the ntuple, including a list of fields.
200 ///
201 /// **Example: print summary information to stdout**
202 /// ~~~ {.cpp}
203 /// #include <ROOT/RNTuple.hxx>
204 /// using ROOT::Experimental::ENTupleInfo;
205 /// using ROOT::Experimental::RNTupleReader;
206 ///
207 /// #include <iostream>
208 ///
209 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
210 /// ntuple->PrintInfo();
211 /// // or, equivalently:
212 /// ntuple->PrintInfo(ENTupleInfo::kSummary, std::cout);
213 /// ~~~
214 /// **Example: print detailed column storage data to stderr**
215 /// ~~~ {.cpp}
216 /// #include <ROOT/RNTuple.hxx>
217 /// using ROOT::Experimental::ENTupleInfo;
218 /// using ROOT::Experimental::RNTupleReader;
219 ///
220 /// #include <iostream>
221 ///
222 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
223 /// ntuple->PrintInfo(ENTupleInfo::kStorageDetails, std::cerr);
224 /// ~~~
225 ///
226 /// For use of ENTupleInfo::kMetrics, see #EnableMetrics.
227 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout);
228
229 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
230 /// prints the output in JSON format.
231 /// Uses the visitor pattern to traverse through each field of the given entry.
232 void Show(NTupleSize_t index, const ENTupleShowFormat format = ENTupleShowFormat::kCurrentModelJSON,
233 std::ostream &output = std::cout);
234
235 /// Analogous to Fill(), fills the default entry of the model. Returns false at the end of the ntuple.
236 /// On I/O errors, raises an exception.
238 // TODO(jblomer): can be templated depending on the factory method / constructor
239 if (R__unlikely(!fModel)) {
240 fModel = fSource->GetDescriptor().GenerateModel();
241 ConnectModel(*fModel);
242 }
243 LoadEntry(index, *fModel->GetDefaultEntry());
244 }
245 /// Fills a user-provided entry: every value in the entry is read from its field at the given entry index
246 void LoadEntry(NTupleSize_t index, REntry &entry) { // NOTE(review): no check that the entry was instantiated from this reader's model is visible here -- confirm
247 for (auto& value : entry) {
248 value.GetField()->Read(index, &value);
249 }
250 }
251
252 /// Returns an iterator over the entry indices of the RNTuple.
253 ///
254 /// **Example: iterate over all entries and print each entry in JSON format**
255 /// ~~~ {.cpp}
256 /// #include <ROOT/RNTuple.hxx>
257 /// using ROOT::Experimental::ENTupleShowFormat;
258 /// using ROOT::Experimental::RNTupleReader;
259 ///
260 /// #include <iostream>
261 ///
262 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
263 /// for (auto i : ntuple->GetEntryRange()) {
264 /// ntuple->Show(i, ENTupleShowFormat::kCompleteJSON);
265 /// }
266 /// ~~~
268
269 /// Provides access to an individual field that can contain either a scalar value or a collection, e.g.
270 /// GetView<double>("particles.pt") or GetView<std::vector<double>>("particle"). It can as well be the index
271 /// field of a collection itself, like GetView<NTupleSize_t>("particle").
272 ///
273 /// Raises an exception if there is no field with the given name.
274 ///
275 /// **Example: iterate over a field named "pt" of type `float`**
276 /// ~~~ {.cpp}
277 /// #include <ROOT/RNTuple.hxx>
278 /// using ROOT::Experimental::RNTupleReader;
279 ///
280 /// #include <iostream>
281 ///
282 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
283 /// auto pt = ntuple->GetView<float>("pt");
284 ///
285 /// for (auto i : ntuple->GetEntryRange()) {
286 /// std::cout << i << ": " << pt(i) << "\n";
287 /// }
288 /// ~~~
289 template <typename T>
290 RNTupleView<T> GetView(std::string_view fieldName) {
291 auto fieldId = fSource->GetDescriptor().FindFieldId(fieldName); // look the field up in the page source's descriptor
292 if (fieldId == kInvalidDescriptorId) { // no field with that name: report the field and ntuple names
293 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '"
294 + fSource->GetDescriptor().GetName() + "'"
295 ));
296 }
297 return RNTupleView<T>(fieldId, fSource.get()); // the view is constructed from the field id and a raw (non-owning) pointer to the page source
298 }
299
300 /// Returns a collection view for the named field. Throws an RException if:
301 /// * there is no field with the given name (checked here via FindFieldId()) or,
302 /// * the field is not a collection (NOTE(review): not checked in this body; presumably in RNTupleViewCollection -- confirm)
303 RNTupleViewCollection GetViewCollection(std::string_view fieldName) {
304 auto fieldId = fSource->GetDescriptor().FindFieldId(fieldName);
305 if (fieldId == kInvalidDescriptorId) {
306 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '"
307 + fSource->GetDescriptor().GetName() + "'"
308 ));
309 }
310 return RNTupleViewCollection(fieldId, fSource.get());
311 }
312
313 RIterator begin() { return RIterator(0); } // iteration starts at entry index 0
314 RIterator end() { return RIterator(GetNEntries()); } // past-the-end: index equal to the number of entries
315
316 /// Enable performance measurements (decompression time, bytes read from storage, etc.)
317 ///
318 /// **Example: inspect the reader metrics after loading every entry**
319 /// ~~~ {.cpp}
320 /// #include <ROOT/RNTuple.hxx>
321 /// using ROOT::Experimental::ENTupleInfo;
322 /// using ROOT::Experimental::RNTupleReader;
323 ///
324 /// #include <iostream>
325 ///
326 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root");
327 /// // metrics must be turned on beforehand
328 /// ntuple->EnableMetrics();
329 ///
330 /// for (auto i : ntuple->GetEntryRange()) {
331 /// ntuple->LoadEntry(i);
332 /// }
333 /// ntuple->PrintInfo(ENTupleInfo::kMetrics);
334 /// ~~~
335 void EnableMetrics() { fMetrics.Enable(); }
336 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; } // read-only access to this reader's fMetrics
337};
338
339// clang-format off
340/**
341\class ROOT::Experimental::RNTupleWriter
342\ingroup NTuple
343\brief An RNTuple that gets filled with entries (data) and writes them to storage
344
345An output ntuple can be filled with entries. The caller has to make sure that the data that gets filled into an ntuple
346is not modified for the time of the Fill() call. The fill call serializes the C++ object into the column format and
347writes data into the corresponding column page buffers. Writing of the buffers to storage is deferred and can be
348triggered by Flush() or by destructing the ntuple. On I/O errors, an exception is thrown.
349*/
350// clang-format on
352private:
353 /// The page sink's parallel page compression scheduler if IMT is on.
354 /// Needs to be destructed after the page sink is destructed and so declared before.
355 std::unique_ptr<Detail::RPageStorage::RTaskScheduler> fZipTasks;
356 std::unique_ptr<Detail::RPageSink> fSink;
357 /// Needs to be destructed before fSink
358 std::unique_ptr<RNTupleModel> fModel;
360 NTupleSize_t fLastCommitted = 0;
361 NTupleSize_t fNEntries = 0;
362 /// Keeps track of the number of bytes written into the current cluster
363 std::size_t fUnzippedClusterSize = 0;
364 /// The total number of bytes written to storage (i.e., after compression)
365 std::uint64_t fNBytesCommitted = 0;
366 /// The total number of bytes filled into all the so far committed clusters,
367 /// i.e. the uncompressed size of the written clusters
368 std::uint64_t fNBytesFilled = 0;
369 /// Limit for committing cluster no matter the other tunables
371 /// Estimator of uncompressed cluster size, taking into account the estimated compression ratio
373
374public:
375 /// Throws an exception if the model is null.
376 static std::unique_ptr<RNTupleWriter> Recreate(std::unique_ptr<RNTupleModel> model,
377 std::string_view ntupleName,
378 std::string_view storage,
379 const RNTupleWriteOptions &options = RNTupleWriteOptions());
380 /// Throws an exception if the model is null.
381 static std::unique_ptr<RNTupleWriter> Append(std::unique_ptr<RNTupleModel> model,
382 std::string_view ntupleName,
383 TFile &file,
384 const RNTupleWriteOptions &options = RNTupleWriteOptions());
385 /// Throws an exception if the model or the sink is null.
386 RNTupleWriter(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Detail::RPageSink> sink);
387 RNTupleWriter(const RNTupleWriter&) = delete;
390
391 /// The simplest user interface: fills the default entry that comes with the ntuple model, i.e. Fill(*fModel->GetDefaultEntry())
392 void Fill() { Fill(*fModel->GetDefaultEntry()); }
393 /// Appends every value of the given entry through its field and counts the entry. Multiple entries can have been
394 /// instantiated from the ntuple model. NOTE(review): no check that the entry comes from the ntuple's own model is visible here
395 void Fill(REntry &entry) {
396 for (auto& value : entry) {
397 fUnzippedClusterSize += value.GetField()->Append(value); // Append()'s return value is added to the current cluster's unzipped size
398 }
399 fNEntries++;
400 if ((fUnzippedClusterSize >= fMaxUnzippedClusterSize) || (fUnzippedClusterSize >= fUnzippedClusterSizeEst)) // hard limit or estimated cluster size reached
401 CommitCluster();
402 }
403 /// Ensure that the data from the so far seen Fill calls has been written to storage
404 void CommitCluster();
405
406 std::unique_ptr<REntry> CreateEntry() { return fModel->CreateEntry(); } // forwards to the model; the caller owns the returned entry
407
408 void EnableMetrics() { fMetrics.Enable(); } // turns on this writer's fMetrics
409 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; } // read-only access to this writer's fMetrics
410};
411
412// clang-format off
413/**
414\class ROOT::Experimental::RCollectionNTupleWriter
415\ingroup NTuple
416\brief A virtual ntuple used for writing untyped collections that can be used to some extent like an RNTupleWriter
417*
418* This class is between a field and an ntuple. It carries the offset column for the collection and the default entry
419* taken from the collection model. It does not, however, own an ntuple model because the collection model has been
420* merged into the larger ntuple model.
421*/
422// clang-format on
424private:
426 std::unique_ptr<REntry> fDefaultEntry;
427public:
428 explicit RCollectionNTupleWriter(std::unique_ptr<REntry> defaultEntry);
432
433 void Fill() { Fill(fDefaultEntry.get()); } // fills the default entry held by this writer
434 void Fill(REntry *entry) { // entry is dereferenced unconditionally, so it must not be null
435 for (auto &value : *entry) {
436 value.GetField()->Append(value); // the return value of Append() is not used here
437 }
438 fOffset++; // one more element filled into the collection
439 }
440
441 ClusterSize_t *GetOffsetPtr() { return &fOffset; } // pointer to the fOffset member that Fill() increments
442};
443
444} // namespace Experimental
445} // namespace ROOT
446
447#endif
ROOT::R::TRInterface & r
Definition Object.C:4
#define R__unlikely(expr)
Definition RConfig.hxx:598
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:291
TTime operator*(const TTime &t1, const TTime &t2)
Definition TTime.h:85
A collection of Counter objects with a name, a unit, and a description.
The interface of a task scheduler to schedule page (de)compression tasks.
RCollectionNTupleWriter(const RCollectionNTupleWriter &)=delete
RCollectionNTupleWriter & operator=(const RCollectionNTupleWriter &)=delete
std::unique_ptr< REntry > fDefaultEntry
Definition RNTuple.hxx:426
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:42
Base class for all ROOT issued exceptions.
Definition RError.hxx:114
The on-storage meta-data of an ntuple.
Used to loop over indexes (entries or collections) between start and end.
void Reset() final
Start a new set of tasks.
Definition RNTuple.cxx:48
std::unique_ptr< TTaskGroup > fTaskGroup
Definition RNTuple.hxx:70
void AddTask(const std::function< void(void)> &taskFunc) final
Take a callable that represents a task.
Definition RNTuple.cxx:54
void Wait() final
Blocks until all scheduled tasks finished.
Definition RNTuple.cxx:60
The RNTupleModel encapsulates the schema of an ntuple.
Common user-tunable settings for reading ntuples.
bool operator!=(const iterator &rh) const
Definition RNTuple.hxx:143
bool operator==(const iterator &rh) const
Definition RNTuple.hxx:142
std::forward_iterator_tag iterator_category
Definition RNTuple.hxx:128
An RNTuple that is used to read data from storage.
Definition RNTuple.hxx:102
std::unique_ptr< RNTupleReader > Clone()
Definition RNTuple.hxx:192
Detail::RNTupleMetrics fMetrics
Definition RNTuple.hxx:115
std::unique_ptr< Detail::RPageStorage::RTaskScheduler > fUnzipTasks
Set as the page source's scheduler for parallel page decompression if IMT is on Needs to be destructe...
Definition RNTuple.hxx:106
std::unique_ptr< RNTupleReader > fDisplayReader
We use a dedicated on-demand reader for Show() and Scan().
Definition RNTuple.hxx:114
void EnableMetrics()
Enable performance measurements (decompression time, bytes read from storage, etc....
Definition RNTuple.hxx:335
const Detail::RNTupleMetrics & GetMetrics() const
Definition RNTuple.hxx:336
const RNTupleDescriptor & GetDescriptor() const
Definition RNTuple.hxx:197
RNTupleView< T > GetView(std::string_view fieldName)
Provides access to an individual field that can contain either a scalar value or a collection,...
Definition RNTuple.hxx:290
NTupleSize_t GetNEntries() const
Definition RNTuple.hxx:196
std::unique_ptr< Detail::RPageSource > fSource
Definition RNTuple.hxx:108
RNTupleGlobalRange GetEntryRange()
Returns an iterator over the entry indices of the RNTuple.
Definition RNTuple.hxx:267
RNTupleViewCollection GetViewCollection(std::string_view fieldName)
Raises an exception if:
Definition RNTuple.hxx:303
void LoadEntry(NTupleSize_t index)
Analogous to Fill(), fills the default entry of the model.
Definition RNTuple.hxx:237
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSource.
Definition RNTuple.hxx:110
void LoadEntry(NTupleSize_t index, REntry &entry)
Fills a user provided entry after checking that the entry has been instantiated from the ntuple model...
Definition RNTuple.hxx:246
A view for a collection, that can itself generate new ntuple views for its nested fields.
An RNTupleView provides read-only access to a single field of the ntuple.
Common user-tunable settings for storing ntuples.
An RNTuple that gets filled with entries (data) and writes them to storage.
Definition RNTuple.hxx:351
NTupleSize_t fUnzippedClusterSizeEst
Estimator of uncompressed cluster size, taking into account the estimated compression ratio.
Definition RNTuple.hxx:372
std::size_t fMaxUnzippedClusterSize
Limit for committing cluster no matter the other tunables.
Definition RNTuple.hxx:370
const Detail::RNTupleMetrics & GetMetrics() const
Definition RNTuple.hxx:409
std::unique_ptr< REntry > CreateEntry()
Definition RNTuple.hxx:406
std::unique_ptr< RNTupleModel > fModel
Needs to be destructed before fSink.
Definition RNTuple.hxx:358
RNTupleWriter(const RNTupleWriter &)=delete
Detail::RNTupleMetrics fMetrics
Definition RNTuple.hxx:359
RNTupleWriter & operator=(const RNTupleWriter &)=delete
std::unique_ptr< Detail::RPageSink > fSink
Definition RNTuple.hxx:356
void Fill()
The simplest user interface if the default entry that comes with the ntuple model is used.
Definition RNTuple.hxx:392
std::unique_ptr< Detail::RPageStorage::RTaskScheduler > fZipTasks
The page sink's parallel page compression scheduler if IMT is on.
Definition RNTuple.hxx:355
void Fill(REntry &entry)
Multiple entries can have been instantiated from the ntuple model.
Definition RNTuple.hxx:395
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition TFile.h:54
const Int_t n
Definition legend1.C:16
ENTupleInfo
Listing of the different options that can be printed by RNTupleReader::PrintInfo()
Definition RNTuple.hxx:51
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr NTupleSize_t kInvalidNTupleIndex
ENTupleShowFormat
Listing of the different entry output formats of RNTupleReader::Show()
Definition RNTuple.hxx:60
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition file.py:1
static const char * what
Definition stlLoader.cc:6
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Used to specify the underlying RNTuples in OpenFriends()
Definition RNTuple.hxx:147
ROpenSpec(std::string_view n, std::string_view s)
Definition RNTuple.hxx:153
static void output(int code)
Definition gifencode.c:226