Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ntpl016_streaming_vector.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_ntuple
3///
4/// Example of a streaming vector: a special purpose container that reads large vectors piece-wise.
5///
6/// \macro_code
7///
8/// \date November 2024
9/// \author Peter van Gemmeren, the ROOT Team
10
11#include <ROOT/RNTupleModel.hxx>
14#include <ROOT/RNTupleRange.hxx>
15#include <ROOT/RNTupleTypes.hxx>
16#include <ROOT/RNTupleView.hxx>
18
19#include <TRandom3.h>
20
21#include <cstdint>
22#include <iostream>
23#include <vector>
24#include <utility>
25
26constexpr char const *kFileName = "ntpl016_streaming_vector.root";
27constexpr char const *kNTupleName = "ntpl";
28constexpr char const *kFieldName = "LargeVector";
29constexpr unsigned int kNEvents = 10;
30constexpr unsigned int kNElementsPerVector = 1000000;
31
32// Create an RNTuple with a single vector field. Every entry contains a large vector of random integers.
33// The vector should be seen as too large to be held entirely in memory during reading.
34void CreateRNTuple()
35{
36 auto model = ROOT::RNTupleModel::Create();
37 auto ptrLargeVector = model->MakeField<std::vector<std::uint32_t>>(kFieldName);
38 auto writer = ROOT::RNTupleWriter::Recreate(std::move(model), kNTupleName, kFileName);
39
40 auto prng = std::make_unique<TRandom3>();
41 prng->SetSeed();
42
43 for (ROOT::NTupleSize_t i = 0; i < kNEvents; i++) {
44 ptrLargeVector->clear();
45 for (std::size_t j = 0; j < kNElementsPerVector; j++)
46 ptrLargeVector->emplace_back(prng->Integer(-1));
47 writer->Fill();
48 }
49 std::cout << "RNTuple written" << std::endl;
50}
51
52/*
53 * ==================================================================================================
54 */
55
56// For comparison, the canonical read function that reads the entire vector for every entry.
58{
60
61 const auto nEntries = reader->GetNEntries();
62 std::cout << "Simple reading, found " << nEntries << " entries" << std::endl;
63
64 auto ptrLargeVector = reader->GetModel().GetDefaultEntry().GetPtr<std::vector<std::uint32_t>>(kFieldName);
65 for (ROOT::NTupleSize_t i = 0; i < nEntries; i++) {
66 reader->LoadEntry(i);
67
68 const auto vectorSize = ptrLargeVector->size();
69 uint64_t sum = 0;
70 for (auto val : *ptrLargeVector)
71 sum += val;
72
73 std::cout << "Size and sum of vector: " << vectorSize << " " << sum << std::endl;
74 }
75 std::cout << "RNTuple simple read" << std::endl;
76}
77
78/*
79 * ==================================================================================================
80 */
81
82// The StreamingVectorView class allows iteration over an RNTuple on-disk vector of element type T.
83// Unlike a std::vector, this class does not provide random access; it only allows iterating over the data elements
84// from beginning to end.
85// Internally, it uses an RNTupleCollection view and an item view to load chunks of the vector elements into memory,
86// so that the entire vector never needs to stay in memory.
87// Note that we don't need to implement loading chunks of data explicitly. Simply by asking for a single vector element
88// at every iteration step, the RNTuple views will take care of keeping only the currently required data pages
89// in memory. This results in the minimal possible memory footprint of RNTuple.
90// Note that for effective streaming, the cluster cache read option needs to be turned off. This may change in the
91// future with more fine-grained control of the data preloading.
92template <class T>
94 // For a certain entry, the collection view provides the information about the size of the collection and
95 // the index range of the item view, which is required to read the values of the collection at hand.
97 // The "data view" provides access to the vector elements
99 // Given an entry number, the start and end index in the item view to read the corresponding vector elements
101 // The index of the entry from which the vector should be read
102 ROOT::NTupleSize_t fEntry{0};
103 // The size of the collection in fEntry
105
106public:
107 // A lightweight iterator used in StreamingVectorView::begin() and StreamingVectorView::end().
108 // Used to iterate over the elements of an RNTuple on-disk vector for a certain entry.
109 // Dereferencing the iterator returns the corresponding value of the item view.
110 class Iterator {
113
114 public:
115 using iterator = Iterator;
116 using iterator_category = std::input_iterator_tag;
117 using value_type = T;
118 using pointer = const T *;
119 using reference = const T &;
120
122 : fRangeItr(rangeItr), fView(view)
123 {
124 }
125
126 iterator operator++(int) /* postfix */
127 {
128 auto r = *this;
129 ++(*this);
130 return r;
131 }
132 iterator &operator++() /* prefix */
133 {
134 ++fRangeItr;
135 return *this;
136 }
137 reference operator*() { return fView.operator()(*fRangeItr); }
138 pointer operator->() { return &fView.operator()(*fRangeItr); }
139 bool operator==(const iterator &rh) const { return fRangeItr == rh.fRangeItr; }
140 bool operator!=(const iterator &rh) const { return fRangeItr != rh.fRangeItr; }
141 };
142
144 : fVectorView(std::move(vectorView)), fItemView(fVectorView.GetView<T>("_0"))
145 {
146 }
147
148 ROOT::NTupleSize_t size() const { return fSize; }
149
150 // The begin() and end() methods enable range-based for loops like `for (auto val : streamingVector)`
151 Iterator begin() { return Iterator(fRange.begin(), fItemView); }
152 Iterator end() { return Iterator(fRange.end(), fItemView); }
153
154 void LoadEntry(ROOT::NTupleSize_t entry)
155 {
156 fEntry = entry;
157 fRange = fVectorView.GetCollectionRange(fEntry);
158 fSize = fVectorView.operator()(fEntry);
159 }
160};
161
162// For the streaming vector read, we use a custom class `StreamingVectorView` that implements the piece-wise
163// loading of the data during iteration of elements of the on-disk vector. The class has been built such that
164// the event loop is almost identical to the simple reading case above.
166{
168 // Don't preload data; we want to populate data into memory only as needed
170 auto reader = ROOT::RNTupleReader::Open(kNTupleName, kFileName, options);
171
172 const auto nEntries = reader->GetNEntries();
173 std::cout << "Streamed reading, found " << nEntries << " entries" << std::endl;
174
176
177 for (ROOT::NTupleSize_t i = 0; i < nEntries; i++) {
178 // Instead of `reader->LoadEntry()`, we tell the streaming vector which entry we want to read.
179 streamingVector.LoadEntry(i);
180
181 // We can ask for the size of the vector without loading the data
182 const auto vectorSize = streamingVector.size();
183
184 // The iteration works exactly as in the simple case
185 uint64_t sum = 0;
186 for (auto val : streamingVector)
187 sum += val;
188
189 std::cout << "Size and sum of vector: " << vectorSize << " " << sum << std::endl;
190 }
191 std::cout << "RNTuple streaming read" << std::endl;
192}
193
195{
199}
dim_t fSize
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Bool_t operator!=(const TDatime &d1, const TDatime &d2)
Definition TDatime.h:104
Bool_t operator==(const TDatime &d1, const TDatime &d2)
Definition TDatime.h:102
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
TTime operator*(const TTime &t1, const TTime &t2)
Definition TTime.h:85
A view for a collection, that can itself generate new ntuple views for its nested fields.
Used to loop over entries of collections in a single cluster.
static std::unique_ptr< RNTupleModel > Create()
Common user-tunable settings for reading RNTuples.
void SetClusterCache(EClusterCache val)
static std::unique_ptr< RNTupleReader > Open(std::string_view ntupleName, std::string_view storage, const ROOT::RNTupleReadOptions &options=ROOT::RNTupleReadOptions())
Open an RNTuple for reading.
An RNTupleView for a known type.
static std::unique_ptr< RNTupleWriter > Recreate(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, std::string_view storage, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Creates an RNTupleWriter backed by storage, overwriting it if one with the same URI exists.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2339