Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RColumn.hxx
Go to the documentation of this file.
1/// \file ROOT/RColumn.hxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \date 2018-10-09
4
5/*************************************************************************
6 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12
13#ifndef ROOT_RColumn
14#define ROOT_RColumn
15
16#include <ROOT/RConfig.hxx> // for R__likely
18#include <ROOT/RNTupleTypes.hxx>
19#include <ROOT/RPage.hxx>
20#include <ROOT/RPageStorage.hxx>
21
22#include <TError.h>
23
24#include <cstring> // for memcpy
25#include <memory>
26#include <utility>
27
28namespace ROOT::Internal {
29
30// clang-format off
31/**
32\class ROOT::Internal::RColumn
33\ingroup NTuple
34\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
35*/
36// clang-format on
37class RColumn {
38private:
40 /// Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
41 /// the offset column with index 0 and the character value column with index 1.
42 std::uint32_t fIndex;
43 /// Fields can have multiple column representations, distinguished by representation index
44 std::uint16_t fRepresentationIndex;
49 /// The page into which new elements are being written. The page will initially be small
50 /// (RNTupleWriteOptions::fInitialUnzippedPageSize, which corresponds to fInitialElements) and expand as needed and
51 /// as memory for page buffers is still available (RNTupleWriteOptions::fPageBufferBudget) or the maximum page
52 /// size is reached (RNTupleWriteOptions::fMaxUnzippedPageSize).
54 /// The initial number of elements in a page
56 /// The number of elements written resp. available in the column
58 /// The currently mapped page for reading
60 /// The column id in the column descriptor, once connected to a sink or source
62 /// Global index of the first element in this column; usually == 0, unless it is a deferred column
64 /// Used to pack and unpack pages on writing/reading
65 std::unique_ptr<ROOT::Internal::RColumnElementBase> fElement;
66 /// The column team is a set of columns that serve the same column index for different representation IDs.
67 /// Initially, the team has only one member, the very column it belongs to. Through MergeTeams(), two columns
68 /// can join forces. The team is used to react on suppressed columns: if the current team member has a suppressed
69 /// column for a MapPage() call, it get the page from the active column in the corresponding cluster.
70 std::vector<RColumn *> fTeam;
71 /// Points into fTeam to the column that successfully returned the last page.
72 std::size_t fLastGoodTeamIdx = 0;
73
74 RColumn(ROOT::ENTupleColumnType type, std::uint32_t columnIndex, std::uint16_t representationIndex);
75
76 /// Used when trying to append to a full write page. If possible, expand the page. Otherwise, flush and reset
77 /// to the minimal size.
79 {
80 auto newMaxElements = fWritePage.GetMaxElements() * 2;
81 if (newMaxElements * fElement->GetSize() > fPageSink->GetWriteOptions().GetMaxUnzippedPageSize()) {
82 newMaxElements = fPageSink->GetWriteOptions().GetMaxUnzippedPageSize() / fElement->GetSize();
83 }
84
85 if (newMaxElements == fWritePage.GetMaxElements()) {
86 // Maximum page size reached, flush and reset
87 Flush();
88 } else {
89 auto expandedPage = fPageSink->ReservePage(fHandleSink, newMaxElements);
90 if (expandedPage.IsNull()) {
91 Flush();
92 } else {
93 memcpy(expandedPage.GetBuffer(), fWritePage.GetBuffer(), fWritePage.GetNBytes());
94 expandedPage.Reset(fNElements);
95 expandedPage.GrowUnchecked(fWritePage.GetNElements());
96 fWritePage = std::move(expandedPage);
97 }
98 }
99
100 assert(fWritePage.GetNElements() < fWritePage.GetMaxElements());
101 }
102
103public:
104 template <typename CppT>
105 static std::unique_ptr<RColumn>
106 Create(ROOT::ENTupleColumnType type, std::uint32_t columnIdx, std::uint16_t representationIdx)
107 {
108 auto column = std::unique_ptr<RColumn>(new RColumn(type, columnIdx, representationIdx));
110 return column;
111 }
112
113 RColumn(const RColumn &) = delete;
114 RColumn &operator=(const RColumn &) = delete;
115 ~RColumn();
116
117 /// Connect the column to a page sink. `firstElementIndex` can be used to specify the first column element index
118 /// with backing storage for this column. On read back, elements before `firstElementIndex` will cause the zero page
119 /// to be mapped.
121 ROOT::NTupleSize_t firstElementIndex = 0U);
122 /// Connect the column to a page source.
124
125 void Append(const void *from)
126 {
127 if (fWritePage.GetNElements() == fWritePage.GetMaxElements()) {
129 }
130
131 void *dst = fWritePage.GrowUnchecked(1);
132
133 std::memcpy(dst, from, fElement->GetSize());
134 fNElements++;
135 }
136
137 void AppendV(const void *from, std::size_t count)
138 {
139 auto src = reinterpret_cast<const unsigned char *>(from);
140 // TODO(jblomer): A future optimization should grow the page in one go, up to the maximum unzipped page size
141 while (count > 0) {
142 std::size_t nElementsRemaining = fWritePage.GetMaxElements() - fWritePage.GetNElements();
143 if (nElementsRemaining == 0) {
145 nElementsRemaining = fWritePage.GetMaxElements() - fWritePage.GetNElements();
146 }
147
148 assert(nElementsRemaining > 0);
149 auto nBatch = std::min(count, nElementsRemaining);
150
151 void *dst = fWritePage.GrowUnchecked(nBatch);
152 std::memcpy(dst, src, nBatch * fElement->GetSize());
153 src += nBatch * fElement->GetSize();
154 count -= nBatch;
155 fNElements += nBatch;
156 }
157 }
158
159 void Read(const ROOT::NTupleSize_t globalIndex, void *to)
160 {
161 if (!fReadPageRef.Get().Contains(globalIndex)) {
162 MapPage(globalIndex);
163 }
164 const auto elemSize = fElement->GetSize();
165 void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
166 (globalIndex - fReadPageRef.Get().GetGlobalRangeFirst()) * elemSize;
167 std::memcpy(to, from, elemSize);
168 }
169
170 void Read(RNTupleLocalIndex localIndex, void *to)
171 {
172 if (!fReadPageRef.Get().Contains(localIndex)) {
173 MapPage(localIndex);
174 }
175 const auto elemSize = fElement->GetSize();
176 void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
177 (localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst()) * elemSize;
178 std::memcpy(to, from, elemSize);
179 }
180
181 void ReadV(ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t count, void *to)
182 {
183 const auto elemSize = fElement->GetSize();
184 auto tail = static_cast<unsigned char *>(to);
185
186 while (count > 0) {
187 if (!fReadPageRef.Get().Contains(globalIndex)) {
188 MapPage(globalIndex);
189 }
190 const ROOT::NTupleSize_t idxInPage = globalIndex - fReadPageRef.Get().GetGlobalRangeFirst();
191
192 const void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) + idxInPage * elemSize;
193 const ROOT::NTupleSize_t nBatch = std::min(fReadPageRef.Get().GetNElements() - idxInPage, count);
194
195 std::memcpy(tail, from, elemSize * nBatch);
196
197 tail += nBatch * elemSize;
198 count -= nBatch;
199 globalIndex += nBatch;
200 }
201 }
202
203 void ReadV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t count, void *to)
204 {
205 const auto elemSize = fElement->GetSize();
206 auto tail = static_cast<unsigned char *>(to);
207
208 while (count > 0) {
209 if (!fReadPageRef.Get().Contains(localIndex)) {
210 MapPage(localIndex);
211 }
212 ROOT::NTupleSize_t idxInPage = localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst();
213
214 const void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) + idxInPage * elemSize;
215 const ROOT::NTupleSize_t nBatch = std::min(count, fReadPageRef.Get().GetNElements() - idxInPage);
216
217 std::memcpy(tail, from, elemSize * nBatch);
218
219 tail += nBatch * elemSize;
220 count -= nBatch;
221 localIndex = RNTupleLocalIndex(localIndex.GetClusterId(), localIndex.GetIndexInCluster() + nBatch);
222 }
223 }
224
225 template <typename CppT>
226 CppT *Map(const ROOT::NTupleSize_t globalIndex)
227 {
228 ROOT::NTupleSize_t nItems;
229 return MapV<CppT>(globalIndex, nItems);
230 }
231
232 template <typename CppT>
233 CppT *Map(RNTupleLocalIndex localIndex)
234 {
235 ROOT::NTupleSize_t nItems;
236 return MapV<CppT>(localIndex, nItems);
237 }
238
239 template <typename CppT>
240 CppT *MapV(const ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t &nItems)
241 {
242 if (R__unlikely(!fReadPageRef.Get().Contains(globalIndex))) {
243 MapPage(globalIndex);
244 }
245 // +1 to go from 0-based indexing to 1-based number of items
246 nItems = fReadPageRef.Get().GetGlobalRangeLast() - globalIndex + 1;
247 return reinterpret_cast<CppT *>(static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
248 (globalIndex - fReadPageRef.Get().GetGlobalRangeFirst()) * sizeof(CppT));
249 }
250
251 template <typename CppT>
252 CppT *MapV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t &nItems)
253 {
254 if (!fReadPageRef.Get().Contains(localIndex)) {
255 MapPage(localIndex);
256 }
257 // +1 to go from 0-based indexing to 1-based number of items
258 nItems = fReadPageRef.Get().GetLocalRangeLast() - localIndex.GetIndexInCluster() + 1;
259 return reinterpret_cast<CppT *>(static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
260 (localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst()) *
261 sizeof(CppT));
262 }
263
265 {
266 if (!fReadPageRef.Get().Contains(clusterIndex)) {
267 MapPage(clusterIndex);
268 }
269 return fReadPageRef.Get().GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndexInCluster();
270 }
271
273 {
274 if (!fReadPageRef.Get().Contains(globalIndex)) {
275 MapPage(globalIndex);
276 }
277 return RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(),
278 globalIndex - fReadPageRef.Get().GetClusterInfo().GetIndexOffset());
279 }
280
281 /// For offset columns only, look at the two adjacent values that define a collection's coordinates
282 void GetCollectionInfo(const ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *collectionStart,
283 ROOT::NTupleSize_t *collectionSize)
284 {
285 ROOT::NTupleSize_t idxStart = 0;
286 ROOT::NTupleSize_t idxEnd;
287 // Try to avoid jumping back to the previous page and jumping back to the previous cluster
288 if (R__likely(globalIndex > 0)) {
289 if (R__likely(fReadPageRef.Get().Contains(globalIndex - 1))) {
290 idxStart = *Map<ROOT::Internal::RColumnIndex>(globalIndex - 1);
291 idxEnd = *Map<ROOT::Internal::RColumnIndex>(globalIndex);
292 if (R__unlikely(fReadPageRef.Get().GetClusterInfo().GetIndexOffset() == globalIndex))
293 idxStart = 0;
294 } else {
295 idxEnd = *Map<ROOT::Internal::RColumnIndex>(globalIndex);
296 auto selfOffset = fReadPageRef.Get().GetClusterInfo().GetIndexOffset();
297 idxStart = (globalIndex == selfOffset) ? 0 : *Map<ROOT::Internal::RColumnIndex>(globalIndex - 1);
298 }
299 } else {
300 idxEnd = *Map<ROOT::Internal::RColumnIndex>(globalIndex);
301 }
302 *collectionSize = idxEnd - idxStart;
303 *collectionStart = RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(), idxStart);
304 }
305
306 void GetCollectionInfo(RNTupleLocalIndex localIndex, RNTupleLocalIndex *collectionStart,
307 ROOT::NTupleSize_t *collectionSize)
308 {
309 auto index = localIndex.GetIndexInCluster();
310 auto idxStart = (index == 0) ? 0 : *Map<ROOT::Internal::RColumnIndex>(localIndex - 1);
311 auto idxEnd = *Map<ROOT::Internal::RColumnIndex>(localIndex);
312 *collectionSize = idxEnd - idxStart;
313 *collectionStart = RNTupleLocalIndex(localIndex.GetClusterId(), idxStart);
314 }
315
316 /// Get the currently active cluster id
317 void GetSwitchInfo(ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *varIndex, std::uint32_t *tag)
318 {
319 auto varSwitch = Map<ROOT::Internal::RColumnSwitch>(globalIndex);
320 *varIndex = RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(), varSwitch->GetIndex());
321 *tag = varSwitch->GetTag();
322 }
323
324 void Flush();
325 void CommitSuppressed();
326
327 void MapPage(ROOT::NTupleSize_t globalIndex) { R__ASSERT(TryMapPage(globalIndex)); }
328 void MapPage(RNTupleLocalIndex localIndex) { R__ASSERT(TryMapPage(localIndex)); }
329 bool TryMapPage(ROOT::NTupleSize_t globalIndex);
330 bool TryMapPage(RNTupleLocalIndex localIndex);
331
332 bool ReadPageContains(ROOT::NTupleSize_t globalIndex) const { return fReadPageRef.Get().Contains(globalIndex); }
333 bool ReadPageContains(RNTupleLocalIndex localIndex) const { return fReadPageRef.Get().Contains(localIndex); }
334
335 void MergeTeams(RColumn &other);
336
340 std::uint16_t GetBitsOnStorage() const
341 {
342 assert(fElement);
343 return static_cast<std::uint16_t>(fElement->GetBitsOnStorage());
344 }
345 std::optional<std::pair<double, double>> GetValueRange() const
346 {
347 assert(fElement);
348 return fElement->GetValueRange();
349 }
350 std::uint32_t GetIndex() const { return fIndex; }
351 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
358
359 void SetBitsOnStorage(std::size_t bits) { fElement->SetBitsOnStorage(bits); }
360 std::size_t GetWritePageCapacity() const { return fWritePage.GetCapacity(); }
361 void SetValueRange(double min, double max) { fElement->SetValueRange(min, max); }
362}; // class RColumn
363
364} // namespace ROOT::Internal
365
366#endif
#define R__likely(expr)
Definition RConfig.hxx:587
#define R__unlikely(expr)
Definition RConfig.hxx:586
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
RColumn(ROOT::ENTupleColumnType type, std::uint32_t columnIndex, std::uint16_t representationIndex)
A column element encapsulates the translation between basic C++ types and their column representation...
static std::unique_ptr< RColumnElementBase > Generate(ROOT::ENTupleColumnType type)
If CppT == void, use the default C++ type for the given column type.
A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into ...
Definition RColumn.hxx:37
void GetSwitchInfo(ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *varIndex, std::uint32_t *tag)
Get the currently active cluster id.
Definition RColumn.hxx:317
void GetCollectionInfo(RNTupleLocalIndex localIndex, RNTupleLocalIndex *collectionStart, ROOT::NTupleSize_t *collectionSize)
Definition RColumn.hxx:306
void Read(RNTupleLocalIndex localIndex, void *to)
Definition RColumn.hxx:170
static std::unique_ptr< RColumn > Create(ROOT::ENTupleColumnType type, std::uint32_t columnIdx, std::uint16_t representationIdx)
Definition RColumn.hxx:106
RColumn(ROOT::ENTupleColumnType type, std::uint32_t columnIndex, std::uint16_t representationIndex)
ROOT::Internal::RPageSource * GetPageSource() const
Definition RColumn.hxx:354
ROOT::NTupleSize_t GetGlobalIndex(RNTupleLocalIndex clusterIndex)
Definition RColumn.hxx:264
void HandleWritePageIfFull()
Used when trying to append to a full write page.
Definition RColumn.hxx:78
std::optional< std::pair< double, double > > GetValueRange() const
Definition RColumn.hxx:345
void MergeTeams(RColumn &other)
Definition RColumn.cxx:114
RColumn & operator=(const RColumn &)=delete
ROOT::Internal::RPageStorage::ColumnHandle_t GetHandleSink() const
Definition RColumn.hxx:357
void ReadV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t count, void *to)
Definition RColumn.hxx:203
std::vector< RColumn * > fTeam
The column team is a set of columns that serve the same column index for different representation IDs...
Definition RColumn.hxx:70
std::uint16_t GetRepresentationIndex() const
Definition RColumn.hxx:351
bool ReadPageContains(ROOT::NTupleSize_t globalIndex) const
Definition RColumn.hxx:332
std::size_t fLastGoodTeamIdx
Points into fTeam to the column that successfully returned the last page.
Definition RColumn.hxx:72
CppT * Map(const ROOT::NTupleSize_t globalIndex)
Definition RColumn.hxx:226
CppT * MapV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t &nItems)
Definition RColumn.hxx:252
RNTupleLocalIndex GetClusterIndex(ROOT::NTupleSize_t globalIndex)
Definition RColumn.hxx:272
RColumn(const RColumn &)=delete
std::uint16_t fRepresentationIndex
Fields can have multiple column representations, distinguished by representation index.
Definition RColumn.hxx:44
void ConnectPageSource(ROOT::DescriptorId_t fieldId, ROOT::Internal::RPageSource &pageSource)
Connect the column to a page source.
Definition RColumn.cxx:56
ROOT::Internal::RPageSource * fPageSource
Definition RColumn.hxx:46
ROOT::Internal::RColumnElementBase * GetElement() const
Definition RColumn.hxx:338
ROOT::Internal::RPageStorage::ColumnHandle_t GetHandleSource() const
Definition RColumn.hxx:356
void SetBitsOnStorage(std::size_t bits)
Definition RColumn.hxx:359
void Read(const ROOT::NTupleSize_t globalIndex, void *to)
Definition RColumn.hxx:159
CppT * Map(RNTupleLocalIndex localIndex)
Definition RColumn.hxx:233
ROOT::ENTupleColumnType GetType() const
Definition RColumn.hxx:339
ROOT::NTupleSize_t GetFirstElementIndex() const
Definition RColumn.hxx:353
bool ReadPageContains(RNTupleLocalIndex localIndex) const
Definition RColumn.hxx:333
void GetCollectionInfo(const ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *collectionStart, ROOT::NTupleSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition RColumn.hxx:282
void AppendV(const void *from, std::size_t count)
Definition RColumn.hxx:137
void ReadV(ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t count, void *to)
Definition RColumn.hxx:181
std::size_t GetWritePageCapacity() const
Definition RColumn.hxx:360
ROOT::NTupleSize_t GetNElements() const
Definition RColumn.hxx:337
void SetValueRange(double min, double max)
Definition RColumn.hxx:361
ROOT::DescriptorId_t GetOnDiskId() const
Definition RColumn.hxx:352
ROOT::Internal::RPageRef fReadPageRef
The currently mapped page for reading.
Definition RColumn.hxx:59
ROOT::Internal::RPage fWritePage
The page into which new elements are being written.
Definition RColumn.hxx:53
CppT * MapV(const ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t &nItems)
Definition RColumn.hxx:240
ROOT::Internal::RPageSink * fPageSink
Definition RColumn.hxx:45
ROOT::DescriptorId_t fOnDiskId
The column id in the column descriptor, once connected to a sink or source.
Definition RColumn.hxx:61
std::uint32_t fIndex
Columns belonging to the same field are distinguished by their order.
Definition RColumn.hxx:42
bool TryMapPage(ROOT::NTupleSize_t globalIndex)
Definition RColumn.cxx:84
ROOT::Internal::RPageStorage::ColumnHandle_t fHandleSink
Definition RColumn.hxx:47
ROOT::NTupleSize_t fFirstElementIndex
Global index of the first element in this column; usually == 0, unless it is a deferred column.
Definition RColumn.hxx:63
ROOT::NTupleSize_t fNElements
The number of elements written resp. available in the column.
Definition RColumn.hxx:57
void MapPage(ROOT::NTupleSize_t globalIndex)
Definition RColumn.hxx:327
void ConnectPageSink(ROOT::DescriptorId_t fieldId, ROOT::Internal::RPageSink &pageSink, ROOT::NTupleSize_t firstElementIndex=0U)
Connect the column to a page sink.
Definition RColumn.cxx:39
ROOT::Internal::RPageStorage::ColumnHandle_t fHandleSource
Definition RColumn.hxx:48
std::uint16_t GetBitsOnStorage() const
Definition RColumn.hxx:340
std::uint32_t GetIndex() const
Definition RColumn.hxx:350
void Append(const void *from)
Definition RColumn.hxx:125
std::unique_ptr< ROOT::Internal::RColumnElementBase > fElement
Used to pack and unpack pages on writing/reading.
Definition RColumn.hxx:65
ROOT::Internal::RPageSink * GetPageSink() const
Definition RColumn.hxx:355
void MapPage(RNTupleLocalIndex localIndex)
Definition RColumn.hxx:328
ROOT::ENTupleColumnType fType
Definition RColumn.hxx:39
ROOT::NTupleSize_t fInitialNElements
The initial number of elements in a page.
Definition RColumn.hxx:55
Reference to a page stored in the page pool.
Abstract interface to write data into an ntuple.
Abstract interface to read data from an ntuple.
RColumnHandle ColumnHandle_t
The column handle identifies a column with the current open page storage.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:43
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
ROOT::NTupleSize_t GetIndexInCluster() const
ROOT::DescriptorId_t GetClusterId() const
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
ENTupleColumnType