Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RColumn.hxx
Go to the documentation of this file.
1/// \file ROOT/RColumn.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RColumn
17#define ROOT7_RColumn
18
19#include <ROOT/RConfig.hxx> // for R__likely
21#include <ROOT/RColumnModel.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RPage.hxx>
24#include <ROOT/RPageStorage.hxx>
25
26#include <TError.h>
27
28#include <cstring> // for memcpy
29#include <memory>
30#include <utility>
31
32namespace ROOT {
33namespace Experimental {
34namespace Detail {
35
36// clang-format off
37/**
38\class ROOT::Experimental::RColumn
39\ingroup NTuple
40\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
41*/
42// clang-format on
43class RColumn {
44private:
46 /**
47 * Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
48 * the offset column with index 0 and the character value column with index 1.
49 */
50 std::uint32_t fIndex;
51 RPageSink *fPageSink = nullptr;
55 /// A set of open pages into which new elements are being written. The pages are used
56 /// in rotation. They are 50% bigger than the target size given by the write options.
57 /// The current page is filled until the target size, but it is only committed once the other
58 /// write page is filled at least 50%. If a flush occurs earlier, a slightly oversized, single
59 /// page will be committed.
61 /// Index of the current write page
63 /// For writing, the targeted number of elements, given by `fApproxNElementsPerPage` (in the write options) and the element size.
64 /// We ensure this value to be >= 2 in Connect() so that we have meaningful
65 /// "page full" and "page half full" events when writing the page.
66 std::uint32_t fApproxNElementsPerPage = 0;
67 /// The number of elements written resp. available in the column
69 /// The currently mapped page for reading
71 /// The column id is used to find matching pages with content when reading
73 /// Global index of the first element in this column; usually == 0, unless it is a deferred column
75 /// Used to pack and unpack pages on writing/reading
76 std::unique_ptr<RColumnElementBase> fElement;
77
78 RColumn(const RColumnModel &model, std::uint32_t index);
79
80 /// Used in Append() and AppendV() to switch pages when the main page reached the target size
81 /// The other page has been flushed when the main page reached 50%.
84 return;
85
86 fWritePageIdx = 1 - fWritePageIdx; // == (fWritePageIdx + 1) % 2
89 }
90
91 /// When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed
93 auto otherIdx = 1 - fWritePageIdx;
94 if (fWritePage[otherIdx].IsEmpty())
95 return;
97 // Mark the page as flushed; the rangeFirst is zero for now but will be reset to
98 // fNElements in SwapWritePagesIfFull() when the pages swap
99 fWritePage[otherIdx].Reset(0);
100 }
101
102public:
103 template <typename CppT>
104 static std::unique_ptr<RColumn> Create(const RColumnModel &model, std::uint32_t index)
105 {
106 auto column = std::unique_ptr<RColumn>(new RColumn(model, index));
107 column->fElement = RColumnElementBase::Generate<CppT>(model.GetType());
108 return column;
109 }
110
111 RColumn(const RColumn&) = delete;
112 RColumn &operator =(const RColumn&) = delete;
113 ~RColumn();
114
115 /// Connect the column to a page storage. If `pageStorage` is a page sink, `firstElementIndex` can be used to
116 /// specify the first column element index with backing storage for this column. On read back, elements before
117 /// `firstElementIndex` will cause the zero page to be mapped.
118 /// TODO(jalopezg): at this point it would be nicer to distinguish between connecting a page sink and a page source
119 void Connect(DescriptorId_t fieldId, RPageStorage *pageStorage, NTupleSize_t firstElementIndex = 0U);
120
121 void Append(const void *from)
122 {
123 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(1);
124
127 }
128
129 std::memcpy(dst, from, fElement->GetSize());
130 fNElements++;
131
133 }
134
135 void AppendV(const void *from, std::size_t count)
136 {
137 // We might not have enough space in the current page. In this case, fall back to one by one filling.
139 // TODO(jblomer): use (fewer) calls to AppendV to write the data page-by-page
140 for (unsigned i = 0; i < count; ++i) {
141 Append(static_cast<const unsigned char *>(from) + fElement->GetSize() * i);
142 }
143 return;
144 }
145
146 // The check for flushing the shadow page is more complicated than for the Append() case
147 // because we don't necessarily fill up to exactly fApproxNElementsPerPage / 2 elements;
148 // we might instead jump over the 50% fill level.
149 // This check should be done before calling `RPage::GrowUnchecked()` as the latter affects the return value of
150 // `RPage::GetNElements()`.
153 {
155 }
156
157 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(count);
158
159 std::memcpy(dst, from, fElement->GetSize() * count);
160 fNElements += count;
161
162 // Note that by the very first check in AppendV, we cannot have filled more than fApproxNElementsPerPage elements
164 }
165
166 void Read(const NTupleSize_t globalIndex, void *to)
167 {
168 if (!fReadPage.Contains(globalIndex)) {
169 MapPage(globalIndex);
170 }
171 const auto elemSize = fElement->GetSize();
172 void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
173 (globalIndex - fReadPage.GetGlobalRangeFirst()) * elemSize;
174 std::memcpy(to, from, elemSize);
175 }
176
177 void Read(const RClusterIndex &clusterIndex, void *to)
178 {
179 if (!fReadPage.Contains(clusterIndex)) {
180 MapPage(clusterIndex);
181 }
182 const auto elemSize = fElement->GetSize();
183 void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
184 (clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst()) * elemSize;
185 std::memcpy(to, from, elemSize);
186 }
187
188 void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, void *to)
189 {
190 if (!fReadPage.Contains(globalIndex)) {
191 MapPage(globalIndex);
192 }
193 NTupleSize_t idxInPage = globalIndex - fReadPage.GetGlobalRangeFirst();
194
195 const auto elemSize = fElement->GetSize();
196 const void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemSize;
197 if (globalIndex + count <= fReadPage.GetGlobalRangeLast() + 1) {
198 std::memcpy(to, from, elemSize * count);
199 } else {
200 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
201 std::memcpy(to, from, elemSize * nBatch);
202 auto tail = static_cast<unsigned char *>(to) + nBatch * elemSize;
203 ReadV(globalIndex + nBatch, count - nBatch, tail);
204 }
205 }
206
207 void ReadV(const RClusterIndex &clusterIndex, const ClusterSize_t::ValueType count, void *to)
208 {
209 if (!fReadPage.Contains(clusterIndex)) {
210 MapPage(clusterIndex);
211 }
212 NTupleSize_t idxInPage = clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst();
213
214 const auto elemSize = fElement->GetSize();
215 const void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemSize;
216 if (clusterIndex.GetIndex() + count <= fReadPage.GetClusterRangeLast() + 1) {
217 std::memcpy(to, from, elemSize * count);
218 } else {
219 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
220 std::memcpy(to, from, elemSize * nBatch);
221 auto tail = static_cast<unsigned char *>(to) + nBatch * elemSize;
222 ReadV(RClusterIndex(clusterIndex.GetClusterId(), clusterIndex.GetIndex() + nBatch), count - nBatch, tail);
223 }
224 }
225
226 template <typename CppT>
227 CppT *Map(const NTupleSize_t globalIndex) {
228 NTupleSize_t nItems;
229 return MapV<CppT>(globalIndex, nItems);
230 }
231
232 template <typename CppT>
233 CppT *Map(const RClusterIndex &clusterIndex) {
234 NTupleSize_t nItems;
235 return MapV<CppT>(clusterIndex, nItems);
236 }
237
238 template <typename CppT>
239 CppT *MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems) {
240 if (R__unlikely(!fReadPage.Contains(globalIndex))) {
241 MapPage(globalIndex);
242 }
243 // +1 to go from 0-based indexing to 1-based number of items
244 nItems = fReadPage.GetGlobalRangeLast() - globalIndex + 1;
245 return reinterpret_cast<CppT*>(
246 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
248 }
249
250 template <typename CppT>
251 CppT *MapV(const RClusterIndex &clusterIndex, NTupleSize_t &nItems) {
252 if (!fReadPage.Contains(clusterIndex)) {
253 MapPage(clusterIndex);
254 }
255 // +1 to go from 0-based indexing to 1-based number of items
256 nItems = fReadPage.GetClusterRangeLast() - clusterIndex.GetIndex() + 1;
257 return reinterpret_cast<CppT*>(
258 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
260 }
261
263 if (!fReadPage.Contains(clusterIndex)) {
264 MapPage(clusterIndex);
265 }
266 return fReadPage.GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndex();
267 }
268
270 if (!fReadPage.Contains(globalIndex)) {
271 MapPage(globalIndex);
272 }
274 globalIndex - fReadPage.GetClusterInfo().GetIndexOffset());
275 }
276
277 /// For offset columns only, look at the two adjacent values that define a collection's coordinates
278 void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
279 {
280 NTupleSize_t idxStart = 0;
281 NTupleSize_t idxEnd;
282 // Try to avoid jumping back to the previous page and jumping back to the previous cluster
283 if (R__likely(globalIndex > 0)) {
284 if (R__likely(fReadPage.Contains(globalIndex - 1))) {
285 idxStart = *Map<ClusterSize_t>(globalIndex - 1);
286 idxEnd = *Map<ClusterSize_t>(globalIndex);
287 if (R__unlikely(fReadPage.GetClusterInfo().GetIndexOffset() == globalIndex))
288 idxStart = 0;
289 } else {
290 idxEnd = *Map<ClusterSize_t>(globalIndex);
291 auto selfOffset = fReadPage.GetClusterInfo().GetIndexOffset();
292 idxStart = (globalIndex == selfOffset) ? 0 : *Map<ClusterSize_t>(globalIndex - 1);
293 }
294 } else {
295 idxEnd = *Map<ClusterSize_t>(globalIndex);
296 }
297 *collectionSize = idxEnd - idxStart;
298 *collectionStart = RClusterIndex(fReadPage.GetClusterInfo().GetId(), idxStart);
299 }
300
301 void GetCollectionInfo(const RClusterIndex &clusterIndex,
302 RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
303 {
304 auto index = clusterIndex.GetIndex();
305 auto idxStart = (index == 0) ? 0 : *Map<ClusterSize_t>(clusterIndex - 1);
306 auto idxEnd = *Map<ClusterSize_t>(clusterIndex);
307 *collectionSize = idxEnd - idxStart;
308 *collectionStart = RClusterIndex(clusterIndex.GetClusterId(), idxStart);
309 }
310
311 /// Get the currently active cluster id
312 void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag) {
313 auto varSwitch = Map<RColumnSwitch>(globalIndex);
314 *varIndex = RClusterIndex(fReadPage.GetClusterInfo().GetId(), varSwitch->GetIndex());
315 *tag = varSwitch->GetTag();
316 }
317
318 void Flush();
319 void MapPage(const NTupleSize_t index);
320 void MapPage(const RClusterIndex &clusterIndex);
322 RColumnElementBase *GetElement() const { return fElement.get(); }
323 const RColumnModel &GetModel() const { return fModel; }
324 std::uint32_t GetIndex() const { return fIndex; }
328 RPageSink *GetPageSink() const { return fPageSink; }
331};
332
333} // namespace Detail
334
335} // namespace Experimental
336} // namespace ROOT
337
338#endif
#define R__likely(expr)
Definition RConfig.hxx:587
#define R__unlikely(expr)
Definition RConfig.hxx:586
#define R__ASSERT(e)
Definition TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
A column element encapsulates the translation between basic C++ types and their column representation...
RPageStorage::ColumnHandle_t GetHandleSource() const
Definition RColumn.hxx:329
void AppendV(const void *from, std::size_t count)
Definition RColumn.hxx:135
RColumn & operator=(const RColumn &)=delete
void GetCollectionInfo(const RClusterIndex &clusterIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
Definition RColumn.hxx:301
const RColumnModel & GetModel() const
Definition RColumn.hxx:323
void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, void *to)
Definition RColumn.hxx:188
ColumnId_t GetColumnIdSource() const
Definition RColumn.hxx:325
void Append(const void *from)
Definition RColumn.hxx:121
RPageSink * GetPageSink() const
Definition RColumn.hxx:328
std::uint32_t GetIndex() const
Definition RColumn.hxx:324
RPage fReadPage
The currently mapped page for reading.
Definition RColumn.hxx:70
CppT * MapV(const RClusterIndex &clusterIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:251
RColumnElementBase * GetElement() const
Definition RColumn.hxx:322
void Connect(DescriptorId_t fieldId, RPageStorage *pageStorage, NTupleSize_t firstElementIndex=0U)
Connect the column to a page storage.
Definition RColumn.cxx:42
void Read(const RClusterIndex &clusterIndex, void *to)
Definition RColumn.hxx:177
CppT * MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems)
Definition RColumn.hxx:239
void FlushShadowWritePage()
When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed.
Definition RColumn.hxx:92
void MapPage(const NTupleSize_t index)
Definition RColumn.cxx:92
NTupleSize_t GetFirstElementIndex() const
Definition RColumn.hxx:326
void SwapWritePagesIfFull()
Used in Append() and AppendV() to switch pages when the main page reached the target size The other p...
Definition RColumn.hxx:82
void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag)
Get the currently active cluster id.
Definition RColumn.hxx:312
CppT * Map(const NTupleSize_t globalIndex)
Definition RColumn.hxx:227
std::uint32_t fApproxNElementsPerPage
For writing, the targeted number of elements, given by fApproxNElementsPerPage (in the write options)...
Definition RColumn.hxx:66
void Read(const NTupleSize_t globalIndex, void *to)
Definition RColumn.hxx:166
CppT * Map(const RClusterIndex &clusterIndex)
Definition RColumn.hxx:233
NTupleSize_t GetGlobalIndex(const RClusterIndex &clusterIndex)
Definition RColumn.hxx:262
std::unique_ptr< RColumnElementBase > fElement
Used to pack and unpack pages on writing/reading.
Definition RColumn.hxx:76
int fWritePageIdx
Index of the current write page.
Definition RColumn.hxx:62
NTupleSize_t GetNElements() const
Definition RColumn.hxx:321
RColumn(const RColumn &)=delete
std::uint32_t fIndex
Columns belonging to the same field are distinguished by their order.
Definition RColumn.hxx:50
static std::unique_ptr< RColumn > Create(const RColumnModel &model, std::uint32_t index)
Definition RColumn.hxx:104
RPageStorage::ColumnHandle_t GetHandleSink() const
Definition RColumn.hxx:330
NTupleSize_t fFirstElementIndex
Global index of the first element in this column; usually == 0, unless it is a deferred column.
Definition RColumn.hxx:74
void ReadV(const RClusterIndex &clusterIndex, const ClusterSize_t::ValueType count, void *to)
Definition RColumn.hxx:207
RPageSource * GetPageSource() const
Definition RColumn.hxx:327
NTupleSize_t fNElements
The number of elements written resp. available in the column.
Definition RColumn.hxx:68
ColumnId_t fColumnIdSource
The column id is used to find matching pages with content when reading.
Definition RColumn.hxx:72
RPageStorage::ColumnHandle_t fHandleSink
Definition RColumn.hxx:53
RPageStorage::ColumnHandle_t fHandleSource
Definition RColumn.hxx:54
void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition RColumn.hxx:278
RClusterIndex GetClusterIndex(NTupleSize_t globalIndex)
Definition RColumn.hxx:269
RPage fWritePage[2]
A set of open pages into which new elements are being written.
Definition RColumn.hxx:60
Abstract interface to write data into an ntuple.
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
Abstract interface to read data from an ntuple.
Common functionality of an ntuple storage for both reading and writing.
A page is a slice of a column that is mapped into memory.
Definition RPage.hxx:42
ClusterSize_t::ValueType GetClusterRangeLast() const
Definition RPage.hxx:91
bool Contains(NTupleSize_t globalIndex) const
Definition RPage.hxx:96
void * GrowUnchecked(ClusterSize_t::ValueType nElements)
Called during writing: returns a pointer after the last element and increases the element counter in ...
Definition RPage.hxx:112
const RClusterInfo & GetClusterInfo() const
Definition RPage.hxx:94
std::uint32_t GetNElements() const
Definition RPage.hxx:86
void Reset(NTupleSize_t rangeFirst)
Forget all currently stored elements (size == 0) and set a new starting index.
Definition RPage.hxx:123
NTupleSize_t GetGlobalRangeFirst() const
Definition RPage.hxx:88
NTupleSize_t GetGlobalRangeLast() const
Definition RPage.hxx:89
ClusterSize_t::ValueType GetClusterRangeFirst() const
Definition RPage.hxx:90
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
Holds the static meta-data of an RNTuple column.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr ColumnId_t kInvalidColumnId
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Wrap the integer in a struct in order to avoid template specialization clash with std::uint32_t.