Logo ROOT  
Reference Guide
RColumn.hxx
Go to the documentation of this file.
1/// \file ROOT/RColumn.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-09
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RColumn
17#define ROOT7_RColumn
18
19#include <ROOT/RConfig.hxx> // for R__likely
21#include <ROOT/RColumnModel.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RPage.hxx>
24#include <ROOT/RPageStorage.hxx>
25
26#include <TError.h>
27
28#include <memory>
29#include <utility>
30
31namespace ROOT {
32namespace Experimental {
33namespace Detail {
34
35// clang-format off
36/**
37\class ROOT::Experimental::RColumn
38\ingroup NTuple
39\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
40
41On the primitives data layer, the RColumn and RColumnElement are the equivalents to RField and RFieldValue on the
42logical data layer.
43*/
44// clang-format on
45class RColumn {
46private:
48 /**
49 * Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
50 * the offset column with index 0 and the character value column with index 1.
51 */
52 std::uint32_t fIndex;
53 RPageSink *fPageSink = nullptr;
57 /// A set of open pages into which new elements are being written. The pages are used
58 /// in rotation. They are 50% bigger than the target size given by the write options.
59 /// The current page is filled until the target size, but it is only committed once the other
60 /// write page is filled at least 50%. If a flush occurs earlier, a slightly oversized, single
61 /// page will be committed.
63 /// Index of the current write page
65 /// For writing, the targeted number of elements, given by `fApproxNElementsPerPage` (in the write options) and the element size.
66 /// We ensure this value to be >= 2 in Connect() so that we have meaningful
67 /// "page full" and "page half full" events when writing the page.
68 std::uint32_t fApproxNElementsPerPage = 0;
69 /// The number of elements written (when writing) or available (when reading) in the column
71 /// The currently mapped page for reading
73 /// The column id is used to find matching pages with content when reading
75 /// Used to pack and unpack pages on writing/reading
76 std::unique_ptr<RColumnElementBase> fElement;
77
78 RColumn(const RColumnModel &model, std::uint32_t index);
79
80 /// Used in Append() and AppendV() to switch pages when the main page has reached the target size.
81 /// The other page has already been flushed when the main page reached 50%.
84 return;
85
86 fWritePageIdx = 1 - fWritePageIdx; // == (fWritePageIdx + 1) % 2
89 }
90
91 /// When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed
93 auto otherIdx = 1 - fWritePageIdx;
94 if (fWritePage[otherIdx].IsEmpty())
95 return;
97 // Mark the page as flushed; the rangeFirst is zero for now but will be reset to
98 // fNElements in SwapWritePagesIfFull() when the pages swap
99 fWritePage[otherIdx].Reset(0);
100 }
101
102public:
/// Factory for columns: builds an RColumn for the given model and index and attaches the column element
/// generated for the in-memory type CppT and the on-disk type model.GetType().
/// \param model  the column model; its type selects the generated column element
/// \param index  forwarded to the constructor (see fIndex: order of the column within its field)
/// \return the newly created column, owned by the caller
103 template <typename CppT>
104 static std::unique_ptr<RColumn> Create(const RColumnModel &model, std::uint32_t index)
105 {
// The constructor is private, so std::make_unique cannot be used here; wrap the raw `new` immediately
106 auto column = std::unique_ptr<RColumn>(new RColumn(model, index));
107 column->fElement = RColumnElementBase::Generate<CppT>(model.GetType());
108 return column;
109 }
110
111 RColumn(const RColumn&) = delete;
112 RColumn &operator =(const RColumn&) = delete;
113 ~RColumn();
114
115 void Connect(DescriptorId_t fieldId, RPageStorage *pageStorage);
116
117 void Append(const RColumnElementBase &element) {
118 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(1);
119
122 }
123
124 element.WriteTo(dst, 1);
125 fNElements++;
126
128 }
129
130 void AppendV(const RColumnElementBase &elemArray, std::size_t count) {
131 // We might not have enough space in the current page. In this case, fall back to one by one filling.
133 // TODO(jblomer): use (fewer) calls to AppendV to write the data page-by-page
134 for (unsigned i = 0; i < count; ++i) {
135 Append(RColumnElementBase(elemArray, i));
136 }
137 return;
138 }
139
140 void *dst = fWritePage[fWritePageIdx].GrowUnchecked(count);
141
142 // The check for flushing the shadow page is more complicated than for the Append() case
143 // because we don't necessarily fill up to exactly fApproxNElementsPerPage / 2 elements;
144 // we might instead jump over the 50% fill level
147 {
149 }
150
151 elemArray.WriteTo(dst, count);
152 fNElements += count;
153
154 // Note that by the very first check in AppendV, we cannot have filled more than fApproxNElementsPerPage elements
156 }
157
/// Read the single element at `globalIndex` into `element`.
/// If the currently mapped read page does not contain the index, the matching page is mapped first and
/// the mapping is asserted to contain the index afterwards.
/// \param globalIndex  index of the element, counted over the whole column
/// \param element      destination; its GetSize() is used as the stride inside the page buffer
158 void Read(const NTupleSize_t globalIndex, RColumnElementBase *element) {
159 if (!fReadPage.Contains(globalIndex)) {
160 MapPage(globalIndex);
161 R__ASSERT(fReadPage.Contains(globalIndex));
162 }
// Source address = page buffer + (distance from the page's first global index) * element size
163 void *src = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
164 (globalIndex - fReadPage.GetGlobalRangeFirst()) * element->GetSize();
165 element->ReadFrom(src, 1);
166 }
167
/// Read the single element addressed by `clusterIndex` (cluster id plus index within that cluster) into `element`.
/// If the currently mapped read page does not contain the index, the matching page is mapped first.
/// Note that, unlike in the global-index overload, the result of MapPage() is not asserted here.
/// \param clusterIndex  cluster-relative address of the element
/// \param element       destination; its GetSize() is used as the stride inside the page buffer
168 void Read(const RClusterIndex &clusterIndex, RColumnElementBase *element) {
169 if (!fReadPage.Contains(clusterIndex)) {
170 MapPage(clusterIndex);
171 }
// Source address = page buffer + (distance from the page's first in-cluster index) * element size
172 void *src = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
173 (clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst()) * element->GetSize();
174 element->ReadFrom(src, 1);
175 }
176
/// Read `count` consecutive elements starting at `globalIndex` into `elemArray`.
/// `count` must be greater than zero (asserted). If the requested range ends within the mapped page, it is
/// read in one go; otherwise the part available in the mapped page is read and the remainder is handled by a
/// recursive call that starts right after that part.
/// \param globalIndex  index of the first element, counted over the whole column
/// \param count        number of elements to read
/// \param elemArray    destination array; its GetSize() is used as the stride inside the page buffer
177 void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray) {
178 R__ASSERT(count > 0);
179 if (!fReadPage.Contains(globalIndex)) {
180 MapPage(globalIndex);
181 }
182 NTupleSize_t idxInPage = globalIndex - fReadPage.GetGlobalRangeFirst();
183
184 void *src = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemArray->GetSize();
// GetGlobalRangeLast() is inclusive, hence the +1 when comparing against the one-past-the-end index
185 if (globalIndex + count <= fReadPage.GetGlobalRangeLast() + 1) {
186 elemArray->ReadFrom(src, count);
187 } else {
// Only the rest of the mapped page can be served now
188 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
189 elemArray->ReadFrom(src, nBatch);
// NOTE(review): elemTail presumably addresses the destination array starting nBatch elements in --
// confirm against the RColumnElementBase(const RColumnElementBase &, std::size_t) constructor
190 RColumnElementBase elemTail(*elemArray, nBatch);
191 ReadV(globalIndex + nBatch, count - nBatch, &elemTail);
192 }
193 }
194
/// Read `count` consecutive elements starting at `clusterIndex` into `elemArray`.
/// If the requested range ends within the mapped page, it is read in one go; otherwise the part available in
/// the mapped page is read and the remainder is handled by a recursive call that continues in the same cluster
/// (the cluster id of `clusterIndex` is reused for the continuation).
/// Unlike the global-index overload, `count` is not asserted to be greater than zero here.
/// \param clusterIndex  cluster-relative address of the first element
/// \param count         number of elements to read
/// \param elemArray     destination array; its GetSize() is used as the stride inside the page buffer
195 void ReadV(const RClusterIndex &clusterIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
196 {
197 if (!fReadPage.Contains(clusterIndex)) {
198 MapPage(clusterIndex);
199 }
200 NTupleSize_t idxInPage = clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst();
201
202 void* src = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemArray->GetSize();
// GetClusterRangeLast() is inclusive, hence the +1 when comparing against the one-past-the-end index
203 if (clusterIndex.GetIndex() + count <= fReadPage.GetClusterRangeLast() + 1) {
204 elemArray->ReadFrom(src, count);
205 } else {
// Only the rest of the mapped page can be served now
206 ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
207 elemArray->ReadFrom(src, nBatch);
// NOTE(review): elemTail presumably addresses the destination array starting nBatch elements in --
// confirm against the RColumnElementBase(const RColumnElementBase &, std::size_t) constructor
208 RColumnElementBase elemTail(*elemArray, nBatch);
209 ReadV(RClusterIndex(clusterIndex.GetClusterId(), clusterIndex.GetIndex() + nBatch), count - nBatch, &elemTail);
210 }
211 }
212
/// Return a typed pointer to the element at `globalIndex`, as provided by MapV().
/// The number of items that MapV() reports is not needed here and is discarded.
213 template <typename CppT>
214 CppT *Map(const NTupleSize_t globalIndex) {
// Written by MapV(); never read here
215 NTupleSize_t nItems;
216 return MapV<CppT>(globalIndex, nItems);
217 }
218
/// Return a typed pointer to the element addressed by `clusterIndex`, as provided by MapV().
/// The number of items that MapV() reports is not needed here and is discarded.
219 template <typename CppT>
220 CppT *Map(const RClusterIndex &clusterIndex) {
// Written by MapV(); never read here
221 NTupleSize_t nItems;
222 return MapV<CppT>(clusterIndex, nItems);
223 }
224
225 template <typename CppT>
226 CppT *MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems) {
227 if (R__unlikely(!fReadPage.Contains(globalIndex))) {
228 MapPage(globalIndex);
229 }
230 // +1 to go from 0-based indexing to 1-based number of items
231 nItems = fReadPage.GetGlobalRangeLast() - globalIndex + 1;
232 return reinterpret_cast<CppT*>(
233 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
235 }
236
237 template <typename CppT>
238 CppT *MapV(const RClusterIndex &clusterIndex, NTupleSize_t &nItems) {
239 if (!fReadPage.Contains(clusterIndex)) {
240 MapPage(clusterIndex);
241 }
242 // +1 to go from 0-based indexing to 1-based number of items
243 nItems = fReadPage.GetClusterRangeLast() - clusterIndex.GetIndex() + 1;
244 return reinterpret_cast<CppT*>(
245 static_cast<unsigned char *>(fReadPage.GetBuffer()) +
247 }
248
250 if (!fReadPage.Contains(clusterIndex)) {
251 MapPage(clusterIndex);
252 }
253 return fReadPage.GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndex();
254 }
255
257 if (!fReadPage.Contains(globalIndex)) {
258 MapPage(globalIndex);
259 }
261 globalIndex - fReadPage.GetClusterInfo().GetIndexOffset());
262 }
263
264 /// For offset columns only, look at the two adjacent values that define a collection's coordinates
/// On return, *collectionSize is the value stored at `globalIndex` minus the preceding value, and
/// *collectionStart pairs that preceding value with the id of the cluster of the page mapped last.
/// The preceding value is taken as 0 when `globalIndex` is 0 or when it equals the index offset of its cluster
/// (presumably the global index of the cluster's first element -- confirm against RPage::RClusterInfo).
/// \param globalIndex      index of the offset element, counted over the whole column
/// \param collectionStart  output: cluster-relative start of the collection
/// \param collectionSize   output: number of items in the collection
265 void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
266 {
267 NTupleSize_t idxStart = 0;
268 NTupleSize_t idxEnd;
269 // Try to avoid jumping back to the previous page and jumping back to the previous cluster
270 if (R__likely(globalIndex > 0)) {
// Expected common case: the preceding element lives in the page that is already mapped
271 if (R__likely(fReadPage.Contains(globalIndex - 1))) {
272 idxStart = *Map<ClusterSize_t>(globalIndex - 1);
273 idxEnd = *Map<ClusterSize_t>(globalIndex);
// Mapping globalIndex may have moved on to a new cluster; in that case the value read at
// globalIndex - 1 is discarded and the start is 0
274 if (R__unlikely(fReadPage.GetClusterInfo().GetIndexOffset() == globalIndex))
275 idxStart = 0;
276 } else {
// Map globalIndex first so that the cluster info queried next describes its cluster; the preceding
// element is only mapped when globalIndex is not the index offset of that cluster
277 idxEnd = *Map<ClusterSize_t>(globalIndex);
278 auto selfOffset = fReadPage.GetClusterInfo().GetIndexOffset();
279 idxStart = (globalIndex == selfOffset) ? 0 : *Map<ClusterSize_t>(globalIndex - 1);
280 }
281 } else {
// Very first element of the column: there is no predecessor, idxStart stays 0
282 idxEnd = *Map<ClusterSize_t>(globalIndex);
283 }
284 *collectionSize = idxEnd - idxStart;
285 *collectionStart = RClusterIndex(fReadPage.GetClusterInfo().GetId(), idxStart);
286 }
287
/// Cluster-relative variant: look at the value addressed by `clusterIndex` and the one before it.
/// On return, *collectionSize is the difference of the two values and *collectionStart pairs the preceding
/// value with the cluster id of `clusterIndex`. For the first element of a cluster (index 0) the preceding
/// value is taken as 0 and no second element is mapped.
/// \param clusterIndex     cluster-relative address of the offset element
/// \param collectionStart  output: cluster-relative start of the collection
/// \param collectionSize   output: number of items in the collection
288 void GetCollectionInfo(const RClusterIndex &clusterIndex,
289 RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
290 {
291 auto index = clusterIndex.GetIndex();
292 auto idxStart = (index == 0) ? 0 : *Map<ClusterSize_t>(clusterIndex - 1);
293 auto idxEnd = *Map<ClusterSize_t>(clusterIndex);
294 *collectionSize = idxEnd - idxStart;
295 *collectionStart = RClusterIndex(clusterIndex.GetClusterId(), idxStart);
296 }
297
298 /// Read the switch element (RColumnSwitch) stored at `globalIndex` and report its two components
/// \param globalIndex  index of the switch element, counted over the whole column
/// \param varIndex     output: the switch's index, paired with the id of the cluster of the mapped page
/// \param tag          output: the switch's tag
299 void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag) {
// Map() makes sure the page containing globalIndex is the mapped read page, so the cluster info used
// on the next line refers to that page
300 auto varSwitch = Map<RColumnSwitch>(globalIndex);
301 *varIndex = RClusterIndex(fReadPage.GetClusterInfo().GetId(), varSwitch->GetIndex());
302 *tag = varSwitch->GetTag();
303 }
304
/// Declared here only; the definition is not part of this header
305 void Flush();
/// Make the page that contains the given global index the current read page (defined in RColumn.cxx)
306 void MapPage(const NTupleSize_t index);
/// Make the page that contains the given cluster-relative index the current read page
307 void MapPage(const RClusterIndex &clusterIndex);
/// Non-owning pointer to the column element held by fElement
309 RColumnElementBase *GetElement() const { return fElement.get(); }
/// The column model stored in fModel
310 const RColumnModel &GetModel() const { return fModel; }
/// The order of this column within its field (see fIndex)
311 std::uint32_t GetIndex() const { return fIndex; }
316};
317
318} // namespace Detail
319
320} // namespace Experimental
321} // namespace ROOT
322
323#endif
#define R__likely(expr)
Definition: RConfig.hxx:599
#define R__unlikely(expr)
Definition: RConfig.hxx:598
#define R__ASSERT(e)
Definition: TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
void WriteTo(void *destination, std::size_t count) const
Write one or multiple column elements into destination.
void ReadFrom(void *source, std::size_t count)
Set the column element or an array of elements from the memory location source.
Pairs of C++ type and column type, like float and EColumnType::kReal32.
RPageStorage::ColumnHandle_t GetHandleSource() const
Definition: RColumn.hxx:314
void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
Definition: RColumn.hxx:177
RColumn & operator=(const RColumn &)=delete
void GetCollectionInfo(const RClusterIndex &clusterIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
Definition: RColumn.hxx:288
const RColumnModel & GetModel() const
Definition: RColumn.hxx:310
ColumnId_t GetColumnIdSource() const
Definition: RColumn.hxx:312
std::uint32_t GetIndex() const
Definition: RColumn.hxx:311
RPage fReadPage
The currently mapped page for reading.
Definition: RColumn.hxx:72
CppT * MapV(const RClusterIndex &clusterIndex, NTupleSize_t &nItems)
Definition: RColumn.hxx:238
void AppendV(const RColumnElementBase &elemArray, std::size_t count)
Definition: RColumn.hxx:130
RColumnElementBase * GetElement() const
Definition: RColumn.hxx:309
void Append(const RColumnElementBase &element)
Definition: RColumn.hxx:117
CppT * MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems)
Definition: RColumn.hxx:226
void FlushShadowWritePage()
When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed.
Definition: RColumn.hxx:92
void Connect(DescriptorId_t fieldId, RPageStorage *pageStorage)
Definition: RColumn.cxx:43
void MapPage(const NTupleSize_t index)
Definition: RColumn.cxx:87
void SwapWritePagesIfFull()
Used in Append() and AppendV() to switch pages when the main page reached the target size The other p...
Definition: RColumn.hxx:82
void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag)
Read the switch element at globalIndex: its tag and its index within the cluster of the mapped page.
Definition: RColumn.hxx:299
CppT * Map(const NTupleSize_t globalIndex)
Definition: RColumn.hxx:214
std::uint32_t fApproxNElementsPerPage
For writing, the targeted number of elements, given by fApproxNElementsPerPage (in the write options)...
Definition: RColumn.hxx:68
void Read(const NTupleSize_t globalIndex, RColumnElementBase *element)
Definition: RColumn.hxx:158
CppT * Map(const RClusterIndex &clusterIndex)
Definition: RColumn.hxx:220
NTupleSize_t GetGlobalIndex(const RClusterIndex &clusterIndex)
Definition: RColumn.hxx:249
std::unique_ptr< RColumnElementBase > fElement
Used to pack and unpack pages on writing/reading.
Definition: RColumn.hxx:76
int fWritePageIdx
Index of the current write page.
Definition: RColumn.hxx:64
NTupleSize_t GetNElements() const
Definition: RColumn.hxx:308
RColumn(const RColumn &)=delete
std::uint32_t fIndex
Columns belonging to the same field are distinguished by their order.
Definition: RColumn.hxx:52
static std::unique_ptr< RColumn > Create(const RColumnModel &model, std::uint32_t index)
Definition: RColumn.hxx:104
RPageStorage::ColumnHandle_t GetHandleSink() const
Definition: RColumn.hxx:315
RPageSource * GetPageSource() const
Definition: RColumn.hxx:313
NTupleSize_t fNElements
The number of elements written resp. available in the column.
Definition: RColumn.hxx:70
ColumnId_t fColumnIdSource
The column id is used to find matching pages with content when reading.
Definition: RColumn.hxx:74
void ReadV(const RClusterIndex &clusterIndex, const ClusterSize_t::ValueType count, RColumnElementBase *elemArray)
Definition: RColumn.hxx:195
RPageStorage::ColumnHandle_t fHandleSink
Definition: RColumn.hxx:55
RColumn(const RColumnModel &model, std::uint32_t index)
Definition: RColumn.cxx:24
void Read(const RClusterIndex &clusterIndex, RColumnElementBase *element)
Definition: RColumn.hxx:168
RPageStorage::ColumnHandle_t fHandleSource
Definition: RColumn.hxx:56
void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
For offset columns only, look at the two adjacent values that define a collection's coordinates.
Definition: RColumn.hxx:265
RClusterIndex GetClusterIndex(NTupleSize_t globalIndex)
Definition: RColumn.hxx:256
RPage fWritePage[2]
A set of open pages into which new elements are being written.
Definition: RColumn.hxx:62
Abstract interface to write data into an ntuple.
void CommitPage(ColumnHandle_t columnHandle, const RPage &page)
Write a page to the storage. The column must have been added before.
Abstract interface to read data from an ntuple.
Common functionality of an ntuple storage for both reading and writing.
A page is a slice of a column that is mapped into memory.
Definition: RPage.hxx:41
ClusterSize_t::ValueType GetClusterRangeLast() const
Definition: RPage.hxx:88
ClusterSize_t::ValueType GetNElements() const
Definition: RPage.hxx:83
bool Contains(NTupleSize_t globalIndex) const
Definition: RPage.hxx:93
void * GrowUnchecked(ClusterSize_t::ValueType nElements)
Called during writing: returns a pointer after the last element and increases the element counter in ...
Definition: RPage.hxx:109
const RClusterInfo & GetClusterInfo() const
Definition: RPage.hxx:91
void Reset(NTupleSize_t rangeFirst)
Forget all currently stored elements (size == 0) and set a new starting index.
Definition: RPage.hxx:120
NTupleSize_t GetGlobalRangeFirst() const
Definition: RPage.hxx:85
NTupleSize_t GetGlobalRangeLast() const
Definition: RPage.hxx:86
ClusterSize_t::ValueType GetClusterRangeFirst() const
Definition: RPage.hxx:87
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
DescriptorId_t GetClusterId() const
ClusterSize_t::ValueType GetIndex() const
Holds the static meta-data of an RNTuple column.
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
Definition: RNTupleUtil.hxx:48
constexpr ColumnId_t kInvalidColumnId
Definition: RNTupleUtil.hxx:97
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::int64_t ColumnId_t
Uniquely identifies a physical column within the scope of the current process, used to tag pages.
Definition: RNTupleUtil.hxx:96
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Definition: RNTupleUtil.hxx:51