Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RNTupleTypes.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleTypes.hxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \date 2018-10-04
4
5/*************************************************************************
6 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12
13#ifndef ROOT_RNTupleTypes
14#define ROOT_RNTupleTypes
15
16#include <ROOT/RConfig.hxx>
17
18#include <cstddef>
19#include <cstdint>
20#include <limits>
21#include <ostream>
22#include <type_traits>
23
24namespace ROOT {
25
26/// Helper types to present an offset column as array of collection sizes.
27/// See RField<RNTupleCardinality<SizeT>> for details.
28template <typename SizeT>
30 static_assert(std::is_same_v<SizeT, std::uint32_t> || std::is_same_v<SizeT, std::uint64_t>,
31 "RNTupleCardinality is only supported with std::uint32_t or std::uint64_t template parameters");
32
33 using ValueType = SizeT;
34
36 explicit constexpr RNTupleCardinality(ValueType value) : fValue(value) {}
38 {
39 fValue = value;
40 return *this;
41 }
42 operator ValueType() const { return fValue; }
43
45};
46
47// clang-format off
48/**
49\class ROOT::ENTupleColumnType
50\ingroup NTuple
51\brief The available trivial, native content types of a column
52
53More complex types, such as classes, get translated into columns of such simple types by the RField.
54When changed, remember to update
55 - RColumnElement::Generate()
56 - RColumnElement::GetTypeName()
57 - RColumnElement::GetValidBitRange()
58 - RColumnElement template specializations / packing & unpacking
59 - If necessary, endianness handling for the packing + unit test in ntuple_endian
60 - RNTupleSerializer::[Des|S]erializeColumnType
61*/
62// clang-format on
65 // type for root columns of (nested) collections; offsets are relative to the current cluster
68 // 96 bit column that is a pair of a kIndex64 and a 32bit dispatch tag to a column ID;
69 // used to serialize std::variant.
98};
99
100/// The fields in the RNTuple data model tree can carry different structural information about the type system.
101/// Collection fields have an offset column and subfields with arbitrary cardinality, record fields have no
102/// materialization on the primitive column layer and an arbitrary number of subfields. Plain fields are either
103/// leafs (e.g., `float`) or "wrapper fields" with exactly one child that has the same cardinality
104/// (number of elements in the data set modulo field repetitions) as the parent (e.g., std::atomic<T>).
105// IMPORTANT: if you add members, remember to change the related `operator<<` below.
115
116inline std::ostream &operator<<(std::ostream &os, ENTupleStructure structure)
117{
118 static const char *const names[] = {"Invalid", "Plain", "Collection", "Record", "Variant", "Streamer", "Unknown"};
119 static_assert((std::size_t)ENTupleStructure::kUnknown + 1 == std::size(names));
120
121 if (R__likely(static_cast<std::size_t>(structure) <= std::size(names)))
122 os << names[static_cast<std::uint16_t>(structure)];
123 else
124 os << "(invalid)";
125 return os;
126}
127
128/// Integer type long enough to hold the maximum number of entries in a column
129using NTupleSize_t = std::uint64_t;
130constexpr NTupleSize_t kInvalidNTupleIndex = std::uint64_t(-1);
131
132/// Distinguishes elements of the same type within a descriptor, e.g. different fields
133using DescriptorId_t = std::uint64_t;
134constexpr DescriptorId_t kInvalidDescriptorId = std::uint64_t(-1);
135
136/// Addresses a column element or field item relative to a particular cluster, instead of a global NTupleSize_t index
138private:
141
142public:
143 RNTupleLocalIndex() = default;
144 RNTupleLocalIndex(const RNTupleLocalIndex &other) = default;
146 constexpr RNTupleLocalIndex(ROOT::DescriptorId_t clusterId, ROOT::NTupleSize_t indexInCluster)
147 : fClusterId(clusterId), fIndexInCluster(indexInCluster)
148 {
149 }
150
155
160
162 {
163 return RNTupleLocalIndex(fClusterId, fIndexInCluster * repetitionFactor);
164 }
165
166 RNTupleLocalIndex operator++(int) /* postfix */
167 {
168 auto r = *this;
170 return r;
171 }
172
174 {
176 return *this;
177 }
178
180 {
181 return fClusterId == other.fClusterId && fIndexInCluster == other.fIndexInCluster;
182 }
183
184 bool operator!=(RNTupleLocalIndex other) const { return !(*this == other); }
185
188};
189
190/// RNTupleLocator payload that is common for object stores using 64bit location information.
191/// This might not contain the full location of the content. In particular, for page locators this information may be
192/// used in conjunction with the cluster and column ID.
194private:
195 std::uint64_t fLocation = 0;
196
197public:
199 explicit RNTupleLocatorObject64(std::uint64_t location) : fLocation(location) {}
200 bool operator==(const RNTupleLocatorObject64 &other) const { return fLocation == other.fLocation; }
201 std::uint64_t GetLocation() const { return fLocation; }
202};
203
204// Workaround missing return type overloading
205class RNTupleLocator;
206namespace Internal {
207template <typename T>
209
210template <>
211struct RNTupleLocatorHelper<std::uint64_t> {
212 static std::uint64_t Get(const RNTupleLocator &loc);
213};
214
215template <>
219} // namespace Internal
220
221/// Generic information about the physical location of data. Values depend on the concrete storage type. E.g.,
222/// for a local file `fPosition` is a 64bit file offset. Referenced objects on storage can be compressed
223/// and therefore we need to store their actual size.
224/// Note that we use a representation optimized for memory consumption that slightly differs from the on-disk
225/// representation.
227 friend struct Internal::RNTupleLocatorHelper<std::uint64_t>;
228 friend struct Internal::RNTupleLocatorHelper<RNTupleLocatorObject64>;
229
230public:
231 /// Values for the _Type_ field in non-disk locators. Serializable types must have the MSb == 0; see
232 /// `doc/BinaryFormatSpecification.md` for details
233 enum ELocatorType : std::uint8_t {
234 // The kTypeFile locator may translate to an on-disk standard locator (type 0x00) or a large locator (type 0x01),
235 // if the size of the referenced data block is >2GB
236 kTypeFile = 0x00,
237 kTypeDAOS = 0x02,
238
242 };
243
244private:
245 /// The 4 most significant bits of fFlagsAndNBytes carry the locator type (3 bits)
246 /// plus one bit for a reserved bit of an extended locator.
247 static constexpr std::uint64_t kMaskFlags = 0x0FULL << 60;
248 static constexpr std::uint64_t kMaskType = 0x07ULL << 61;
249 static constexpr std::uint64_t kMaskReservedBit = 1ull << 60;
250
251 /// To save memory, we use the most significant bits to store the locator type (file, DAOS, zero page,
252 /// unknown, kTestLocatorType) as well as one "reserved bit" that can be used in future locators.
253 /// Consequently, we can only store sizes up to 60 bits (1 EB), which in practice won't be an issue.
254 std::uint64_t fFlagsAndNBytes = 0;
255 /// Simple on-disk locators consisting of a 64-bit offset use variant type `uint64_t`;
256 /// Object store locators use RNTupleLocatorObject64 but can still use the same 64 bit int for information storage.
257 std::uint64_t fPosition = 0;
258
259public:
260 RNTupleLocator() = default;
261
262 bool operator==(const RNTupleLocator &other) const
263 {
264 return fPosition == other.fPosition && fFlagsAndNBytes == other.fFlagsAndNBytes;
265 }
266
267 std::uint64_t GetNBytesOnStorage() const { return fFlagsAndNBytes & ~kMaskFlags; }
268 /// For non-disk locators, the value for the _Type_ field. This makes it possible to have different type values even
269 /// if the payload structure is identical.
270 ELocatorType GetType() const;
271 /// We currently only support one of the 8 available reserved bits (none are used so far).
272 std::uint8_t GetReserved() const { return (fFlagsAndNBytes & kMaskReservedBit) > 0; }
273
274 void SetNBytesOnStorage(std::uint64_t nBytesOnStorage);
276 void SetReserved(std::uint8_t reserved);
277
278 /// Note that for GetPosition() / SetPosition(), the locator type must correspond (kTypeFile, kTypeDAOS).
279
280 template <typename T>
281 T GetPosition() const
282 {
284 }
285
286 void SetPosition(std::uint64_t position);
287 void SetPosition(RNTupleLocatorObject64 position);
288};
289
290namespace Internal {
291
294 /// Uncompressed length of the anchor, including the checksum.
295 std::uint32_t fLength = 0;
296};
297
298/// The in-memory representation of a 32bit or 64bit on-disk index column. Wraps the integer in a
299/// named type so that templates can distinguish between integer data columns and index columns.
301public:
302 using ValueType = std::uint64_t;
303
304private:
306
307public:
308 RColumnIndex() = default;
309 explicit constexpr RColumnIndex(ValueType value) : fValue(value) {}
311 {
312 fValue = value;
313 return *this;
314 }
316 {
317 fValue += value;
318 return *this;
319 }
321 {
322 auto result = *this;
323 fValue++;
324 return result;
325 }
326 operator ValueType() const { return fValue; }
327};
328
329/// Holds the index and the tag of a kSwitch column
331private:
333 std::uint32_t fTag = 0;
334
335public:
336 RColumnSwitch() = default;
337 RColumnSwitch(ROOT::NTupleSize_t index, std::uint32_t tag) : fIndex(index), fTag(tag) {}
339 std::uint32_t GetTag() const { return fTag; }
340};
341
343 static_cast<ENTupleColumnType>(std::numeric_limits<std::underlying_type_t<ENTupleColumnType>>::max() - 1);
344
346 static_cast<ROOT::ENTupleStructure>(std::numeric_limits<std::underlying_type_t<ROOT::ENTupleStructure>>::max() - 1);
347
350
351} // namespace Internal
352} // namespace ROOT
353
354#endif
ROOT::R::TRInterface & r
Definition Object.C:4
#define R__likely(expr)
Definition RConfig.hxx:587
@ kUnknown
Definition TStructNode.h:19
@ kCollection
Definition TStructNode.h:21
@ kInvalid
Definition TSystem.h:87
RColumnIndex()=default
RColumnSwitch()=default
constexpr RColumnIndex(ValueType value)
RColumnIndex & operator=(const ValueType value)
RColumnIndex & operator+=(const ValueType value)
std::uint32_t GetTag() const
ROOT::NTupleSize_t GetIndex() const
RColumnSwitch(ROOT::NTupleSize_t index, std::uint32_t tag)
RNTupleLocalIndex operator*(ROOT::NTupleSize_t repetitionFactor) const
RNTupleLocalIndex operator-(ROOT::NTupleSize_t off) const
ROOT::NTupleSize_t fIndexInCluster
RNTupleLocalIndex operator++(int)
bool operator==(RNTupleLocalIndex other) const
RNTupleLocalIndex operator+(ROOT::NTupleSize_t off) const
ROOT::NTupleSize_t GetIndexInCluster() const
constexpr RNTupleLocalIndex(ROOT::DescriptorId_t clusterId, ROOT::NTupleSize_t indexInCluster)
RNTupleLocalIndex & operator++()
RNTupleLocalIndex(const RNTupleLocalIndex &other)=default
RNTupleLocalIndex & operator=(const RNTupleLocalIndex &other)=default
bool operator!=(RNTupleLocalIndex other) const
ROOT::DescriptorId_t fClusterId
ROOT::DescriptorId_t GetClusterId() const
RNTupleLocator payload that is common for object stores using 64bit location information.
bool operator==(const RNTupleLocatorObject64 &other) const
RNTupleLocatorObject64(std::uint64_t location)
std::uint64_t GetLocation() const
Generic information about the physical location of data.
std::uint64_t GetNBytesOnStorage() const
bool operator==(const RNTupleLocator &other) const
std::uint64_t fPosition
Simple on-disk locators consisting of a 64-bit offset use variant type uint64_t; Object store locator...
RNTupleLocator()=default
ELocatorType
Values for the Type field in non-disk locators.
std::uint64_t fFlagsAndNBytes
To save memory, we use the most significant bits to store the locator type (file, DAOS,...
static constexpr std::uint64_t kMaskFlags
The 4 most significant bits of fFlagsAndNBytes carry the locator type (3 bits) plus one bit for a res...
ELocatorType GetType() const
For non-disk locators, the value for the Type field.
std::uint8_t GetReserved() const
We currently only support one of the 8 available reserved bits (none are used so far).
static constexpr std::uint64_t kMaskReservedBit
T GetPosition() const
Note that for GetPosition() / SetPosition(), the locator type must correspond (kTypeFile,...
void SetType(ELocatorType type)
void SetPosition(std::uint64_t position)
static constexpr std::uint64_t kMaskType
void SetReserved(std::uint8_t reserved)
See GetReserved(): we ignore the reserved flag since we don't use it anywhere currently.
void SetNBytesOnStorage(std::uint64_t nBytesOnStorage)
constexpr ROOT::ENTupleStructure kTestFutureFieldStructure
constexpr ENTupleColumnType kTestFutureColumnType
constexpr RNTupleLocator::ELocatorType kTestLocatorType
std::ostream & operator<<(std::ostream &os, const RConcurrentHashColl::HashValue &h)
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
ENTupleStructure
The fields in the RNTuple data model tree can carry different structural information about the type s...
ENTupleColumnType
static RNTupleLocatorObject64 Get(const RNTupleLocator &loc)
RNTupleCardinality & operator=(const ValueType value)
constexpr RNTupleCardinality(ValueType value)