Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleUtil.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleUtil.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5
6/*************************************************************************
7 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
8 * All rights reserved. *
9 * *
10 * For the licensing terms see $ROOTSYS/LICENSE. *
11 * For the list of contributors see $ROOTSYS/README/CREDITS. *
12 *************************************************************************/
13
14#ifndef ROOT_RNTupleUtil
15#define ROOT_RNTupleUtil
16
17#include <cstdint>
18#include <string>
19#include <string_view>
20#include <type_traits>
21#include <variant>
22
23#include <ROOT/RError.hxx>
24#include <ROOT/RLogger.hxx>
25
26namespace ROOT {
27
28class RLogChannel;
29namespace Internal {
30/// Log channel for RNTuple diagnostics.
32} // namespace Internal
33
34/// Helper types to present an offset column as array of collection sizes.
35/// See RField<RNTupleCardinality<SizeT>> for details.
36template <typename SizeT>
38 static_assert(std::is_same_v<SizeT, std::uint32_t> || std::is_same_v<SizeT, std::uint64_t>,
39 "RNTupleCardinality is only supported with std::uint32_t or std::uint64_t template parameters");
40
41 using ValueType = SizeT;
42
44 explicit constexpr RNTupleCardinality(ValueType value) : fValue(value) {}
46 {
47 fValue = value;
48 return *this;
49 }
50 operator ValueType() const { return fValue; }
51
53};
54
55// clang-format off
56/**
57\class ROOT::ENTupleColumnType
58\ingroup NTuple
59\brief The available trivial, native content types of a column
60
61More complex types, such as classes, get translated into columns of such simple types by the RField.
62When changed, remember to update
63 - RColumnElement::Generate()
64 - RColumnElement::GetTypeName()
65 - RColumnElement::GetValidBitRange()
66 - RColumnElement template specializations / packing & unpacking
67 - If necessary, endianness handling for the packing + unit test in ntuple_endian
68 - RNTupleSerializer::[Des|S]erializeColumnType
69*/
70// clang-format on
72 kUnknown = 0,
73 // type for root columns of (nested) collections; offsets are relative to the current cluster
76 // 96 bit column that is a pair of a kIndex64 and a 32bit dispatch tag to a column ID;
77 // used to serialize std::variant.
78 kSwitch,
79 kByte,
80 kChar,
81 kBit,
82 kReal64,
83 kReal32,
84 kReal16,
85 kInt64,
86 kUInt64,
87 kInt32,
88 kUInt32,
89 kInt16,
90 kUInt16,
91 kInt8,
92 kUInt8,
105 kMax,
106};
107
108/// The fields in the ntuple model tree can carry different structural information about the type system.
109/// Leaf fields contain just data, collection fields resolve to offset columns, record fields have no
110/// materialization on the primitive column layer.
111// IMPORTANT: if you add members, remember to change the related `operator<<` below.
112enum class ENTupleStructure : std::uint16_t {
113 kInvalid,
114 kLeaf,
116 kRecord,
117 kVariant,
118 kStreamer,
120};
121
122inline std::ostream &operator<<(std::ostream &os, ENTupleStructure structure)
123{
124 static const char *const names[] = {"Invalid", "Leaf", "Collection", "Record", "Variant", "Streamer", "Unknown"};
125 static_assert((std::size_t)ENTupleStructure::kUnknown + 1 == std::size(names));
126
127 if (R__likely(static_cast<std::size_t>(structure) <= std::size(names)))
129 else
130 os << "(invalid)";
131 return os;
132}
133
134/// Integer type long enough to hold the maximum number of entries in a column
135using NTupleSize_t = std::uint64_t;
136constexpr NTupleSize_t kInvalidNTupleIndex = std::uint64_t(-1);
137
138/// Distinguishes elements of the same type within a descriptor, e.g. different fields
139using DescriptorId_t = std::uint64_t;
140constexpr DescriptorId_t kInvalidDescriptorId = std::uint64_t(-1);
141
142/// Addresses a column element or field item relative to a particular cluster, instead of a global NTupleSize_t index
190
191/// RNTupleLocator payload that is common for object stores using 64bit location information.
192/// This might not contain the full location of the content. In particular, for page locators this information may be
193/// used in conjunction with the cluster and column ID.
195private:
196 std::uint64_t fLocation = 0;
197
198public:
200 explicit RNTupleLocatorObject64(std::uint64_t location) : fLocation(location) {}
201 bool operator==(const RNTupleLocatorObject64 &other) const { return fLocation == other.fLocation; }
202 std::uint64_t GetLocation() const { return fLocation; }
203};
204
205/// Generic information about the physical location of data. Values depend on the concrete storage type. E.g.,
206/// for a local file `fPosition` might be a 64bit file offset. Referenced objects on storage can be compressed
207/// and therefore we need to store their actual size.
209public:
210 /// Values for the _Type_ field in non-disk locators. Serializable types must have the MSb == 0; see
211 /// `doc/BinaryFormatSpecification.md` for details
212 enum ELocatorType : std::uint8_t {
213 // The kTypeFile locator may translate to an on-disk standard locator (type 0x00) or a large locator (type 0x01),
214 // if the size of the referenced data block is >2GB
215 kTypeFile = 0x00,
216 kTypeDAOS = 0x02,
217
221 };
222
223private:
224 std::uint64_t fNBytesOnStorage = 0;
225 /// Simple on-disk locators consisting of a 64-bit offset use variant type `uint64_t`; extended locators have
226 /// `fPosition.index()` > 0
227 std::variant<std::uint64_t, RNTupleLocatorObject64> fPosition{};
228 /// For non-disk locators, the value for the _Type_ field. This makes it possible to have different type values even
229 /// if the payload structure is identical.
231 /// Reserved for use by concrete storage backends
232 std::uint8_t fReserved = 0;
233
234public:
235 RNTupleLocator() = default;
236
237 bool operator==(const RNTupleLocator &other) const
238 {
239 return fPosition == other.fPosition && fNBytesOnStorage == other.fNBytesOnStorage && fType == other.fType;
240 }
241
242 std::uint64_t GetNBytesOnStorage() const { return fNBytesOnStorage; }
243 ELocatorType GetType() const { return fType; }
244 std::uint8_t GetReserved() const { return fReserved; }
245
248 void SetReserved(std::uint8_t reserved) { fReserved = reserved; }
249
250 template <typename T>
251 T GetPosition() const
252 {
253 return std::get<T>(fPosition);
254 }
255
256 template <typename T>
257 void SetPosition(T position)
258 {
259 fPosition = position;
260 }
261};
262
263namespace Internal {
264
265/// The in-memory representation of a 32bit or 64bit on-disk index column. Wraps the integer in a
266/// named type so that templates can distinguish between integer data columns and index columns.
268public:
269 using ValueType = std::uint64_t;
270
271private:
273
274public:
275 RColumnIndex() = default;
276 explicit constexpr RColumnIndex(ValueType value) : fValue(value) {}
278 {
279 fValue = value;
280 return *this;
281 }
283 {
284 fValue += value;
285 return *this;
286 }
288 {
289 auto result = *this;
290 fValue++;
291 return result;
292 }
293 operator ValueType() const { return fValue; }
294};
295
296/// Holds the index and the tag of a kSwitch column
298private:
300 std::uint32_t fTag = 0;
301
302public:
303 RColumnSwitch() = default;
304 RColumnSwitch(ROOT::NTupleSize_t index, std::uint32_t tag) : fIndex(index), fTag(tag) {}
306 std::uint32_t GetTag() const { return fTag; }
307};
308
309} // namespace Internal
310
311namespace Internal {
312
313template <typename T>
315{
316 const static std::shared_ptr<T> fgRawPtrCtrlBlock;
317 return std::shared_ptr<T>(fgRawPtrCtrlBlock, rawPtr);
318}
319
/// Allocate an array of `size` default-initialized elements and hand it out as an owning pointer.
/// Intended for buffers whose contents do not need to be initialized.
///
/// Once C++20 is available, std::make_unique_for_overwrite<T[]> can take over this job.
template <typename T>
std::unique_ptr<T[]> MakeUninitArray(std::size_t size)
{
   // Deliberately not std::make_unique<T[]>: that would value-initialize every element.
   std::unique_ptr<T[]> buffer(new T[size]);
   return buffer;
}
329
330inline constexpr ENTupleColumnType kTestFutureColumnType =
331 static_cast<ENTupleColumnType>(std::numeric_limits<std::underlying_type_t<ENTupleColumnType>>::max() - 1);
332
334 static_cast<ROOT::ENTupleStructure>(std::numeric_limits<std::underlying_type_t<ROOT::ENTupleStructure>>::max() - 1);
335
338
339/// Check whether a given string is a valid name according to the RNTuple specification
340RResult<void> EnsureValidNameForRNTuple(std::string_view name, std::string_view where);
341
342} // namespace Internal
343} // namespace ROOT
344
345#endif
#define R__likely(expr)
Definition RConfig.hxx:587
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
The in-memory representation of a 32bit or 64bit on-disk index column.
constexpr RColumnIndex(ValueType value)
RColumnIndex & operator=(const ValueType value)
RColumnIndex & operator+=(const ValueType value)
Holds the index and the tag of a kSwitch column.
std::uint32_t GetTag() const
ROOT::NTupleSize_t GetIndex() const
RColumnSwitch(ROOT::NTupleSize_t index, std::uint32_t tag)
A log configuration for a channel, e.g.
Definition RLogger.hxx:98
Addresses a column element or field item relative to a particular cluster, instead of a global NTuple...
RNTupleLocalIndex operator-(ROOT::NTupleSize_t off) const
ROOT::NTupleSize_t fIndexInCluster
RNTupleLocalIndex operator++(int)
bool operator==(RNTupleLocalIndex other) const
RNTupleLocalIndex operator+(ROOT::NTupleSize_t off) const
ROOT::NTupleSize_t GetIndexInCluster() const
constexpr RNTupleLocalIndex(ROOT::DescriptorId_t clusterId, ROOT::NTupleSize_t indexInCluster)
RNTupleLocalIndex & operator++()
RNTupleLocalIndex(const RNTupleLocalIndex &other)=default
RNTupleLocalIndex & operator=(const RNTupleLocalIndex &other)=default
bool operator!=(RNTupleLocalIndex other) const
ROOT::DescriptorId_t fClusterId
ROOT::DescriptorId_t GetClusterId() const
RNTupleLocator payload that is common for object stores using 64bit location information.
bool operator==(const RNTupleLocatorObject64 &other) const
RNTupleLocatorObject64(std::uint64_t location)
std::uint64_t GetLocation() const
Generic information about the physical location of data.
std::uint64_t GetNBytesOnStorage() const
bool operator==(const RNTupleLocator &other) const
RNTupleLocator()=default
ELocatorType
Values for the Type field in non-disk locators.
std::variant< std::uint64_t, RNTupleLocatorObject64 > fPosition
Simple on-disk locators consisting of a 64-bit offset use variant type uint64_t; extended locators ha...
std::uint8_t fReserved
Reserved for use by concrete storage backends.
std::uint8_t GetReserved() const
void SetPosition(T position)
void SetType(ELocatorType type)
ELocatorType GetType() const
void SetReserved(std::uint8_t reserved)
void SetNBytesOnStorage(std::uint64_t nBytesOnStorage)
ELocatorType fType
For non-disk locators, the value for the Type field.
std::uint64_t fNBytesOnStorage
constexpr ROOT::ENTupleStructure kTestFutureFieldStructure
RResult< void > EnsureValidNameForRNTuple(std::string_view name, std::string_view where)
Check whether a given string is a valid name according to the RNTuple specification.
ROOT::RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
std::unique_ptr< T[]> MakeUninitArray(std::size_t size)
Make an array of default-initialized elements.
auto MakeAliasedSharedPtr(T *rawPtr)
constexpr RNTupleLocator::ELocatorType kTestLocatorType
std::ostream & operator<<(std::ostream &os, const RConcurrentHashColl::HashValue &h)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
ENTupleColumnType
Helper types to present an offset column as array of collection sizes.
RNTupleCardinality & operator=(const ValueType value)
constexpr RNTupleCardinality(ValueType value)