Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleSerialize.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleSerialize.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2021-08-02
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleSerialize
17#define ROOT7_RNTupleSerialize
18
19#include <ROOT/RError.hxx>
20#include <ROOT/RNTupleUtil.hxx>
21#include <ROOT/RSpan.hxx>
22
23#include <cstdint>
24#include <map>
25#include <string>
26#include <vector>
27
28namespace ROOT {
29namespace Experimental {
30
31enum class EColumnType;
32class RClusterDescriptor;
33class RClusterDescriptorBuilder;
34class RNTupleDescriptor;
35class RNTupleDescriptorBuilder;
36
37
38namespace Internal {
39
40// clang-format off
41/**
42\class ROOT::Experimental::Internal::RNTupleSerializer
43\ingroup NTuple
44\brief A helper class for serialization and deserialization of the RNTuple binary format
45
46All serialization and deserialization routines return the number of bytes processed (written or read).
47
48The serialization routines can be called with a nullptr buffer, in which case only the size required to perform
49a serialization is returned. Deserialization routines must be called with a buffer that is sufficiently large.
50
51Deserialization errors throw exceptions. Only when indicated or when passed as a parameter is the buffer size checked.
52*/
53// clang-format on
55public:
56 /// In order to handle changes to the serialization routine in future ntuple versions
57 static constexpr std::uint16_t kEnvelopeCurrentVersion = 1;
58 static constexpr std::uint16_t kEnvelopeMinVersion = 1;
59 static constexpr std::uint32_t kReleaseCandidateTag = 1;
60
61 static constexpr std::uint16_t kFlagRepetitiveField = 0x01;
62 static constexpr std::uint16_t kFlagAliasField = 0x02;
63
64 static constexpr std::uint32_t kFlagSortAscColumn = 0x01;
65 static constexpr std::uint32_t kFlagSortDesColumn = 0x02;
66 static constexpr std::uint32_t kFlagNonNegativeColumn = 0x04;
67
68 static constexpr DescriptorId_t kZeroFieldId = std::uint64_t(-2);
69
71 std::uint32_t fUnzippedSize = 0;
73 };
74
76 std::uint64_t fFirstEntry = 0;
77 std::uint64_t fNEntries = 0;
78 /// -1 for "all columns"
79 std::int32_t fColumnGroupID = -1;
80 };
81
83 std::uint32_t fNClusters = 0;
85 };
86
87 /// The serialization context is used for the piecewise serialization of a descriptor. During header serialization,
88 /// the mapping of in-memory field and column IDs to physical IDs is built so that it can be used for the
89 /// footer serialization in a second step.
90 class RContext {
91 private:
92 std::uint32_t fHeaderSize = 0;
93 std::uint32_t fHeaderCrc32 = 0;
94 std::vector<RClusterGroup> fClusterGroups;
95 std::map<DescriptorId_t, DescriptorId_t> fMem2PhysFieldIDs;
96 std::map<DescriptorId_t, DescriptorId_t> fMem2PhysColumnIDs;
97 std::map<DescriptorId_t, DescriptorId_t> fMem2PhysClusterIDs;
98 std::vector<DescriptorId_t> fPhys2MemFieldIDs;
99 std::vector<DescriptorId_t> fPhys2MemColumnIDs;
100 std::vector<DescriptorId_t> fPhys2MemClusterIDs;
101 public:
/// Stores the given header size in fHeaderSize; retrievable through GetHeaderSize().
102 void SetHeaderSize(std::uint32_t size) { fHeaderSize = size; }
/// Returns the header size last stored with SetHeaderSize() (0 if it was never set).
103 std::uint32_t GetHeaderSize() const { return fHeaderSize; }
/// Stores the given CRC32 value of the header in fHeaderCrc32; retrievable through GetHeaderCRC32().
104 void SetHeaderCRC32(std::uint32_t crc32) { fHeaderCrc32 = crc32; }
/// Returns the header CRC32 last stored with SetHeaderCRC32() (0 if it was never set).
105 std::uint32_t GetHeaderCRC32() const { return fHeaderCrc32; }
/// Appends a new RClusterGroup, brace-initialized from the cluster count and the
/// page list envelope link, to the end of fClusterGroups.
/// \param nClusters First initializer of the new cluster group record
/// \param pageListEnvelope Second initializer of the new cluster group record (copied into it)
106 void AddClusterGroup(std::uint32_t nClusters, const REnvelopeLink &pageListEnvelope) {
107 fClusterGroups.push_back({nClusters, pageListEnvelope});
108 }
/// Gives read-only access to the cluster groups registered so far through AddClusterGroup(),
/// in insertion order. The reference points at the member vector itself, not a copy.
109 const std::vector<RClusterGroup> &GetClusterGroups() const {
110 return fClusterGroups;
111 }
113 auto physId = fPhys2MemFieldIDs.size();
114 fMem2PhysFieldIDs[memId] = physId;
115 fPhys2MemFieldIDs.push_back(memId);
116 return physId;
117 }
119 auto physId = fPhys2MemColumnIDs.size();
120 fMem2PhysColumnIDs[memId] = physId;
121 fPhys2MemColumnIDs.push_back(memId);
122 return physId;
123 }
125 auto physId = fPhys2MemClusterIDs.size();
126 fMem2PhysClusterIDs[memId] = physId;
127 fPhys2MemClusterIDs.push_back(memId);
128 return physId;
129 }
136 };
137
138 /// Writes a CRC32 checksum of the byte range given by data and length.
139 static std::uint32_t SerializeCRC32(const unsigned char *data, std::uint32_t length,
140 std::uint32_t &crc32, void *buffer);
141 /// Expects a CRC32 checksum in the 4 bytes following data + length and verifies it.
142 static RResult<void> VerifyCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32);
143 static RResult<void> VerifyCRC32(const unsigned char *data, std::uint32_t length);
144
145 static std::uint32_t SerializeInt16(std::int16_t val, void *buffer);
146 static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val);
147 static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer);
148 static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val);
149
150 static std::uint32_t SerializeInt32(std::int32_t val, void *buffer);
151 static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val);
152 static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer);
153 static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val);
154
155 static std::uint32_t SerializeInt64(std::int64_t val, void *buffer);
156 static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val);
157 static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer);
158 static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val);
159
160 static std::uint32_t SerializeString(const std::string &val, void *buffer);
161 static RResult<std::uint32_t> DeserializeString(const void *buffer, std::uint32_t bufSize, std::string &val);
162
163 /// While we could just interpret the enums as ints, we make the translation explicit
164 /// in order to avoid accidentally changing the on-disk numbers when adjusting the enum classes.
165 static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer);
166 static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer);
169
170 static std::uint32_t SerializeEnvelopePreamble(void *buffer);
171 static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size, void *buffer);
172 static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size,
173 std::uint32_t &crc32, void *buffer);
174 // The bufSize must include the 4 bytes for the final CRC32 checksum.
175 static RResult<std::uint32_t> DeserializeEnvelope(const void *buffer, std::uint32_t bufSize);
176 static RResult<std::uint32_t> DeserializeEnvelope(const void *buffer, std::uint32_t bufSize, std::uint32_t &crc32);
177
178 static std::uint32_t SerializeRecordFramePreamble(void *buffer);
179 static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer);
180 static std::uint32_t SerializeFramePostscript(void *frame, std::int32_t size);
181 static RResult<std::uint32_t> DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize,
182 std::uint32_t &frameSize, std::uint32_t &nitems);
183 static RResult<std::uint32_t> DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize,
184 std::uint32_t &frameSize);
185
186 // An empty flags vector will be serialized as a single, zero feature flag
187 // The most significant bit in every flag is reserved and must _not_ be set
188 static std::uint32_t SerializeFeatureFlags(const std::vector<std::int64_t> &flags, void *buffer);
189 static RResult<std::uint32_t> DeserializeFeatureFlags(const void *buffer, std::uint32_t bufSize,
190 std::vector<std::int64_t> &flags);
191
192 static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer);
193 static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer);
194 static RResult<std::uint32_t> DeserializeLocator(const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator);
195 static RResult<std::uint32_t> DeserializeEnvelopeLink(const void *buffer, std::uint32_t bufSize,
196 REnvelopeLink &envelopeLink);
197
198 static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer);
199 static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer);
200 static RResult<std::uint32_t> DeserializeClusterSummary(const void *buffer, std::uint32_t bufSize,
201 RClusterSummary &clusterSummary);
202 static RResult<std::uint32_t> DeserializeClusterGroup(const void *buffer, std::uint32_t bufSize,
203 RClusterGroup &clusterGroup);
204
205 static RContext SerializeHeaderV1(void *buffer, const RNTupleDescriptor &desc);
206 static std::uint32_t SerializePageListV1(void *buffer,
207 const RNTupleDescriptor &desc,
208 std::span<DescriptorId_t> physClusterIDs,
209 const RContext &context);
210 static std::uint32_t SerializeFooterV1(void *buffer, const RNTupleDescriptor &desc, const RContext &context);
211
212 static RResult<void> DeserializeHeaderV1(const void *buffer,
213 std::uint32_t bufSize,
214 RNTupleDescriptorBuilder &descBuilder);
215 static RResult<void> DeserializeFooterV1(const void *buffer,
216 std::uint32_t bufSize,
217 RNTupleDescriptorBuilder &descBuilder);
218 static RResult<void> DeserializePageListV1(const void *buffer,
219 std::uint32_t bufSize,
220 std::vector<RClusterDescriptorBuilder> &clusters);
221}; // class RNTupleSerializer
222
223} // namespace Internal
224} // namespace Experimental
225} // namespace ROOT
226
227#endif // ROOT7_RNTupleSerialize
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
int type
Definition TGX11.cxx:121
The available trivial, native content types of a column.
The serialization context is used for the piecewise serialization of a descriptor.
DescriptorId_t GetPhysClusterId(DescriptorId_t memId) const
std::map< DescriptorId_t, DescriptorId_t > fMem2PhysColumnIDs
void AddClusterGroup(std::uint32_t nClusters, const REnvelopeLink &pageListEnvelope)
DescriptorId_t GetMemClusterId(DescriptorId_t physId) const
std::map< DescriptorId_t, DescriptorId_t > fMem2PhysClusterIDs
std::map< DescriptorId_t, DescriptorId_t > fMem2PhysFieldIDs
const std::vector< RClusterGroup > & GetClusterGroups() const
DescriptorId_t GetPhysColumnId(DescriptorId_t memId) const
DescriptorId_t GetMemFieldId(DescriptorId_t physId) const
DescriptorId_t GetPhysFieldId(DescriptorId_t memId) const
DescriptorId_t GetMemColumnId(DescriptorId_t physId) const
A helper class for serialization and deserialization of the RNTuple binary format.
static RResult< std::uint32_t > DeserializeString(const void *buffer, std::uint32_t bufSize, std::string &val)
static std::uint32_t SerializeFeatureFlags(const std::vector< std::int64_t > &flags, void *buffer)
static constexpr std::uint32_t kReleaseCandidateTag
static std::uint32_t SerializePageListV1(void *buffer, const RNTupleDescriptor &desc, std::span< DescriptorId_t > physClusterIDs, const RContext &context)
static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer)
static RResult< std::uint32_t > DeserializeLocator(const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator)
static constexpr std::uint16_t kFlagRepetitiveField
static std::uint32_t SerializeCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32, void *buffer)
Writes a CRC32 checksum of the byte range given by data and length.
static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer)
static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val)
static RResult< void > DeserializePageListV1(const void *buffer, std::uint32_t bufSize, std::vector< RClusterDescriptorBuilder > &clusters)
static std::uint32_t SerializeString(const std::string &val, void *buffer)
static constexpr std::uint16_t kEnvelopeCurrentVersion
In order to handle changes to the serialization routine in future ntuple versions.
static constexpr std::uint32_t kFlagNonNegativeColumn
static constexpr std::uint32_t kFlagSortDesColumn
static RResult< std::uint32_t > DeserializeEnvelope(const void *buffer, std::uint32_t bufSize)
static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val)
static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer)
static RContext SerializeHeaderV1(void *buffer, const RNTupleDescriptor &desc)
static RResult< void > DeserializeFooterV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeInt16(std::int16_t val, void *buffer)
static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer)
static std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
static std::uint32_t SerializeEnvelopePreamble(void *buffer)
Currently all envelopes have the same version number (1).
static RResult< std::uint16_t > DeserializeColumnType(const void *buffer, ROOT::Experimental::EColumnType &type)
static RResult< std::uint32_t > DeserializeClusterGroup(const void *buffer, std::uint32_t bufSize, RClusterGroup &clusterGroup)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static constexpr std::uint16_t kEnvelopeMinVersion
static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val)
static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val)
static RResult< std::uint32_t > DeserializeEnvelopeLink(const void *buffer, std::uint32_t bufSize, REnvelopeLink &envelopeLink)
static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size, void *buffer)
static RResult< std::uint16_t > DeserializeFieldStructure(const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer)
static std::uint32_t SerializeRecordFramePreamble(void *buffer)
static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
static RResult< std::uint32_t > DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize, std::uint32_t &nitems)
static RResult< std::uint32_t > DeserializeFeatureFlags(const void *buffer, std::uint32_t bufSize, std::vector< std::int64_t > &flags)
static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer)
static std::uint32_t SerializeFramePostscript(void *frame, std::int32_t size)
static std::uint32_t SerializeFooterV1(void *buffer, const RNTupleDescriptor &desc, const RContext &context)
static std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
static constexpr std::uint32_t kFlagSortAscColumn
static RResult< void > VerifyCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32)
Expects a CRC32 checksum in the 4 bytes following data + length and verifies it.
static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer)
While we could just interpret the enums as ints, we make the translation explicit in order to avoid a...
static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
static RResult< void > DeserializeHeaderV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static RResult< std::uint32_t > DeserializeClusterSummary(const void *buffer, std::uint32_t bufSize, RClusterSummary &clusterSummary)
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:195
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Generic information about the physical location of data.