Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleSerialize.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleSerialize.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2021-08-02
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#ifndef ROOT7_RNTupleSerialize
18#define ROOT7_RNTupleSerialize
19
20#include <ROOT/RError.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RSpan.hxx>
23
24#include <cstdint>
25#include <map>
26#include <string>
27#include <vector>
28
29namespace ROOT {
30namespace Experimental {
31
32enum class EColumnType;
33class RClusterDescriptor;
34class RClusterDescriptorBuilder;
35class RNTupleDescriptor;
36class RNTupleDescriptorBuilder;
37
38
39namespace Internal {
40
41// clang-format off
42/**
43\class ROOT::Experimental::Internal::RNTupleSerializer
44\ingroup NTuple
45\brief A helper class for serialization and deserialization of the RNTuple binary format
46
47All serialization and deserialization routines return the number of bytes processed (written or read).
48
49The serialization routines can be called with a nullptr buffer, in which case only the size required to perform
50a serialization is returned. Deserialization routines must be called with a buffer that is sufficiently large.
51
52Deserialization errors throw exceptions. Only when indicated or when passed as a parameter is the buffer size checked.
53*/
54// clang-format on
56public:
57 /// In order to handle changes to the serialization routine in future ntuple versions
58 static constexpr std::uint16_t kEnvelopeCurrentVersion = 1;
59 static constexpr std::uint16_t kEnvelopeMinVersion = 1;
60 static constexpr std::uint32_t kReleaseCandidateTag = 1;
61
62 static constexpr std::uint16_t kFlagRepetitiveField = 0x01;
63
64 static constexpr std::uint32_t kFlagSortAscColumn = 0x01;
65 static constexpr std::uint32_t kFlagSortDesColumn = 0x02;
66 static constexpr std::uint32_t kFlagNonNegativeColumn = 0x04;
67 static constexpr std::uint32_t kFlagDeferredColumn = 0x08;
68
69 static constexpr DescriptorId_t kZeroFieldId = std::uint64_t(-2);
70
72 std::uint32_t fUnzippedSize = 0;
74 };
75
77 std::uint64_t fFirstEntry = 0;
78 std::uint64_t fNEntries = 0;
79 /// -1 for "all columns"
80 std::int32_t fColumnGroupID = -1;
81 };
82
84 std::uint32_t fNClusters = 0;
86 };
87
88 /// The serialization context is used for the piecewise serialization of a descriptor. During header serialization,
89 /// the mapping of in-memory field and column IDs to on-disk IDs is built so that it can be used for the
90 /// footer serialization in a second step.
91 class RContext {
92 private:
93 std::uint32_t fHeaderSize = 0;
94 std::uint32_t fHeaderCrc32 = 0;
95 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskFieldIDs;
96 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskColumnIDs;
97 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskClusterIDs;
98 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskClusterGroupIDs;
99 std::vector<DescriptorId_t> fOnDisk2MemFieldIDs;
100 std::vector<DescriptorId_t> fOnDisk2MemColumnIDs;
101 std::vector<DescriptorId_t> fOnDisk2MemClusterIDs;
102 std::vector<DescriptorId_t> fOnDisk2MemClusterGroupIDs;
103 std::size_t fHeaderExtensionOffset = -1U;
104
105 public:
106 void SetHeaderSize(std::uint32_t size) { fHeaderSize = size; }
107 std::uint32_t GetHeaderSize() const { return fHeaderSize; }
108 void SetHeaderCRC32(std::uint32_t crc32) { fHeaderCrc32 = crc32; }
109 std::uint32_t GetHeaderCRC32() const { return fHeaderCrc32; }
110 /// Map an in-memory field ID to its on-disk counterpart. It is allowed to call this function multiple times for
111 /// the same `memId`, in which case the return value is the on-disk ID assigned on the first call.
113 auto onDiskId = fOnDisk2MemFieldIDs.size();
114 const auto &p = fMem2OnDiskFieldIDs.try_emplace(memId, onDiskId);
115 if (p.second)
116 fOnDisk2MemFieldIDs.push_back(memId);
117 return (*p.first).second;
118 }
119 /// Map an in-memory column ID to its on-disk counterpart. It is allowed to call this function multiple times for
120 /// the same `memId`, in which case the return value is the on-disk ID assigned on the first call.
122 auto onDiskId = fOnDisk2MemColumnIDs.size();
123 const auto &p = fMem2OnDiskColumnIDs.try_emplace(memId, onDiskId);
124 if (p.second)
125 fOnDisk2MemColumnIDs.push_back(memId);
126 return (*p.first).second;
127 }
129 auto onDiskId = fOnDisk2MemClusterIDs.size();
130 fMem2OnDiskClusterIDs[memId] = onDiskId;
131 fOnDisk2MemClusterIDs.push_back(memId);
132 return onDiskId;
133 }
135 {
136 auto onDiskId = fOnDisk2MemClusterGroupIDs.size();
137 fMem2OnDiskClusterGroupIDs[memId] = onDiskId;
138 fOnDisk2MemClusterGroupIDs.push_back(memId);
139 return onDiskId;
140 }
141 /// Map in-memory field and column IDs to their on-disk counterparts. This function is unconditionally called
142 /// during header serialization. This function must be manually called after an incremental schema update as page
143 /// list serialization requires all columns to be mapped.
144 void MapSchema(const RNTupleDescriptor &desc, bool forHeaderExtension);
145
150 {
151 return fMem2OnDiskClusterGroupIDs.at(memId);
152 }
157 {
158 return fOnDisk2MemClusterGroupIDs[onDiskId];
159 }
160
161 /// Return a vector containing the in-memory field ID for each on-disk counterpart, in order, i.e. the `i`-th
162 /// value corresponds to the in-memory field ID for `i`-th on-disk ID
163 const std::vector<DescriptorId_t> &GetOnDiskFieldList() const { return fOnDisk2MemFieldIDs; }
164 /// Mark the first on-disk field ID that is part of the schema extension
166 /// Return the offset of the first element in `fOnDisk2MemFieldIDs` that is part of the schema extension
167 std::size_t GetHeaderExtensionOffset() const { return fHeaderExtensionOffset; }
168 };
169
170 /// Writes a CRC32 checksum of the byte range given by data and length.
171 static std::uint32_t SerializeCRC32(const unsigned char *data, std::uint32_t length,
172 std::uint32_t &crc32, void *buffer);
173 /// Expects a CRC32 checksum in the 4 bytes following data + length and verifies it.
174 static RResult<void> VerifyCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32);
175 static RResult<void> VerifyCRC32(const unsigned char *data, std::uint32_t length);
176
177 static std::uint32_t SerializeInt16(std::int16_t val, void *buffer);
178 static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val);
179 static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer);
180 static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val);
181
182 static std::uint32_t SerializeInt32(std::int32_t val, void *buffer);
183 static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val);
184 static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer);
185 static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val);
186
187 static std::uint32_t SerializeInt64(std::int64_t val, void *buffer);
188 static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val);
189 static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer);
190 static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val);
191
192 static std::uint32_t SerializeString(const std::string &val, void *buffer);
193 static RResult<std::uint32_t> DeserializeString(const void *buffer, std::uint32_t bufSize, std::string &val);
194
195 /// While we could just interpret the enums as ints, we make the translation explicit
196 /// in order to avoid accidentally changing the on-disk numbers when adjusting the enum classes.
197 static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer);
198 static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer);
201
202 static std::uint32_t SerializeEnvelopePreamble(void *buffer);
203 static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size, void *buffer);
204 static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size,
205 std::uint32_t &crc32, void *buffer);
206 // The bufSize must include the 4 bytes for the final CRC32 checksum.
207 static RResult<std::uint32_t> DeserializeEnvelope(const void *buffer, std::uint32_t bufSize);
208 static RResult<std::uint32_t> DeserializeEnvelope(const void *buffer, std::uint32_t bufSize, std::uint32_t &crc32);
209
210 static std::uint32_t SerializeRecordFramePreamble(void *buffer);
211 static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer);
212 static std::uint32_t SerializeFramePostscript(void *frame, std::int32_t size);
213 static RResult<std::uint32_t> DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize,
214 std::uint32_t &frameSize, std::uint32_t &nitems);
215 static RResult<std::uint32_t> DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize,
216 std::uint32_t &frameSize);
217
218 // An empty flags vector will be serialized as a single, zero feature flag
219 // The most significant bit in every flag is reserved and must _not_ be set
220 static std::uint32_t SerializeFeatureFlags(const std::vector<std::int64_t> &flags, void *buffer);
221 static RResult<std::uint32_t> DeserializeFeatureFlags(const void *buffer, std::uint32_t bufSize,
222 std::vector<std::int64_t> &flags);
223
224 static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer);
225 static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer);
226 static RResult<std::uint32_t> DeserializeLocator(const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator);
227 static RResult<std::uint32_t> DeserializeEnvelopeLink(const void *buffer, std::uint32_t bufSize,
228 REnvelopeLink &envelopeLink);
229
230 static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer);
231 static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer);
232 static RResult<std::uint32_t> DeserializeClusterSummary(const void *buffer, std::uint32_t bufSize,
233 RClusterSummary &clusterSummary);
234 static RResult<std::uint32_t> DeserializeClusterGroup(const void *buffer, std::uint32_t bufSize,
235 RClusterGroup &clusterGroup);
236
237 /// Serialize the schema description in `desc` into `buffer`. If `forHeaderExtension` is true, serialize only the
238 /// fields and columns tagged as part of the header extension (see `RNTupleDescriptorBuilder::BeginHeaderExtension`).
239 static std::uint32_t SerializeSchemaDescription(void *buffer, const RNTupleDescriptor &desc, const RContext &context,
240 bool forHeaderExtension = false);
242 DeserializeSchemaDescription(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder);
243
244 static RContext SerializeHeaderV1(void *buffer, const RNTupleDescriptor &desc);
245 static std::uint32_t SerializePageListV1(void *buffer,
246 const RNTupleDescriptor &desc,
247 std::span<DescriptorId_t> physClusterIDs,
248 const RContext &context);
249 static std::uint32_t SerializeFooterV1(void *buffer, const RNTupleDescriptor &desc, const RContext &context);
250
251 static RResult<void> DeserializeHeaderV1(const void *buffer,
252 std::uint32_t bufSize,
253 RNTupleDescriptorBuilder &descBuilder);
254 static RResult<void> DeserializeFooterV1(const void *buffer,
255 std::uint32_t bufSize,
256 RNTupleDescriptorBuilder &descBuilder);
257 // The clusters vector must be initialized with the cluster summaries corresponding to the page list
258 static RResult<void> DeserializePageListV1(const void *buffer,
259 std::uint32_t bufSize,
260 std::vector<RClusterDescriptorBuilder> &clusters);
261}; // class RNTupleSerializer
262
263} // namespace Internal
264} // namespace Experimental
265} // namespace ROOT
266
267#endif // ROOT7_RNTupleSerialize
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t nitems
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
The available trivial, native content types of a column.
The serialization context is used for the piecewise serialization of a descriptor.
DescriptorId_t GetOnDiskColumnId(DescriptorId_t memId) const
const std::vector< DescriptorId_t > & GetOnDiskFieldList() const
Return a vector containing the in-memory field ID for each on-disk counterpart, in order,...
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskClusterIDs
DescriptorId_t GetOnDiskFieldId(DescriptorId_t memId) const
DescriptorId_t GetMemColumnId(DescriptorId_t onDiskId) const
DescriptorId_t MapColumnId(DescriptorId_t memId)
Map an in-memory column ID to its on-disk counterpart.
DescriptorId_t MapFieldId(DescriptorId_t memId)
Map an in-memory field ID to its on-disk counterpart.
DescriptorId_t GetMemFieldId(DescriptorId_t onDiskId) const
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskFieldIDs
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskColumnIDs
std::size_t GetHeaderExtensionOffset() const
Return the offset of the first element in fOnDisk2MemFieldIDs that is part of the schema extension.
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskClusterGroupIDs
DescriptorId_t GetMemClusterGroupId(DescriptorId_t onDiskId) const
DescriptorId_t GetMemClusterId(DescriptorId_t onDiskId) const
DescriptorId_t GetOnDiskClusterGroupId(DescriptorId_t memId) const
DescriptorId_t GetOnDiskClusterId(DescriptorId_t memId) const
void BeginHeaderExtension()
Mark the first on-disk field ID that is part of the schema extension.
void MapSchema(const RNTupleDescriptor &desc, bool forHeaderExtension)
Map in-memory field and column IDs to their on-disk counterparts.
A helper class for serialization and deserialization of the RNTuple binary format.
static RResult< std::uint32_t > DeserializeString(const void *buffer, std::uint32_t bufSize, std::string &val)
static std::uint32_t SerializeFeatureFlags(const std::vector< std::int64_t > &flags, void *buffer)
static constexpr std::uint32_t kReleaseCandidateTag
static std::uint32_t SerializePageListV1(void *buffer, const RNTupleDescriptor &desc, std::span< DescriptorId_t > physClusterIDs, const RContext &context)
static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer)
static RResult< std::uint32_t > DeserializeLocator(const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator)
static constexpr std::uint16_t kFlagRepetitiveField
static std::uint32_t SerializeCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32, void *buffer)
Writes a CRC32 checksum of the byte range given by data and length.
static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer)
static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val)
static RResult< void > DeserializePageListV1(const void *buffer, std::uint32_t bufSize, std::vector< RClusterDescriptorBuilder > &clusters)
static std::uint32_t SerializeString(const std::string &val, void *buffer)
static constexpr std::uint16_t kEnvelopeCurrentVersion
In order to handle changes to the serialization routine in future ntuple versions.
static constexpr std::uint32_t kFlagNonNegativeColumn
static constexpr std::uint32_t kFlagSortDesColumn
static RResult< std::uint32_t > DeserializeEnvelope(const void *buffer, std::uint32_t bufSize)
static constexpr std::uint32_t kFlagDeferredColumn
static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val)
static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer)
static RContext SerializeHeaderV1(void *buffer, const RNTupleDescriptor &desc)
static RResult< void > DeserializeFooterV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeInt16(std::int16_t val, void *buffer)
static RResult< std::uint32_t > DeserializeSchemaDescription(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeSchemaDescription(void *buffer, const RNTupleDescriptor &desc, const RContext &context, bool forHeaderExtension=false)
Serialize the schema description in desc into buffer.
static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer)
static std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
static std::uint32_t SerializeEnvelopePreamble(void *buffer)
Currently all envelopes have the same version number (1).
static RResult< std::uint16_t > DeserializeColumnType(const void *buffer, ROOT::Experimental::EColumnType &type)
static RResult< std::uint32_t > DeserializeClusterGroup(const void *buffer, std::uint32_t bufSize, RClusterGroup &clusterGroup)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static constexpr std::uint16_t kEnvelopeMinVersion
static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val)
static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val)
static RResult< std::uint32_t > DeserializeEnvelopeLink(const void *buffer, std::uint32_t bufSize, REnvelopeLink &envelopeLink)
static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size, void *buffer)
static RResult< std::uint16_t > DeserializeFieldStructure(const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer)
static std::uint32_t SerializeRecordFramePreamble(void *buffer)
static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
static RResult< std::uint32_t > DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize, std::uint32_t &nitems)
static RResult< std::uint32_t > DeserializeFeatureFlags(const void *buffer, std::uint32_t bufSize, std::vector< std::int64_t > &flags)
static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer)
static std::uint32_t SerializeFramePostscript(void *frame, std::int32_t size)
static std::uint32_t SerializeFooterV1(void *buffer, const RNTupleDescriptor &desc, const RContext &context)
static std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
static constexpr std::uint32_t kFlagSortAscColumn
static RResult< void > VerifyCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32)
Expects a CRC32 checksum in the 4 bytes following data + length and verifies it.
static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer)
While we could just interpret the enums as ints, we make the translation explicit in order to avoid a...
static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
static RResult< void > DeserializeHeaderV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static RResult< std::uint32_t > DeserializeClusterSummary(const void *buffer, std::uint32_t bufSize, RClusterSummary &clusterSummary)
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:207
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
Generic information about the physical location of data.