Logo ROOT  
Reference Guide
RNTupleSerialize.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleSerialize.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2021-08-02
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleSerialize
17#define ROOT7_RNTupleSerialize
18
19#include <ROOT/RError.hxx>
20#include <ROOT/RNTupleUtil.hxx>
21#include <ROOT/RSpan.hxx>
22
23#include <cstdint>
24#include <map>
25#include <string>
26#include <vector>
27
28namespace ROOT {
29namespace Experimental {
30
31enum class EColumnType;
32class RClusterDescriptor;
33class RClusterDescriptorBuilder;
34class RNTupleDescriptor;
35class RNTupleDescriptorBuilder;
36
37
38namespace Internal {
39
40// clang-format off
41/**
42\class ROOT::Experimental::Internal::RNTupleSerializer
43\ingroup NTuple
44\brief A helper class for serialization and deserialization of the RNTuple binary format
45
46All serialization and deserialization routines return the number of bytes processed (written or read).
47
48The serialization routines can be called with a nullptr buffer, in which case only the size required to perform
49a serialization is returned. Deserialization routines must be called with a buffer that is sufficiently large.
50
51Deserialization errors throw exceptions. Only when indicated or when passed as a parameter is the buffer size checked.
52*/
53// clang-format on
55public:
56 /// In order to handle changes to the serialization routine in future ntuple versions
57 static constexpr std::uint16_t kEnvelopeCurrentVersion = 1;
58 static constexpr std::uint16_t kEnvelopeMinVersion = 1;
59 static constexpr std::uint32_t kReleaseCandidateTag = 1;
60
61 static constexpr std::uint16_t kFlagRepetitiveField = 0x01;
62
63 static constexpr std::uint32_t kFlagSortAscColumn = 0x01;
64 static constexpr std::uint32_t kFlagSortDesColumn = 0x02;
65 static constexpr std::uint32_t kFlagNonNegativeColumn = 0x04;
66
67 static constexpr DescriptorId_t kZeroFieldId = std::uint64_t(-2);
68
70 std::uint32_t fUnzippedSize = 0;
72 };
73
75 std::uint64_t fFirstEntry = 0;
76 std::uint64_t fNEntries = 0;
77 /// -1 for "all columns"
78 std::int32_t fColumnGroupID = -1;
79 };
80
82 std::uint32_t fNClusters = 0;
84 };
85
86 /// The serialization context is used for the piecewise serialization of a descriptor. During header serialization,
87 /// the mapping of in-memory field and column IDs to on-disk IDs is built so that it can be used for the
88 /// footer serialization in a second step.
89 class RContext {
90 private:
// Size and CRC32 of the serialized header; stored and handed back unchanged by the setters/getters below.
91 std::uint32_t fHeaderSize = 0;
92 std::uint32_t fHeaderCrc32 = 0;
// Forward maps, one per descriptor kind (field, column, cluster, cluster group): in-memory ID -> on-disk ID.
93 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskFieldIDs;
94 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskColumnIDs;
95 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskClusterIDs;
96 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskClusterGroupIDs;
// Reverse tables, one per descriptor kind: the vector index is the on-disk ID, the element is the in-memory ID.
97 std::vector<DescriptorId_t> fOnDisk2MemFieldIDs;
98 std::vector<DescriptorId_t> fOnDisk2MemColumnIDs;
99 std::vector<DescriptorId_t> fOnDisk2MemClusterIDs;
100 std::vector<DescriptorId_t> fOnDisk2MemClusterGroupIDs;
101
102 public:
103 void SetHeaderSize(std::uint32_t size) { fHeaderSize = size; }
104 std::uint32_t GetHeaderSize() const { return fHeaderSize; }
105 void SetHeaderCRC32(std::uint32_t crc32) { fHeaderCrc32 = crc32; }
106 std::uint32_t GetHeaderCRC32() const { return fHeaderCrc32; }
// NOTE(review): the signature line of every method from here on is absent from this listing; only the bodies
// are visible. Names and parameter types must be confirmed against the real header.
//
// Registers memId as a field: the new on-disk ID is the current length of the reverse table, so IDs are assigned
// in registration order. Both directions are recorded and the new on-disk ID is returned.
// Registering the same memId again overwrites its forward-map entry and appends a further reverse-table entry.
108 auto onDiskId = fOnDisk2MemFieldIDs.size();
109 fMem2OnDiskFieldIDs[memId] = onDiskId;
110 fOnDisk2MemFieldIDs.push_back(memId);
111 return onDiskId;
112 }
// Same registration scheme as above, applied to the column tables.
114 auto onDiskId = fOnDisk2MemColumnIDs.size();
115 fMem2OnDiskColumnIDs[memId] = onDiskId;
116 fOnDisk2MemColumnIDs.push_back(memId);
117 return onDiskId;
118 }
// Same registration scheme, applied to the cluster tables.
120 auto onDiskId = fOnDisk2MemClusterIDs.size();
121 fMem2OnDiskClusterIDs[memId] = onDiskId;
122 fOnDisk2MemClusterIDs.push_back(memId);
123 return onDiskId;
124 }
// Same registration scheme, applied to the cluster group tables.
126 {
127 auto onDiskId = fOnDisk2MemClusterGroupIDs.size();
128 fMem2OnDiskClusterGroupIDs[memId] = onDiskId;
129 fOnDisk2MemClusterGroupIDs.push_back(memId);
130 return onDiskId;
131 }
// Forward lookup for cluster groups (presumably GetOnDiskClusterGroupId -- confirm against the real header).
// std::map::at throws std::out_of_range when memId was never registered.
136 {
137 return fMem2OnDiskClusterGroupIDs.at(memId);
138 }
// Reverse lookup for cluster groups (presumably GetMemClusterGroupId -- confirm against the real header).
// Uses unchecked operator[], so onDiskId must be a previously assigned on-disk ID.
143 {
144 return fOnDisk2MemClusterGroupIDs[onDiskId];
145 }
146 };
147
148 /// Writes a CRC32 checksum of the byte range given by data and length.
149 static std::uint32_t SerializeCRC32(const unsigned char *data, std::uint32_t length,
150 std::uint32_t &crc32, void *buffer);
151 /// Expects a CRC32 checksum in the 4 bytes following data + length and verifies it.
152 static RResult<void> VerifyCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32);
153 static RResult<void> VerifyCRC32(const unsigned char *data, std::uint32_t length);
154
155 static std::uint32_t SerializeInt16(std::int16_t val, void *buffer);
156 static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val);
157 static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer);
158 static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val);
159
160 static std::uint32_t SerializeInt32(std::int32_t val, void *buffer);
161 static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val);
162 static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer);
163 static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val);
164
165 static std::uint32_t SerializeInt64(std::int64_t val, void *buffer);
166 static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val);
167 static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer);
168 static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val);
169
170 static std::uint32_t SerializeString(const std::string &val, void *buffer);
171 static RResult<std::uint32_t> DeserializeString(const void *buffer, std::uint32_t bufSize, std::string &val);
172
173 /// While we could just interpret the enums as ints, we make the translation explicit
174 /// in order to avoid accidentally changing the on-disk numbers when adjusting the enum classes.
175 static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer);
176 static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer);
179
180 static std::uint32_t SerializeEnvelopePreamble(void *buffer);
181 static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size, void *buffer);
182 static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size,
183 std::uint32_t &crc32, void *buffer);
184 // The bufSize must include the 4 bytes for the final CRC32 checksum.
185 static RResult<std::uint32_t> DeserializeEnvelope(const void *buffer, std::uint32_t bufSize);
186 static RResult<std::uint32_t> DeserializeEnvelope(const void *buffer, std::uint32_t bufSize, std::uint32_t &crc32);
187
188 static std::uint32_t SerializeRecordFramePreamble(void *buffer);
189 static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer);
190 static std::uint32_t SerializeFramePostscript(void *frame, std::int32_t size);
191 static RResult<std::uint32_t> DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize,
192 std::uint32_t &frameSize, std::uint32_t &nitems);
193 static RResult<std::uint32_t> DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize,
194 std::uint32_t &frameSize);
195
196 // An empty flags vector will be serialized as a single, zero feature flag
197 // The most significant bit in every flag is reserved and must _not_ be set
198 static std::uint32_t SerializeFeatureFlags(const std::vector<std::int64_t> &flags, void *buffer);
199 static RResult<std::uint32_t> DeserializeFeatureFlags(const void *buffer, std::uint32_t bufSize,
200 std::vector<std::int64_t> &flags);
201
202 static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer);
203 static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer);
204 static RResult<std::uint32_t> DeserializeLocator(const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator);
205 static RResult<std::uint32_t> DeserializeEnvelopeLink(const void *buffer, std::uint32_t bufSize,
206 REnvelopeLink &envelopeLink);
207
208 static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer);
209 static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer);
210 static RResult<std::uint32_t> DeserializeClusterSummary(const void *buffer, std::uint32_t bufSize,
211 RClusterSummary &clusterSummary);
212 static RResult<std::uint32_t> DeserializeClusterGroup(const void *buffer, std::uint32_t bufSize,
213 RClusterGroup &clusterGroup);
214
215 static RContext SerializeHeaderV1(void *buffer, const RNTupleDescriptor &desc);
216 static std::uint32_t SerializePageListV1(void *buffer,
217 const RNTupleDescriptor &desc,
218 std::span<DescriptorId_t> physClusterIDs,
219 const RContext &context);
220 static std::uint32_t SerializeFooterV1(void *buffer, const RNTupleDescriptor &desc, const RContext &context);
221
222 static RResult<void> DeserializeHeaderV1(const void *buffer,
223 std::uint32_t bufSize,
224 RNTupleDescriptorBuilder &descBuilder);
225 static RResult<void> DeserializeFooterV1(const void *buffer,
226 std::uint32_t bufSize,
227 RNTupleDescriptorBuilder &descBuilder);
228 // The clusters vector must be initialized with the cluster summaries corresponding to the page list
229 static RResult<void> DeserializePageListV1(const void *buffer,
230 std::uint32_t bufSize,
231 std::vector<RClusterDescriptorBuilder> &clusters);
232}; // class RNTupleSerializer
233
234} // namespace Internal
235} // namespace Experimental
236} // namespace ROOT
237
238#endif // ROOT7_RNTupleSerialize
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t nitems
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
The available trivial, native content types of a column.
The serialization context is used for the piecewise serialization of a descriptor.
DescriptorId_t GetOnDiskColumnId(DescriptorId_t memId) const
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskClusterIDs
DescriptorId_t GetOnDiskFieldId(DescriptorId_t memId) const
DescriptorId_t GetMemColumnId(DescriptorId_t onDiskId) const
DescriptorId_t GetMemFieldId(DescriptorId_t onDiskId) const
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskFieldIDs
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskColumnIDs
std::map< DescriptorId_t, DescriptorId_t > fMem2OnDiskClusterGroupIDs
DescriptorId_t GetMemClusterGroupId(DescriptorId_t onDiskId) const
DescriptorId_t GetMemClusterId(DescriptorId_t onDiskId) const
DescriptorId_t GetOnDiskClusterGroupId(DescriptorId_t memId) const
DescriptorId_t GetOnDiskClusterId(DescriptorId_t memId) const
A helper class for serialization and deserialization of the RNTuple binary format.
static RResult< std::uint32_t > DeserializeString(const void *buffer, std::uint32_t bufSize, std::string &val)
static std::uint32_t SerializeFeatureFlags(const std::vector< std::int64_t > &flags, void *buffer)
static constexpr std::uint32_t kReleaseCandidateTag
static std::uint32_t SerializePageListV1(void *buffer, const RNTupleDescriptor &desc, std::span< DescriptorId_t > physClusterIDs, const RContext &context)
static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer)
static RResult< std::uint32_t > DeserializeLocator(const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator)
static constexpr std::uint16_t kFlagRepetitiveField
static std::uint32_t SerializeCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32, void *buffer)
Writes a CRC32 checksum of the byte range given by data and length.
static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer)
static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val)
static RResult< void > DeserializePageListV1(const void *buffer, std::uint32_t bufSize, std::vector< RClusterDescriptorBuilder > &clusters)
static std::uint32_t SerializeString(const std::string &val, void *buffer)
static constexpr std::uint16_t kEnvelopeCurrentVersion
In order to handle changes to the serialization routine in future ntuple versions.
static constexpr std::uint32_t kFlagNonNegativeColumn
static constexpr std::uint32_t kFlagSortDesColumn
static RResult< std::uint32_t > DeserializeEnvelope(const void *buffer, std::uint32_t bufSize)
static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val)
static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer)
static RContext SerializeHeaderV1(void *buffer, const RNTupleDescriptor &desc)
static RResult< void > DeserializeFooterV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeInt16(std::int16_t val, void *buffer)
static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer)
static std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
static std::uint32_t SerializeEnvelopePreamble(void *buffer)
Currently all envelopes have the same version number (1).
static RResult< std::uint16_t > DeserializeColumnType(const void *buffer, ROOT::Experimental::EColumnType &type)
static RResult< std::uint32_t > DeserializeClusterGroup(const void *buffer, std::uint32_t bufSize, RClusterGroup &clusterGroup)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static constexpr std::uint16_t kEnvelopeMinVersion
static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val)
static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val)
static RResult< std::uint32_t > DeserializeEnvelopeLink(const void *buffer, std::uint32_t bufSize, REnvelopeLink &envelopeLink)
static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size, void *buffer)
static RResult< std::uint16_t > DeserializeFieldStructure(const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer)
static std::uint32_t SerializeRecordFramePreamble(void *buffer)
static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
static RResult< std::uint32_t > DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize, std::uint32_t &nitems)
static RResult< std::uint32_t > DeserializeFeatureFlags(const void *buffer, std::uint32_t bufSize, std::vector< std::int64_t > &flags)
static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer)
static std::uint32_t SerializeFramePostscript(void *frame, std::int32_t size)
static std::uint32_t SerializeFooterV1(void *buffer, const RNTupleDescriptor &desc, const RContext &context)
static std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
static constexpr std::uint32_t kFlagSortAscColumn
static RResult< void > VerifyCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32)
Expects a CRC32 checksum in the 4 bytes following data + length and verifies it.
static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer)
While we could just interpret the enums as ints, we make the translation explicit in order to avoid a...
static constexpr DescriptorId_t kZeroFieldId
static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
static RResult< void > DeserializeHeaderV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static RResult< std::uint32_t > DeserializeClusterSummary(const void *buffer, std::uint32_t bufSize, RClusterSummary &clusterSummary)
A helper class for piece-wise construction of an RNTupleDescriptor.
The on-storage meta-data of an ntuple.
RResult<void> has no data member and no Inspect() method but instead a Success() factory method.
Definition: RError.hxx:269
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition: RError.hxx:207
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
Definition: RNTupleUtil.hxx:38
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Generic information about the physical location of data.