Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleSerialize.cxx
Go to the documentation of this file.
1/// \file RNTupleSerialize.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2021-08-02
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
17#include <ROOT/RColumnModel.hxx>
18#include <ROOT/RError.hxx>
21
22#include <RVersion.h>
23#include <RZip.h> // for R__crc32
24
25#include <cstring> // for memcpy
26#include <deque>
27#include <set>
28#include <unordered_map>
29
30template <typename T>
32
33
34namespace {
35
36std::uint32_t SerializeFieldV1(
37 const ROOT::Experimental::RFieldDescriptor &fieldDesc, ROOT::Experimental::DescriptorId_t physParentId, void *buffer)
38{
40
41 auto base = reinterpret_cast<unsigned char *>(buffer);
42 auto pos = base;
43 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
44
45 pos += RNTupleSerializer::SerializeRecordFramePreamble(*where);
46
47 pos += RNTupleSerializer::SerializeUInt32(fieldDesc.GetFieldVersion().GetVersionUse(), *where);
48 pos += RNTupleSerializer::SerializeUInt32(fieldDesc.GetTypeVersion().GetVersionUse(), *where);
49 pos += RNTupleSerializer::SerializeUInt32(physParentId, *where);
50 pos += RNTupleSerializer::SerializeFieldStructure(fieldDesc.GetStructure(), *where);
51 if (fieldDesc.GetNRepetitions() > 0) {
52 pos += RNTupleSerializer::SerializeUInt16(RNTupleSerializer::kFlagRepetitiveField, *where);
53 pos += RNTupleSerializer::SerializeUInt64(fieldDesc.GetNRepetitions(), *where);
54 } else {
55 pos += RNTupleSerializer::SerializeUInt16(0, *where);
56 }
57 pos += RNTupleSerializer::SerializeString(fieldDesc.GetFieldName(), *where);
58 pos += RNTupleSerializer::SerializeString(fieldDesc.GetTypeName(), *where);
59 pos += RNTupleSerializer::SerializeString("" /* type alias */, *where);
60 pos += RNTupleSerializer::SerializeString(fieldDesc.GetFieldDescription(), *where);
61
62 auto size = pos - base;
63 RNTupleSerializer::SerializeFramePostscript(base, size);
64
65 return size;
66}
67
68
/// Serializes one field record (via SerializeFieldV1) for every field reachable from the zero field of `desc`,
/// visiting the fields level by level with a FIFO queue of field ids.
/// Returns the number of bytes between the start and the end position (pos - base).
// NOTE(review): `desc` and `context` are used below but their parameter declarations are not visible in the
// signature; two lines appear to be missing -- confirm against upstream.
69std::uint32_t SerializeFieldTree(
72 void *buffer)
73{
74 auto base = reinterpret_cast<unsigned char *>(buffer);
75 auto pos = base;
// *where is nullptr when no buffer was given and the current write position otherwise
76 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
77
// Start the traversal at the zero field
78 std::deque<ROOT::Experimental::DescriptorId_t> idQueue{desc.GetFieldZeroId()};
79
80 while (!idQueue.empty()) {
81 auto parentId = idQueue.front();
82 idQueue.pop_front();
83
84 for (const auto &f : desc.GetFieldIterable(parentId)) {
85 auto physFieldId = context.MapFieldId(f.GetId());
// Direct children of the zero field store their own id as parent id
86 auto physParentId = (parentId == desc.GetFieldZeroId()) ? physFieldId : context.GetPhysFieldId(parentId);
87 pos += SerializeFieldV1(f, physParentId, *where);
// Visit the children of f once the current level is done
88 idQueue.push_back(f.GetId());
89 }
90 }
91
92 return pos - base;
93}
94
95RResult<std::uint32_t> DeserializeFieldV1(
96 const void *buffer,
97 std::uint32_t bufSize,
99{
100 using RNTupleSerializer = ROOT::Experimental::Internal::RNTupleSerializer;
101 using ENTupleStructure = ROOT::Experimental::ENTupleStructure;
102
103 auto base = reinterpret_cast<const unsigned char *>(buffer);
104 auto bytes = base;
105 std::uint32_t frameSize;
106 auto fnFrameSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
107 auto result = RNTupleSerializer::DeserializeFrameHeader(bytes, bufSize, frameSize);
108 if (!result)
109 return R__FORWARD_ERROR(result);
110 bytes += result.Unwrap();
111
112 std::uint32_t fieldVersion;
113 std::uint32_t typeVersion;
114 std::uint32_t parentId;
115 // initialize properly for call to SerializeFieldStructure()
116 ENTupleStructure structure{ENTupleStructure::kLeaf};
117 std::uint16_t flags;
118 if (fnFrameSizeLeft() < 3 * sizeof(std::uint32_t) +
119 RNTupleSerializer::SerializeFieldStructure(structure, nullptr) +
120 sizeof(std::uint16_t))
121 {
122 return R__FAIL("field record frame too short");
123 }
124 bytes += RNTupleSerializer::DeserializeUInt32(bytes, fieldVersion);
125 bytes += RNTupleSerializer::DeserializeUInt32(bytes, typeVersion);
126 bytes += RNTupleSerializer::DeserializeUInt32(bytes, parentId);
127 auto res16 = RNTupleSerializer::DeserializeFieldStructure(bytes, structure);
128 if (!res16)
129 return R__FORWARD_ERROR(res16);
130 bytes += res16.Unwrap();
131 bytes += RNTupleSerializer::DeserializeUInt16(bytes, flags);
132 fieldDesc.FieldVersion(ROOT::Experimental::RNTupleVersion(fieldVersion, fieldVersion))
133 .TypeVersion(ROOT::Experimental::RNTupleVersion(typeVersion, typeVersion))
134 .ParentId(parentId)
135 .Structure(structure);
136
137 if (flags & RNTupleSerializer::kFlagRepetitiveField) {
138 if (fnFrameSizeLeft() < sizeof(std::uint64_t))
139 return R__FAIL("field record frame too short");
140 std::uint64_t nRepetitions;
141 bytes += RNTupleSerializer::DeserializeUInt64(bytes, nRepetitions);
142 fieldDesc.NRepetitions(nRepetitions);
143 }
144
145 std::string fieldName;
146 std::string typeName;
147 std::string aliasName; // so far unused
148 std::string description;
149 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), fieldName).Unwrap();
150 if (!result)
151 return R__FORWARD_ERROR(result);
152 bytes += result.Unwrap();
153 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), typeName).Unwrap();
154 if (!result)
155 return R__FORWARD_ERROR(result);
156 bytes += result.Unwrap();
157 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), aliasName).Unwrap();
158 if (!result)
159 return R__FORWARD_ERROR(result);
160 bytes += result.Unwrap();
161 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), description).Unwrap();
162 if (!result)
163 return R__FORWARD_ERROR(result);
164 bytes += result.Unwrap();
165 fieldDesc.FieldName(fieldName).TypeName(typeName).FieldDescription(description);
166
167 return frameSize;
168}
169
/// Serializes one record frame per column of `desc`. Fields are visited level by level starting at the zero
/// field; for each visited field id its columns are written first, then its sub fields are queued.
/// Every written column is registered with the context through MapColumnId().
/// Returns the number of bytes between the start and the end position (pos - base).
// NOTE(review): `desc` and `context` are used below but their parameter declarations are not visible in the
// signature; two lines appear to be missing -- confirm against upstream.
170std::uint32_t SerializeColumnListV1(
173 void *buffer)
174{
175 using RNTupleSerializer = ROOT::Experimental::Internal::RNTupleSerializer;
176 using RColumnElementBase = ROOT::Experimental::Detail::RColumnElementBase;
177
178 auto base = reinterpret_cast<unsigned char *>(buffer);
179 auto pos = base;
// *where is nullptr when no buffer was given and the current write position otherwise
180 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
181
182 std::deque<ROOT::Experimental::DescriptorId_t> idQueue{desc.GetFieldZeroId()};
183
184 while (!idQueue.empty()) {
185 auto parentId = idQueue.front();
186 idQueue.pop_front();
187
188 for (const auto &c : desc.GetColumnIterable(parentId)) {
// Remember where this column's frame starts so that the postscript can patch it
189 auto frame = pos;
190 pos += RNTupleSerializer::SerializeRecordFramePreamble(*where);
191
// Record layout: column type, bits on storage, physical field id, flags
192 auto type = c.GetModel().GetType();
193 pos += RNTupleSerializer::SerializeColumnType(type, *where);
194 pos += RNTupleSerializer::SerializeUInt16(RColumnElementBase::GetBitsOnStorage(type), *where);
195 pos += RNTupleSerializer::SerializeUInt32(context.GetPhysFieldId(c.GetFieldId()), *where);
196 std::uint32_t flags = 0;
197 // TODO(jblomer): add support for descending columns in the column model
198 if (c.GetModel().GetIsSorted())
199 flags |= RNTupleSerializer::kFlagSortAscColumn;
200 // TODO(jblomer): fix for unsigned integer types
// NOTE(review): as visible here the next statement is unconditional; a guarding `if` line appears to be
// missing between the TODO above and this statement -- confirm against upstream.
202 flags |= RNTupleSerializer::kFlagNonNegativeColumn;
203 pos += RNTupleSerializer::SerializeUInt32(flags, *where);
204
// frame is only meaningful when a real buffer is written
205 pos += RNTupleSerializer::SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
206
207 context.MapColumnId(c.GetId());
208 }
209
210 for (const auto &f : desc.GetFieldIterable(parentId))
211 idQueue.push_back(f.GetId());
212 }
213
214 return pos - base;
215}
216
/// Deserializes one column record frame: column type, bits on storage, field id and flags.
/// On success the field id and a model built from {type, isSorted} are stored in `columnDesc` and the frame
/// size announced by the frame header is returned.
// NOTE(review): the declaration of `columnDesc`, the declaration that makes `EColumnType` visible, and the
// condition guarding the "column element size mismatch" return are not visible; lines appear to be missing --
// confirm against upstream.
217RResult<std::uint32_t> DeserializeColumnV1(
218 const void *buffer,
219 std::uint32_t bufSize,
221{
222 using RNTupleSerializer = ROOT::Experimental::Internal::RNTupleSerializer;
224
225 auto base = reinterpret_cast<const unsigned char *>(buffer);
226 auto bytes = base;
227 std::uint32_t frameSize;
// Only called after DeserializeFrameHeader() below has filled in frameSize
228 auto fnFrameSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
229 auto result = RNTupleSerializer::DeserializeFrameHeader(bytes, bufSize, frameSize);
230 if (!result)
231 return R__FORWARD_ERROR(result);
232 bytes += result.Unwrap();
233
234 // Initialize properly for SerializeColumnType
235 EColumnType type{EColumnType::kIndex};
236 std::uint16_t bitsOnStorage;
237 std::uint32_t fieldId;
238 std::uint32_t flags;
// SerializeColumnType() with a null buffer only yields the serialized size of a column type
239 if (fnFrameSizeLeft() < RNTupleSerializer::SerializeColumnType(type, nullptr) +
240 sizeof(std::uint16_t) + 2 * sizeof(std::uint32_t))
241 {
242 return R__FAIL("column record frame too short");
243 }
244 auto res16 = RNTupleSerializer::DeserializeColumnType(bytes, type);
245 if (!res16)
246 return R__FORWARD_ERROR(res16);
247 bytes += res16.Unwrap();
248 bytes += RNTupleSerializer::DeserializeUInt16(bytes, bitsOnStorage);
249 bytes += RNTupleSerializer::DeserializeUInt32(bytes, fieldId);
250 bytes += RNTupleSerializer::DeserializeUInt32(bytes, flags);
251
253 return R__FAIL("column element size mismatch");
254
// Either sort flag (ascending or descending) marks the column as sorted
255 const bool isSorted = (flags & (RNTupleSerializer::kFlagSortAscColumn | RNTupleSerializer::kFlagSortDesColumn));
256 columnDesc.FieldId(fieldId).Model({type, isSorted});
257
258 return frameSize;
259}
260
261} // anonymous namespace
262
263
/// Computes the CRC32 of data[0, length), stores it in `crc32` and writes it as UInt32 to `buffer`.
/// If `buffer` is nullptr nothing is computed and `crc32` is left untouched. Always returns 4.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// it is called as SerializeCRC32(...) further down in this file.
265 const unsigned char *data, std::uint32_t length, std::uint32_t &crc32, void *buffer)
266{
267 if (buffer != nullptr) {
// First call obtains the initial checksum value, second call folds in the data
268 crc32 = R__crc32(0, nullptr, 0);
269 crc32 = R__crc32(crc32, data, length);
270 SerializeUInt32(crc32, buffer);
271 }
272 return 4;
273}
274
/// Computes the CRC32 of data[0, length) and compares it to the UInt32 stored directly behind the data
/// (at data + length). The stored value is returned in `crc32`.
/// Fails with "CRC32 checksum mismatch" if the two values differ.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// it is called as VerifyCRC32(...) further down in this file.
276 const unsigned char *data, std::uint32_t length, std::uint32_t &crc32)
277{
278 auto checksumReal = R__crc32(0, nullptr, 0);
279 checksumReal = R__crc32(checksumReal, data, length);
// The expected checksum is read from the 4 bytes that follow the checksummed range
280 DeserializeUInt32(data + length, crc32);
281 if (crc32 != checksumReal)
282 return R__FAIL("CRC32 checksum mismatch");
283 return RResult<void>::Success();
284}
285
286
/// Convenience variant for callers that do not need the stored checksum value.
// NOTE(review): header line not visible here either.
288 const unsigned char *data, std::uint32_t length)
289{
290 std::uint32_t crc32;
291 return R__FORWARD_RESULT(VerifyCRC32(data, length, crc32));
292}
293
294
295std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt16(std::int16_t val, void *buffer)
296{
297 if (buffer != nullptr) {
298 auto bytes = reinterpret_cast<unsigned char *>(buffer);
299 bytes[0] = (val & 0x00FF);
300 bytes[1] = (val & 0xFF00) >> 8;
301 }
302 return 2;
303}
304
305std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt16(const void *buffer, std::int16_t &val)
306{
307 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
308 val = std::int16_t(bytes[0]) + (std::int16_t(bytes[1]) << 8);
309 return 2;
310}
311
312std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt16(std::uint16_t val, void *buffer)
313{
314 return SerializeInt16(val, buffer);
315}
316
317std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt16(const void *buffer, std::uint16_t &val)
318{
319 return DeserializeInt16(buffer, *reinterpret_cast<std::int16_t *>(&val));
320}
321
322std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt32(std::int32_t val, void *buffer)
323{
324 if (buffer != nullptr) {
325 auto bytes = reinterpret_cast<unsigned char *>(buffer);
326 bytes[0] = (val & 0x000000FF);
327 bytes[1] = (val & 0x0000FF00) >> 8;
328 bytes[2] = (val & 0x00FF0000) >> 16;
329 bytes[3] = (val & 0xFF000000) >> 24;
330 }
331 return 4;
332}
333
334std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt32(const void *buffer, std::int32_t &val)
335{
336 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
337 val = std::int32_t(bytes[0]) + (std::int32_t(bytes[1]) << 8) +
338 (std::int32_t(bytes[2]) << 16) + (std::int32_t(bytes[3]) << 24);
339 return 4;
340}
341
342std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt32(std::uint32_t val, void *buffer)
343{
344 return SerializeInt32(val, buffer);
345}
346
347std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt32(const void *buffer, std::uint32_t &val)
348{
349 return DeserializeInt32(buffer, *reinterpret_cast<std::int32_t *>(&val));
350}
351
352std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt64(std::int64_t val, void *buffer)
353{
354 if (buffer != nullptr) {
355 auto bytes = reinterpret_cast<unsigned char *>(buffer);
356 bytes[0] = (val & 0x00000000000000FF);
357 bytes[1] = (val & 0x000000000000FF00) >> 8;
358 bytes[2] = (val & 0x0000000000FF0000) >> 16;
359 bytes[3] = (val & 0x00000000FF000000) >> 24;
360 bytes[4] = (val & 0x000000FF00000000) >> 32;
361 bytes[5] = (val & 0x0000FF0000000000) >> 40;
362 bytes[6] = (val & 0x00FF000000000000) >> 48;
363 bytes[7] = (val & 0xFF00000000000000) >> 56;
364 }
365 return 8;
366}
367
368std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt64(const void *buffer, std::int64_t &val)
369{
370 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
371 val = std::int64_t(bytes[0]) + (std::int64_t(bytes[1]) << 8) +
372 (std::int64_t(bytes[2]) << 16) + (std::int64_t(bytes[3]) << 24) +
373 (std::int64_t(bytes[4]) << 32) + (std::int64_t(bytes[5]) << 40) +
374 (std::int64_t(bytes[6]) << 48) + (std::int64_t(bytes[7]) << 56);
375 return 8;
376}
377
378std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt64(std::uint64_t val, void *buffer)
379{
380 return SerializeInt64(val, buffer);
381}
382
383std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt64(const void *buffer, std::uint64_t &val)
384{
385 return DeserializeInt64(buffer, *reinterpret_cast<std::int64_t *>(&val));
386}
387
388std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeString(const std::string &val, void *buffer)
389{
390 if (buffer) {
391 auto pos = reinterpret_cast<unsigned char *>(buffer);
392 pos += SerializeUInt32(val.length(), pos);
393 memcpy(pos, val.data(), val.length());
394 }
395 return sizeof(std::uint32_t) + val.length();
396}
397
/// Reads a string stored as a UInt32 length followed by that many characters from `buffer` into `val`.
/// Fails with "string buffer too short" if `bufSize` does not cover the length prefix or the characters.
/// On success returns the number of bytes consumed (length prefix plus characters).
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// it is called as DeserializeString(...) earlier in this file.
399 const void *buffer, std::uint32_t bufSize, std::string &val)
400{
401 if (bufSize < sizeof(std::uint32_t))
402 return R__FAIL("string buffer too short");
// From here on bufSize is the space left for the characters
403 bufSize -= sizeof(std::uint32_t);
404
405 auto base = reinterpret_cast<const unsigned char *>(buffer);
406 auto bytes = base;
407 std::uint32_t length;
408 bytes += DeserializeUInt32(buffer, length);
409 if (bufSize < length)
410 return R__FAIL("string buffer too short");
411
412 val.resize(length);
413 memcpy(&val[0], bytes, length);
414 return sizeof(std::uint32_t) + length;
415}
416
417
// Maps a column type to a UInt16 on-disk code in the range 0x02..0x0D and writes it with SerializeUInt16().
// Throws an RException ("ROOT bug: unexpected column type") for a type without a mapping.
// NOTE(review): the function header and the `case` labels that select each return statement are not
// visible; lines appear to be missing -- confirm against upstream. The name is taken from the
// SerializeColumnType(...) calls elsewhere in this file.
420{
422 switch (type) {
424 return SerializeUInt16(0x02, buffer);
426 return SerializeUInt16(0x03, buffer);
428 return SerializeUInt16(0x04, buffer);
430 return SerializeUInt16(0x05, buffer);
432 return SerializeUInt16(0x06, buffer);
434 return SerializeUInt16(0x07, buffer);
436 return SerializeUInt16(0x08, buffer);
438 return SerializeUInt16(0x09, buffer);
440 return SerializeUInt16(0x0A, buffer);
442 return SerializeUInt16(0x0B, buffer);
444 return SerializeUInt16(0x0C, buffer);
446 return SerializeUInt16(0x0D, buffer);
447 default:
448 throw RException(R__FAIL("ROOT bug: unexpected column type"));
449 }
450}
451
452
// Reads a UInt16 on-disk column type code from `buffer`. Codes 0x02..0x0D are accepted; any other code
// fails with "unexpected on-disk column type". On success the result of DeserializeUInt16() is returned.
// NOTE(review): the function header and the assignment to `type` inside each case are not visible; lines
// appear to be missing -- confirm against upstream. The name is taken from the DeserializeColumnType(...)
// call earlier in this file.
454 const void *buffer, ROOT::Experimental::EColumnType &type)
455{
457 std::uint16_t onDiskType;
458 auto result = DeserializeUInt16(buffer, onDiskType);
459 switch (onDiskType) {
460 case 0x02:
462 break;
463 case 0x03:
465 break;
466 case 0x04:
468 break;
469 case 0x05:
471 break;
472 case 0x06:
474 break;
475 case 0x07:
477 break;
478 case 0x08:
480 break;
481 case 0x09:
483 break;
484 case 0x0A:
486 break;
487 case 0x0B:
489 break;
490 case 0x0C:
492 break;
493 case 0x0D:
495 break;
496 default:
497 return R__FAIL("unexpected on-disk column type");
498 }
499 return result;
500}
501
502
// Maps a field structure value to a UInt16 on-disk code in the range 0x00..0x04 and writes it with
// SerializeUInt16(). Throws an RException ("ROOT bug: unexpected field structure type") otherwise.
// NOTE(review): the function header and the `case` labels that select each return statement are not
// visible; lines appear to be missing -- confirm against upstream. The name is taken from the
// SerializeFieldStructure(...) calls earlier in this file.
504 ROOT::Experimental::ENTupleStructure structure, void *buffer)
505{
507 switch (structure) {
509 return SerializeUInt16(0x00, buffer);
511 return SerializeUInt16(0x01, buffer);
513 return SerializeUInt16(0x02, buffer);
515 return SerializeUInt16(0x03, buffer);
517 return SerializeUInt16(0x04, buffer);
518 default:
519 throw RException(R__FAIL("ROOT bug: unexpected field structure type"));
520 }
521}
522
523
// Reads a UInt16 on-disk field structure code from `buffer` and translates it into `structure`.
// Codes 0x00..0x04 are accepted; any other code fails with "unexpected on-disk field structure value".
// On success the result of DeserializeUInt16() is returned.
// NOTE(review): the function header and the assignments for codes 0x01 and 0x04 are not visible; lines
// appear to be missing -- confirm against upstream. The name is taken from the
// DeserializeFieldStructure(...) call earlier in this file.
525 const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
526{
528 std::uint16_t onDiskValue;
529 auto result = DeserializeUInt16(buffer, onDiskValue);
530 switch (onDiskValue) {
531 case 0x00:
532 structure = ENTupleStructure::kLeaf;
533 break;
534 case 0x01:
536 break;
537 case 0x02:
538 structure = ENTupleStructure::kRecord;
539 break;
540 case 0x03:
541 structure = ENTupleStructure::kVariant;
542 break;
543 case 0x04:
545 break;
546 default:
547 return R__FAIL("unexpected on-disk field structure value");
548 }
549 return result;
550}
551
552
553/// Currently all envelopes have the same version number (1). At a later point, different envelope types
554/// may have different version numbers.
/// Writes kEnvelopeCurrentVersion followed by kEnvelopeMinVersion, each as UInt16, and returns the number
/// of bytes this takes.
// NOTE(review): the line with the return type, function name and the `buffer` parameter is not visible;
// it is called as SerializeEnvelopePreamble(...) further down in this file.
556{
557 auto base = reinterpret_cast<unsigned char *>(buffer);
558 auto pos = base;
// *where is nullptr when no buffer was given and the current write position otherwise
559 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
560
561 pos += SerializeUInt16(kEnvelopeCurrentVersion, *where);
562 pos += SerializeUInt16(kEnvelopeMinVersion, *where);
563 return pos - base;
564}
565
566
/// Closes an envelope: forwards to SerializeCRC32() for envelope[0, size) and returns its result.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// it is called as SerializeEnvelopePostscript(...) further down in this file.
568 const unsigned char *envelope, std::uint32_t size, std::uint32_t &crc32, void *buffer)
569{
570 return SerializeCRC32(envelope, size, crc32, buffer);
571}
572
/// Convenience variant for callers that do not need the checksum value.
// NOTE(review): header line not visible here either.
574 const unsigned char *envelope, std::uint32_t size, void *buffer)
575{
576 std::uint32_t crc32;
577 return SerializeEnvelopePostscript(envelope, size, crc32, buffer);
578}
579
580/// Currently all envelopes have the same version number (1). At a later point, different envelope types
581/// may have different version numbers.
/// Reads the two UInt16 version fields at the start of the envelope, rejects envelopes written with
/// version 0 or requiring a version newer than kEnvelopeCurrentVersion, and finally verifies the CRC32 stored
/// in the last 4 bytes of the buffer. Returns the size of the two version fields.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// the overload below calls this function as DeserializeEnvelope(...).
583 const void *buffer, std::uint32_t bufSize, std::uint32_t &crc32)
584{
// Minimum envelope: two version fields plus the trailing checksum
585 if (bufSize < (2 * sizeof(std::uint16_t) + sizeof(std::uint32_t)))
586 return R__FAIL("invalid envelope, too short");
587
588 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
589 auto base = bytes;
590
591 std::uint16_t protocolVersionAtWrite;
592 std::uint16_t protocolVersionMinRequired;
593 bytes += DeserializeUInt16(bytes, protocolVersionAtWrite);
594 // RNTuple compatible back to version 1 (but not to version 0)
595 if (protocolVersionAtWrite < 1)
596 return R__FAIL("The RNTuple format is too old (version 0)");
597
598 bytes += DeserializeUInt16(bytes, protocolVersionMinRequired);
599 if (protocolVersionMinRequired > kEnvelopeCurrentVersion) {
600 return R__FAIL(std::string("The RNTuple format is too new (version ") +
601 std::to_string(protocolVersionMinRequired) + ")");
602 }
603
604 // We defer the CRC32 check to the end to facilitate testing of forward/backward incompatibilities
605 auto result = VerifyCRC32(base, bufSize - 4, crc32);
606 if (!result)
607 return R__FORWARD_ERROR(result);
608
609 return sizeof(protocolVersionAtWrite) + sizeof(protocolVersionMinRequired);
610}
611
612
/// Convenience variant for callers that do not need the checksum value.
// NOTE(review): header line not visible here either.
614 const void *buffer, std::uint32_t bufSize)
615{
616 std::uint32_t crc32;
617 return R__FORWARD_RESULT(DeserializeEnvelope(buffer, bufSize, crc32));
618}
619
620
// Opens a record frame by writing the Int32 marker 1 in the place of the frame size and returns the result
// of SerializeInt32(). SerializeFramePostscript() later multiplies the marker with the final size.
// NOTE(review): the line with the return type, function name and the `buffer` parameter is not visible;
// it is called as SerializeRecordFramePreamble(...) elsewhere in this file.
622{
623 // Marker: multiply the final size with 1
624 return SerializeInt32(1, buffer);
625}
626
627
/// Opens a list frame: writes the Int32 marker -1 in the place of the frame size, followed by the number of
/// items as UInt32. Returns the number of bytes this takes.
/// Throws an RException ("list frame too large: ...") if nitems does not fit into 28 bits.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// it is called as SerializeListFramePreamble(...) further down in this file.
629 std::uint32_t nitems, void *buffer)
630{
631 if (nitems >= (1 << 28))
632 throw RException(R__FAIL("list frame too large: " + std::to_string(nitems)));
633
634 auto base = reinterpret_cast<unsigned char *>(buffer);
635 auto pos = base;
// *where is nullptr when no buffer was given and the current write position otherwise
636 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
637
638 // Marker: multiply the final size with -1
639 pos += RNTupleSerializer::SerializeInt32(-1, *where);
640 pos += SerializeUInt32(nitems, *where);
641 return pos - base;
642}
643
/// Finalizes a frame that starts at `frame` and spans `size` bytes: the marker written by the preamble
/// (1 or -1) is replaced by marker * size. With a null `frame` only the size checks are performed.
/// Always returns 0, i.e. the postscript itself occupies no bytes.
/// Throws an RException if size is negative ("frame too large"), smaller than one Int32, or, for a negative
/// marker, smaller than two Int32 ("frame too short").
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// it is called as SerializeFramePostscript(...) elsewhere in this file.
645 void *frame, std::int32_t size)
646{
647 if (size < 0)
648 throw RException(R__FAIL("frame too large: " + std::to_string(size)));
649 if (size < static_cast<std::int32_t>(sizeof(std::int32_t)))
650 throw RException(R__FAIL("frame too short: " + std::to_string(size)));
651 if (frame) {
652 std::int32_t marker;
653 DeserializeInt32(frame, marker);
// A negative marker denotes a list frame, which also carries the item count
654 if ((marker < 0) && (size < static_cast<std::int32_t>(2 * sizeof(std::int32_t))))
655 throw RException(R__FAIL("frame too short: " + std::to_string(size)));
656
657 SerializeInt32(marker * size, frame);
658 }
659 return 0;
660}
661
663 const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize, std::uint32_t &nitems)
664{
665 if (bufSize < sizeof(std::int32_t))
666 return R__FAIL("frame too short");
667
668 std::int32_t *ssize = reinterpret_cast<std::int32_t *>(&frameSize);
669 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
670 bytes += DeserializeInt32(bytes, *ssize);
671 if (*ssize >= 0) {
672 // Record frame
673 nitems = 1;
674 if (frameSize < sizeof(std::int32_t))
675 return R__FAIL("corrupt record frame size");
676 } else {
677 // List frame
678 if (bufSize < 2 * sizeof(std::int32_t))
679 return R__FAIL("frame too short");
680 bytes += DeserializeUInt32(bytes, nitems);
681 nitems &= (2 << 28) - 1;
682 *ssize = -(*ssize);
683 if (frameSize < 2 * sizeof(std::int32_t))
684 return R__FAIL("corrupt list frame size");
685 }
686
687 if (bufSize < frameSize)
688 return R__FAIL("frame too short");
689
690 return bytes - reinterpret_cast<const unsigned char *>(buffer);
691}
692
694 const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize)
695{
696 std::uint32_t nitems;
697 return R__FORWARD_RESULT(DeserializeFrameHeader(buffer, bufSize, frameSize, nitems));
698}
699
/// Serializes the feature flags as a sequence of Int64 values. An empty vector is written as a single 0.
/// Otherwise every element except the last gets its most significant bit set to signal that another value
/// follows. Returns the number of bytes used.
/// Throws an RException ("feature flag out of bounds") for a negative flag value; note that this check only
/// runs when `buffer` is non-null.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// it is called as SerializeFeatureFlags(...) further down in this file.
701 const std::vector<std::int64_t> &flags, void *buffer)
702{
703 if (flags.empty())
704 return SerializeInt64(0, buffer);
705
706 if (buffer) {
707 auto bytes = reinterpret_cast<unsigned char *>(buffer);
708
709 for (unsigned i = 0; i < flags.size(); ++i) {
710 if (flags[i] < 0)
711 throw RException(R__FAIL("feature flag out of bounds"));
712
713 // The MSb indicates that another Int64 follows; set this bit to 1 for all except the last element
714 if (i == (flags.size() - 1))
715 SerializeInt64(flags[i], bytes);
716 else
717 bytes += SerializeInt64(flags[i] | 0x8000000000000000, bytes);
718 }
719 }
720 return (flags.size() * sizeof(std::int64_t));
721}
722
/// Reads a sequence of Int64 feature flag values into `flags` (which is cleared first). Reading continues
/// as long as the value just read is negative, i.e. has its most significant bit set; that bit is removed
/// from the stored value. Fails with "feature flag buffer too short" if `bufSize` is exhausted.
/// On success returns the number of bytes consumed.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// the name used for this block is inferred from its error message and its serializing counterpart.
724 const void *buffer, std::uint32_t bufSize, std::vector<std::int64_t> &flags)
725{
726 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
727
728 flags.clear();
729 std::int64_t f;
730 do {
731 if (bufSize < sizeof(std::int64_t))
732 return R__FAIL("feature flag buffer too short");
733 bytes += DeserializeInt64(bytes, f);
734 bufSize -= sizeof(std::int64_t);
// Strip the continuation bit before storing the value
735 flags.emplace_back(f & ~0x8000000000000000);
736 } while (f < 0);
737
738 return (flags.size() * sizeof(std::int64_t));
739}
740
/// Serializes a locator and returns the number of bytes used.
/// If the locator has a non-empty URL, a negative Int32 head is written whose absolute value combines the
/// URL length (lower 24 bits) with the value 0x02 shifted left by 24 bits, followed by the URL characters.
/// Otherwise fBytesOnStorage is written as UInt32 followed by fPosition as UInt64.
/// Throws an RException ("locator too large") if the URL length does not fit into 24 bits or if
/// fBytesOnStorage is negative when interpreted as Int32.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// it is called as SerializeLocator(...) further down in this file.
742 const RNTupleLocator &locator, void *buffer)
743{
744 std::uint32_t size = 0;
745 if (!locator.fUrl.empty()) {
746 if (locator.fUrl.length() >= (1 << 24))
747 throw RException(R__FAIL("locator too large"));
748 std::int32_t head = locator.fUrl.length();
749 head |= 0x02 << 24;
// The negative sign distinguishes this form from the size/position form below
750 head = -head;
751 size += SerializeInt32(head, buffer);
752 if (buffer)
753 memcpy(reinterpret_cast<unsigned char *>(buffer) + size, locator.fUrl.data(), locator.fUrl.length());
754 size += locator.fUrl.length();
755 return size;
756 }
757
758 if (static_cast<std::int32_t>(locator.fBytesOnStorage) < 0)
759 throw RException(R__FAIL("locator too large"));
760 size += SerializeUInt32(locator.fBytesOnStorage, buffer);
761 size += SerializeUInt64(locator.fPosition, buffer ? reinterpret_cast<unsigned char *>(buffer) + size : nullptr);
762 return size;
763}
764
/// Deserializes a locator and returns the number of bytes consumed.
/// A negative Int32 head denotes a URL locator: the upper bits of its absolute value must equal 0x02 and the
/// lower 24 bits give the URL length; fBytesOnStorage and fPosition are set to 0.
/// A non-negative head is taken as fBytesOnStorage and is followed by a UInt64 stored in fPosition; the URL
/// is cleared.
/// Fails with "too short locator" if the buffer is too small and with "unsupported locator type: ..." for
/// any other type value.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// it is called as DeserializeLocator(...) further down in this file.
766 const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator)
767{
768 if (bufSize < sizeof(std::int32_t))
769 return R__FAIL("too short locator");
770
771 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
772 std::int32_t head;
773
774 bytes += DeserializeInt32(bytes, head);
775 bufSize -= sizeof(std::int32_t);
776 if (head < 0) {
777 head = -head;
778 int type = head >> 24;
779 if (type != 0x02)
780 return R__FAIL("unsupported locator type: " + std::to_string(type));
781 std::uint32_t locatorSize = static_cast<std::uint32_t>(head) & 0x00FFFFFF;
782 if (bufSize < locatorSize)
783 return R__FAIL("too short locator");
784 locator.fBytesOnStorage = 0;
785 locator.fPosition = 0;
786 locator.fUrl.resize(locatorSize);
787 memcpy(&locator.fUrl[0], bytes, locatorSize);
788 bytes += locatorSize;
789 } else {
790 if (bufSize < sizeof(std::uint64_t))
791 return R__FAIL("too short locator");
792 std::uint64_t offset;
793 bytes += DeserializeUInt64(bytes, offset);
794 locator.fUrl.clear();
795 locator.fBytesOnStorage = head;
796 locator.fPosition = offset;
797 }
798
799 return bytes - reinterpret_cast<const unsigned char *>(buffer);
800}
801
/// Serializes an envelope link as fUnzippedSize (UInt32) followed by the locator and returns the number of
/// bytes used. With a null `buffer` only the size is computed.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// it is called as SerializeEnvelopeLink(...) further down in this file.
803 const REnvelopeLink &envelopeLink, void *buffer)
804{
805 auto size = SerializeUInt32(envelopeLink.fUnzippedSize, buffer);
806 size += SerializeLocator(envelopeLink.fLocator,
807 buffer ? reinterpret_cast<unsigned char *>(buffer) + size : nullptr);
808 return size;
809}
810
/// Deserializes an envelope link: fUnzippedSize (UInt32) followed by the locator.
/// Fails with "too short envelope link" if the buffer cannot hold the size field, and forwards any error of
/// DeserializeLocator(). On success returns the number of bytes consumed.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// it is called as DeserializeEnvelopeLink(...) further down in this file.
812 const void *buffer, std::uint32_t bufSize, REnvelopeLink &envelopeLink)
813{
814 if (bufSize < sizeof(std::int32_t))
815 return R__FAIL("too short envelope link");
816
817 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
818 bytes += DeserializeUInt32(bytes, envelopeLink.fUnzippedSize);
819 bufSize -= sizeof(std::uint32_t);
820 auto result = DeserializeLocator(bytes, bufSize, envelopeLink.fLocator);
821 if (!result)
822 return R__FORWARD_ERROR(result);
823 bytes += result.Unwrap();
824 return bytes - reinterpret_cast<const unsigned char *>(buffer);
825}
826
827
/// Serializes a cluster summary as a record frame: fFirstEntry (UInt64) followed by the number of entries
/// (Int64). If fColumnGroupID is non-negative, the number of entries is stored negated and the column group
/// id follows as UInt32. Returns the size of the frame.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// the name used for this block is inferred from the parameter type and its deserializing counterpart.
829 const RClusterSummary &clusterSummary, void *buffer)
830{
831 auto base = reinterpret_cast<unsigned char *>(buffer);
832 auto pos = base;
// *where is nullptr when no buffer was given and the current write position otherwise
833 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
834
835 auto frame = pos;
836 pos += SerializeRecordFramePreamble(*where);
837 pos += SerializeUInt64(clusterSummary.fFirstEntry, *where);
838 if (clusterSummary.fColumnGroupID >= 0) {
// The negative sign tells the reader that a column group id follows
839 pos += SerializeInt64(-static_cast<int64_t>(clusterSummary.fNEntries), *where);
840 pos += SerializeUInt32(clusterSummary.fColumnGroupID, *where);
841 } else {
842 pos += SerializeInt64(static_cast<int64_t>(clusterSummary.fNEntries), *where);
843 }
844 auto size = pos - frame;
845 pos += SerializeFramePostscript(frame, size);
846 return size;
847}
848
849
/// Deserializes a cluster summary record frame: fFirstEntry (UInt64) followed by the number of entries
/// (Int64). A negative number of entries means that its absolute value is the entry count and that a UInt32
/// column group id follows; otherwise fColumnGroupID is set to -1.
/// Fails with "too short cluster summary" if the frame cannot hold these members and forwards any error of
/// DeserializeFrameHeader(). On success returns the frame size announced by the frame header.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// the name used for this block is inferred from the parameter type and the error message.
851 const void *buffer, std::uint32_t bufSize, RClusterSummary &clusterSummary)
852{
853 auto base = reinterpret_cast<const unsigned char *>(buffer);
854 auto bytes = base;
855 std::uint32_t frameSize;
856 auto result = DeserializeFrameHeader(bytes, bufSize, frameSize);
857 if (!result)
858 return R__FORWARD_ERROR(result);
859 bytes += result.Unwrap();
860
// Bytes of the frame that have not been consumed yet
861 auto fnBufSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
862 if (fnBufSizeLeft() < 2 * sizeof(std::uint64_t))
863 return R__FAIL("too short cluster summary");
864
865 bytes += DeserializeUInt64(bytes, clusterSummary.fFirstEntry);
866 std::int64_t nEntries;
867 bytes += DeserializeInt64(bytes, nEntries);
868
869 if (nEntries < 0) {
870 if (fnBufSizeLeft() < sizeof(std::uint32_t))
871 return R__FAIL("too short cluster summary");
872 clusterSummary.fNEntries = -nEntries;
873 std::uint32_t columnGroupID;
874 bytes += DeserializeUInt32(bytes, columnGroupID);
875 clusterSummary.fColumnGroupID = columnGroupID;
876 } else {
877 clusterSummary.fNEntries = nEntries;
878 clusterSummary.fColumnGroupID = -1;
879 }
880
881 return frameSize;
882}
883
884
/// Serializes a cluster group as a record frame: fNClusters (UInt32) followed by the envelope link of the
/// page list. Returns the size of the frame.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// the name used for this block is inferred from the parameter type and its deserializing counterpart.
886 const RClusterGroup &clusterGroup, void *buffer)
887{
888 auto base = reinterpret_cast<unsigned char *>(buffer);
889 auto pos = base;
// *where is nullptr when no buffer was given and the current write position otherwise
890 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
891
892 auto frame = pos;
893 pos += SerializeRecordFramePreamble(*where);
894 pos += SerializeUInt32(clusterGroup.fNClusters, *where);
895 pos += SerializeEnvelopeLink(clusterGroup.fPageListEnvelopeLink, *where);
896 auto size = pos - frame;
897 pos += SerializeFramePostscript(frame, size);
898 return size;
899}
900
901
/// Deserializes a cluster group record frame: fNClusters (UInt32) followed by the envelope link of the page
/// list. Fails with "too short cluster group" if the frame cannot hold fNClusters and forwards any error of
/// DeserializeFrameHeader() or DeserializeEnvelopeLink().
/// On success returns the frame size announced by the frame header.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// the name used for this block is inferred from the parameter type and the error message.
903 const void *buffer, std::uint32_t bufSize, RClusterGroup &clusterGroup)
904{
905 auto base = reinterpret_cast<const unsigned char *>(buffer);
906 auto bytes = base;
907
908 std::uint32_t frameSize;
909 auto result = DeserializeFrameHeader(bytes, bufSize, frameSize);
910 if (!result)
911 return R__FORWARD_ERROR(result);
912 bytes += result.Unwrap();
913
// Bytes of the frame that have not been consumed yet
914 auto fnFrameSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
915 if (fnFrameSizeLeft() < sizeof(std::uint32_t))
916 return R__FAIL("too short cluster group");
917
918 bytes += DeserializeUInt32(bytes, clusterGroup.fNClusters);
919 result = DeserializeEnvelopeLink(bytes, fnFrameSizeLeft(), clusterGroup.fPageListEnvelopeLink);
920 if (!result)
921 return R__FORWARD_ERROR(result);
922
923 return frameSize;
924}
925
926
/// Serializes the header for `desc` and returns the context that was filled while doing so.
/// Written in order: envelope preamble, (empty) feature flags, release candidate tag, ntuple name, ntuple
/// description, a "ROOT v<release>" string, the list frame of fields, the list frame of columns, an empty
/// list frame for alias columns, an empty list frame for extra type information, and the envelope
/// postscript. The total size and the CRC32 are stored in the returned context.
/// With a null `buffer` nothing is written.
// NOTE(review): the lines with the return type and function name are not visible above the parameter list;
// the name used for this block is inferred.
929 void *buffer, const ROOT::Experimental::RNTupleDescriptor &desc)
930{
931 RContext context;
932
933 auto base = reinterpret_cast<unsigned char *>(buffer);
934 auto pos = base;
// *where is nullptr when no buffer was given and the current write position otherwise
935 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
936
937 pos += SerializeEnvelopePreamble(*where);
938 // So far we don't make use of feature flags
939 pos += SerializeFeatureFlags(std::vector<std::int64_t>(), *where);
940 pos += SerializeUInt32(kReleaseCandidateTag, *where);
941 pos += SerializeString(desc.GetName(), *where);
942 pos += SerializeString(desc.GetDescription(), *where);
943 pos += SerializeString(std::string("ROOT v") + ROOT_RELEASE, *where);
944
945 auto frame = pos;
946 R__ASSERT(desc.GetNFields() > 0); // we must have at least a zero field
// The zero field itself is not serialized, hence one item less
947 pos += SerializeListFramePreamble(desc.GetNFields() - 1, *where);
948 pos += SerializeFieldTree(desc, context, *where);
949 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
950
951 frame = pos;
952 pos += SerializeListFramePreamble(desc.GetNColumns(), *where);
953 pos += SerializeColumnListV1(desc, context, *where);
954 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
955
956 // We don't use alias columns yet
957 frame = pos;
958 pos += SerializeListFramePreamble(0, *where);
959 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
960
961 // We don't use extra type information yet
962 frame = pos;
963 pos += SerializeListFramePreamble(0, *where);
964 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
965
966 std::uint32_t size = pos - base;
967 std::uint32_t crc32 = 0;
968 size += SerializeEnvelopePostscript(base, size, crc32, *where);
969
970 context.SetHeaderSize(size);
971 context.SetHeaderCRC32(crc32);
972 return context;
973}
974
/// Serializes the page locations of the clusters listed in `physClusterIDs` and returns the number of bytes
/// used. The layout is an envelope preamble, then three nested list frames (clusters, columns of a cluster,
/// pages of a column) and the envelope postscript. Every page contributes its number of elements (UInt32)
/// and its locator; every column additionally stores its first element index (UInt64) and its compression
/// settings (UInt32) after the pages. With a null `buffer` nothing is written.
// NOTE(review): the line with the return type and function name is not visible above the parameter list;
// the name used for this block is inferred.
976 void *buffer, const RNTupleDescriptor &desc, std::span<DescriptorId_t> physClusterIDs, const RContext &context)
977{
978 auto base = reinterpret_cast<unsigned char *>(buffer);
979 auto pos = base;
// *where is nullptr when no buffer was given and the current write position otherwise
980 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
981
982 pos += SerializeEnvelopePreamble(*where);
983 auto topMostFrame = pos;
984 pos += SerializeListFramePreamble(physClusterIDs.size(), *where);
985
986 for (auto clusterId : physClusterIDs) {
987 const auto &clusterDesc = desc.GetClusterDescriptor(context.GetMemClusterId(clusterId));
988 // Get an ordered set of physical column ids
989 std::set<DescriptorId_t> physColumnIds;
990 for (auto column : clusterDesc.GetColumnIds())
991 physColumnIds.insert(context.GetPhysColumnId(column));
992
993 auto outerFrame = pos;
994 pos += SerializeListFramePreamble(physColumnIds.size(), *where);
995 for (auto physId : physColumnIds) {
996 auto memId = context.GetMemColumnId(physId);
997 const auto &columnRange = clusterDesc.GetColumnRange(memId);
998 const auto &pageRange = clusterDesc.GetPageRange(memId);
999
1000 auto innerFrame = pos;
1001 pos += SerializeListFramePreamble(pageRange.fPageInfos.size(), *where);
1002
1003 for (const auto &pi : pageRange.fPageInfos) {
1004 pos += SerializeUInt32(pi.fNElements, *where);
1005 pos += SerializeLocator(pi.fLocator, *where);
1006 }
1007 pos += SerializeUInt64(columnRange.fFirstElementIndex, *where);
1008 pos += SerializeUInt32(columnRange.fCompressionSettings, *where);
1009
1010 pos += SerializeFramePostscript(buffer ? innerFrame : nullptr, pos - innerFrame);
1011 }
1012 pos += SerializeFramePostscript(buffer ? outerFrame : nullptr, pos - outerFrame);
1013 }
1014
1015 pos += SerializeFramePostscript(buffer ? topMostFrame : nullptr, pos - topMostFrame);
1016 std::uint32_t size = pos - base;
1017 size += SerializeEnvelopePostscript(base, size, *where);
1018 return size;
1019}
1020
// SerializeFooterV1: writes the footer envelope.
//
// Layout produced below: envelope preamble, feature flags (none), the header CRC32 taken from
// context, an empty list frame for extension headers, an empty list frame for column groups,
// a list frame of cluster summaries, a list frame of cluster groups taken from context, an empty
// list frame for meta-data, and finally the envelope postscript.
//
// \param buffer   Destination memory. If it is nullptr, every helper is handed nullptr and only
//                 the byte counts they return are accumulated.
// \param desc     Descriptor that provides the number of clusters and the cluster descriptors.
// \param context  Provides the header CRC32, the cluster groups, and the mapping from physical to
//                 in-memory cluster ids.
// \return The total number of bytes of the envelope, including its postscript.
1022 void *buffer, const ROOT::Experimental::RNTupleDescriptor &desc, const RContext &context)
1023{
1024 auto base = reinterpret_cast<unsigned char *>(buffer);
1025 auto pos = base;
// *where is nullptr for the whole call if buffer is nullptr; otherwise it always reads the
// current value of pos, i.e. the next write position.
1026 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
1027
1028 pos += SerializeEnvelopePreamble(*where);
1029
1030 // So far we don't make use of feature flags
1031 pos += SerializeFeatureFlags(std::vector<std::int64_t>(), *where);
// The footer repeats the header checksum; the footer deserializer compares it to the header's
1032 pos += SerializeUInt32(context.GetHeaderCRC32(), *where);
1033
1034 // So far no support for extension headers
1035 auto frame = pos;
1036 pos += SerializeListFramePreamble(0, *where);
1037 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1038
1039 // So far no support for sharded clusters (no column groups)
1040 frame = pos;
1041 pos += SerializeListFramePreamble(0, *where);
1042 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1043
1044 // Cluster summaries
1045 const auto nClusters = desc.GetNClusters();
1046 frame = pos;
1047 pos += SerializeListFramePreamble(nClusters, *where);
// Clusters are written in the order of their physical ids 0 .. nClusters - 1
1048 for (unsigned int i = 0; i < nClusters; ++i) {
1049 const auto &clusterDesc = desc.GetClusterDescriptor(context.GetMemClusterId(i));
// The trailing -1 is presumably the column group id, i.e. "no column group"; the footer
// deserializer rejects summaries whose fColumnGroupID is >= 0.
1050 RClusterSummary summary{clusterDesc.GetFirstEntryIndex(), clusterDesc.GetNEntries(), -1};
1051 pos += SerializeClusterSummary(summary, *where);
1052 }
1053 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1054
1055 // Cluster groups
1056 const auto &clusterGroups = context.GetClusterGroups();
1057 const auto nClusterGroups = clusterGroups.size();
1058 frame = pos;
1059 pos += SerializeListFramePreamble(nClusterGroups, *where);
1060 for (unsigned int i = 0; i < nClusterGroups; ++i) {
1061 pos += SerializeClusterGroup(clusterGroups[i], *where);
1062 }
1063 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1064
1065 // So far no support for meta-data
1066 frame = pos;
1067 pos += SerializeListFramePreamble(0, *where);
1068 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1069
// The envelope postscript is computed over everything written so far and adds its own size
1070 std::uint32_t size = pos - base;
1071 size += SerializeEnvelopePostscript(base, size, *where);
1072 return size;
1073}
1074
// DeserializeHeaderV1: parses a header envelope and feeds its content into descBuilder.
//
// Reads, in this order: the envelope (storing its CRC32 in descBuilder), the feature flags, the
// release candidate tag, the ntuple name, description and writer strings, the list frame of
// fields, the list frame of columns, the list frame of alias columns and the list frame of extra
// type information. Alias columns and extra type information are not interpreted; a warning is
// logged if either frame is non-empty and the frame is skipped as a whole.
//
// \param buffer       Start of the serialized header.
// \param bufSize      Number of bytes available at buffer.
// \param descBuilder  Receives the header CRC32, the ntuple name/description, the fields (including
//                     the zero field and the parent links) and the columns.
// \return Success, or the error forwarded from the first helper that failed / "header too short".
1076 const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
1077{
1078 auto base = reinterpret_cast<const unsigned char *>(buffer);
1079 auto bytes = base;
// Remaining bytes between the read cursor and the end of the buffer
1080 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
1081 RResult<std::uint32_t> result{0};
1082
1083 std::uint32_t crc32{0};
1084 result = DeserializeEnvelope(bytes, fnBufSizeLeft(), crc32);
1085 if (!result)
1086 return R__FORWARD_ERROR(result);
1087 bytes += result.Unwrap();
// Kept so that the footer deserializer can compare it with the checksum stored in the footer
1088 descBuilder.SetHeaderCRC32(crc32);
1089
1090 std::vector<std::int64_t> featureFlags;
1091 result = DeserializeFeatureFlags(bytes, fnBufSizeLeft(), featureFlags);
1092 if (!result)
1093 return R__FORWARD_ERROR(result);
1094 bytes += result.Unwrap();
// Non-zero feature flags are reported but do not abort the deserialization
1095 for (auto f: featureFlags) {
1096 if (f)
1097 R__LOG_WARNING(NTupleLog()) << "Unsupported feature flag! " << f;
1098 }
1099
1100 std::uint32_t rcTag;
1101 if (fnBufSizeLeft() < static_cast<int>(sizeof(std::uint32_t)))
1102 return R__FAIL("header too short");
1103 bytes += DeserializeUInt32(bytes, rcTag);
1104 if (rcTag > 0) {
1105 R__LOG_WARNING(NTupleLog()) << "Pre-release format version: RC " << rcTag;
1106 }
1107
1108 std::string name;
1109 std::string description;
1110 std::string writer;
1111 result = DeserializeString(bytes, fnBufSizeLeft(), name);
1112 if (!result)
1113 return R__FORWARD_ERROR(result);
1114 bytes += result.Unwrap();
1115 result = DeserializeString(bytes, fnBufSizeLeft(), description);
1116 if (!result)
1117 return R__FORWARD_ERROR(result);
1118 bytes += result.Unwrap();
// The writer string is read to advance the cursor; it is not passed on to descBuilder
1119 result = DeserializeString(bytes, fnBufSizeLeft(), writer);
1120 if (!result)
1121 return R__FORWARD_ERROR(result);
1122 bytes += result.Unwrap();
1123 descBuilder.SetNTuple(name, description, "", RNTupleVersion(), RNTupleUuid());
1124
// frameSize and frame are re-used for every list frame below; fnFrameSizeLeft() yields the
// number of bytes between the read cursor and the end of the current frame.
1125 std::uint32_t frameSize;
1126 auto frame = bytes;
1127 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - frame); };
1128
1129 std::uint32_t nFields;
1130 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nFields);
1131 if (!result)
1132 return R__FORWARD_ERROR(result);
1133 bytes += result.Unwrap();
1134 // Zero field
1135 descBuilder.AddField(RFieldDescriptorBuilder()
1136 .FieldId(kZeroFieldId)
1137 .Structure(ENTupleStructure::kRecord)
1138 .MakeDescriptor()
1139 .Unwrap());
// Field ids are assigned from the position of the field within the list frame
1140 for (std::uint32_t fieldId = 0; fieldId < nFields; ++fieldId) {
1141 RFieldDescriptorBuilder fieldBuilder;
1142 result = DeserializeFieldV1(bytes, fnFrameSizeLeft(), fieldBuilder);
1143 if (!result)
1144 return R__FORWARD_ERROR(result);
1145 bytes += result.Unwrap();
// A field that names itself as its parent is attached to the zero field
1146 if (fieldId == fieldBuilder.GetParentId())
1147 fieldBuilder.ParentId(kZeroFieldId);
1148 auto fieldDesc = fieldBuilder.FieldId(fieldId).MakeDescriptor();
1149 if (!fieldDesc)
1150 return R__FORWARD_ERROR(fieldDesc);
1151 auto parentId = fieldDesc.Inspect().GetParentId();
1152 descBuilder.AddField(fieldDesc.Unwrap());
1153 auto resVoid = descBuilder.AddFieldLink(parentId, fieldId);
1154 if (!resVoid)
1155 return R__FORWARD_ERROR(resVoid);
1156 }
// Jump to the end of the frame regardless of how many bytes the items consumed
1157 bytes = frame + frameSize;
1158
1159 std::uint32_t nColumns;
1160 frame = bytes;
1161 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nColumns);
1162 if (!result)
1163 return R__FORWARD_ERROR(result);
1164 bytes += result.Unwrap();
// Per field, the highest column index handed out so far
1165 std::unordered_map<DescriptorId_t, std::uint32_t> maxIndexes;
1166 for (std::uint32_t columnId = 0; columnId < nColumns; ++columnId) {
1167 RColumnDescriptorBuilder columnBuilder;
1168 result = DeserializeColumnV1(bytes, fnFrameSizeLeft(), columnBuilder);
1169 if (!result)
1170 return R__FORWARD_ERROR(result);
1171 bytes += result.Unwrap();
1172
// The first column of a field gets index 0, every further column of that field the next index
1173 std::uint32_t idx = 0;
1174 const auto fieldId = columnBuilder.GetFieldId();
1175 auto maxIdx = maxIndexes.find(fieldId);
1176 if (maxIdx != maxIndexes.end())
1177 idx = maxIdx->second + 1;
1178 maxIndexes[fieldId] = idx;
1179
1180 auto columnDesc = columnBuilder.Index(idx).ColumnId(columnId).MakeDescriptor();
1181 if (!columnDesc)
1182 return R__FORWARD_ERROR(columnDesc);
1183 auto resVoid = descBuilder.AddColumn(columnDesc.Unwrap());
1184 if (!resVoid)
1185 return R__FORWARD_ERROR(resVoid);
1186 }
1187 bytes = frame + frameSize;
1188
1189 std::uint32_t nAliasColumns;
1190 frame = bytes;
1191 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nAliasColumns);
1192 if (!result)
1193 return R__FORWARD_ERROR(result);
// Skip the whole frame, not only its header: the items of a non-empty frame are not parsed here,
// so advancing by the header size alone would leave the cursor inside the frame.
1194 bytes = frame + frameSize;
1195 if (nAliasColumns > 0)
1196 R__LOG_WARNING(NTupleLog()) << "Alias columns are still unsupported! ";
1197
1198 std::uint32_t nTypeInfo;
1199 frame = bytes;
1200 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nTypeInfo);
1201 if (!result)
1202 return R__FORWARD_ERROR(result);
// As above: leave the frame through its end, independent of its (unparsed) content
1203 bytes = frame + frameSize;
1204 if (nTypeInfo > 0)
1205 R__LOG_WARNING(NTupleLog()) << "Extra type information is still unsupported! ";
1206
1207 return RResult<void>::Success();
1208}
1209
1210
// DeserializeFooterV1: parses a footer envelope and feeds its content into descBuilder.
//
// Reads, in this order: the envelope, the feature flags, the header CRC32 (which must equal
// descBuilder.GetHeaderCRC32()), the list frame of extension headers, the list frame of column
// groups, the list frame of cluster summaries, the list frame of cluster groups and the list frame
// of meta-data blocks. Extension headers and meta-data blocks are skipped with a warning; column
// groups (sharded clusters) cause an error.
//
// \param buffer       Start of the serialized footer.
// \param bufSize      Number of bytes available at buffer.
// \param descBuilder  Provides the header CRC32 to compare against; receives the cluster summaries
//                     and cluster groups.
// \return Success, or an error: a forwarded helper error, "footer too short", a CRC32 mismatch
//         between header and footer, or unsupported sharded clusters.
1212 const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
1213{
1214 auto base = reinterpret_cast<const unsigned char *>(buffer);
1215 auto bytes = base;
// Remaining bytes between the read cursor and the end of the buffer
1216 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
1217 RResult<std::uint32_t> result{0};
1218
1219 result = DeserializeEnvelope(bytes, fnBufSizeLeft());
1220 if (!result)
1221 return R__FORWARD_ERROR(result);
1222 bytes += result.Unwrap();
1223
1224 std::vector<std::int64_t> featureFlags;
1225 result = DeserializeFeatureFlags(bytes, fnBufSizeLeft(), featureFlags);
1226 if (!result)
1227 return R__FORWARD_ERROR(result);
1228 bytes += result.Unwrap();
// Non-zero feature flags are reported but do not abort the deserialization
1229 for (auto f: featureFlags) {
1230 if (f)
1231 R__LOG_WARNING(NTupleLog()) << "Unsupported feature flag! " << f;
1232 }
1233
// The footer carries a copy of the header checksum; it must match the one stored in descBuilder
1234 std::uint32_t crc32{0};
1235 if (fnBufSizeLeft() < static_cast<int>(sizeof(std::uint32_t)))
1236 return R__FAIL("footer too short");
1237 bytes += DeserializeUInt32(bytes, crc32);
1238 if (crc32 != descBuilder.GetHeaderCRC32())
1239 return R__FAIL("CRC32 mismatch between header and footer");
1240
// frameSize and frame are re-used for every list frame below; fnFrameSizeLeft() yields the
// number of bytes between the read cursor and the end of the current frame.
1241 std::uint32_t frameSize;
1242 auto frame = bytes;
1243 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - frame); };
1244
1245 std::uint32_t nXHeaders;
1246 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nXHeaders);
1247 if (!result)
1248 return R__FORWARD_ERROR(result);
1249 if (nXHeaders > 0)
1250 R__LOG_WARNING(NTupleLog()) << "extension headers are still unsupported";
// The frame content is not parsed; continue right behind the frame
1251 bytes = frame + frameSize;
1252
1253 std::uint32_t nColumnGroups;
1254 frame = bytes;
1255 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nColumnGroups);
1256 if (!result)
1257 return R__FORWARD_ERROR(result);
1258 if (nColumnGroups > 0)
1259 return R__FAIL("sharded clusters are still unsupported");
1260 bytes = frame + frameSize;
1261
1262 std::uint32_t nClusterSummaries;
1263 frame = bytes;
1264 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterSummaries);
1265 if (!result)
1266 return R__FORWARD_ERROR(result);
1267 bytes += result.Unwrap();
1268 for (std::uint32_t clusterId = 0; clusterId < nClusterSummaries; ++clusterId) {
1269 RClusterSummary clusterSummary;
1270 result = DeserializeClusterSummary(bytes, fnFrameSizeLeft(), clusterSummary);
1271 if (!result)
1272 return R__FORWARD_ERROR(result);
1273 bytes += result.Unwrap();
// Only summaries with a negative column group id are accepted
1274 if (clusterSummary.fColumnGroupID >= 0)
1275 return R__FAIL("sharded clusters are still unsupported");
1276 descBuilder.AddClusterSummary(clusterSummary);
1277 }
// Jump to the end of the frame regardless of how many bytes the items consumed
1278 bytes = frame + frameSize;
1279
1280 std::uint32_t nClusterGroups;
1281 frame = bytes;
1282 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterGroups);
1283 if (!result)
1284 return R__FORWARD_ERROR(result);
1285 bytes += result.Unwrap();
1286 for (std::uint32_t groupId = 0; groupId < nClusterGroups; ++groupId) {
1287 RClusterGroup clusterGroup;
1288 result = DeserializeClusterGroup(bytes, fnFrameSizeLeft(), clusterGroup);
1289 if (!result)
1290 return R__FORWARD_ERROR(result);
1291 bytes += result.Unwrap();
1292 descBuilder.AddClusterGroup(clusterGroup);
1293 }
1294 bytes = frame + frameSize;
1295
1296 std::uint32_t nMDBlocks;
1297 frame = bytes;
1298 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nMDBlocks);
1299 if (!result)
1300 return R__FORWARD_ERROR(result);
1301 if (nMDBlocks > 0)
1302 R__LOG_WARNING(NTupleLog()) << "meta-data blocks are still unsupported";
1303 bytes = frame + frameSize;
1304
1305 return RResult<void>::Success();
1306}
1307
1308
// DeserializePageListV1: parses a page list envelope into cluster descriptor builders.
//
// Expected layout: the envelope, then a list frame with one item per cluster. Each cluster item is
// a list frame with one item per column. Each column item is a list frame holding (number of
// elements, locator) per page, followed by the column offset (64 bit) and the compression settings
// (32 bit). Column ids are taken from the position of the column item within its cluster frame.
//
// \param buffer    Start of the serialized page list.
// \param bufSize   Number of bytes available at buffer.
// \param clusters  One builder is appended per cluster found in the page list, in stored order.
// \return Success, or an error: a forwarded helper error, "inner frame too short" or
//         "page list frame too short".
1310 const void *buffer, std::uint32_t bufSize, std::vector<RClusterDescriptorBuilder> &clusters)
1311{
1312 auto base = reinterpret_cast<const unsigned char *>(buffer);
1313 auto bytes = base;
// Remaining bytes between the read cursor and the end of the buffer
1314 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
1315 RResult<std::uint32_t> result{0};
1316
1317 result = DeserializeEnvelope(bytes, fnBufSizeLeft());
1318 if (!result)
1319 return R__FORWARD_ERROR(result);
1320 bytes += result.Unwrap();
1321
// Every nesting level has its own "bytes left in this frame" helper; a nested frame header is
// parsed against the space left in the enclosing frame.
1322 std::uint32_t topMostFrameSize;
1323 auto topMostFrame = bytes;
1324 auto fnTopMostFrameSizeLeft = [&]() { return topMostFrameSize - (bytes - topMostFrame); };
1325
1326 std::uint32_t nClusters;
1327 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), topMostFrameSize, nClusters);
1328 if (!result)
1329 return R__FORWARD_ERROR(result);
1330 bytes += result.Unwrap();
1331
1332 for (std::uint32_t i = 0; i < nClusters; ++i) {
// Outer frame: the columns of cluster i
1333 std::uint32_t outerFrameSize;
1334 auto outerFrame = bytes;
1335 auto fnOuterFrameSizeLeft = [&]() { return outerFrameSize - (bytes - outerFrame); };
1336
1337 RClusterDescriptorBuilder clusterBuilder;
1338
1339 std::uint32_t nColumns;
1340 result = DeserializeFrameHeader(bytes, fnTopMostFrameSizeLeft(), outerFrameSize, nColumns);
1341 if (!result)
1342 return R__FORWARD_ERROR(result);
1343 bytes += result.Unwrap();
1344
1345 for (std::uint32_t j = 0; j < nColumns; ++j) {
// Inner frame: the pages of column j, followed by the column range information
1346 std::uint32_t innerFrameSize;
1347 auto innerFrame = bytes;
1348 auto fnInnerFrameSizeLeft = [&]() { return innerFrameSize - (bytes - innerFrame); };
1349
1350 std::uint32_t nPages;
1351 result = DeserializeFrameHeader(bytes, fnOuterFrameSizeLeft(), innerFrameSize, nPages);
1352 if (!result)
1353 return R__FORWARD_ERROR(result);
1354 bytes += result.Unwrap();
1355
// NOTE(review): the declaration of pageRange is not visible in this listing; presumably it is an
// RClusterDescriptor::RPageRange, which is the type CommitColumnRange takes. TODO confirm.
1357 pageRange.fColumnId = j;
1358 for (std::uint32_t k = 0; k < nPages; ++k) {
// Only the element count is size-checked here; the locator is checked by DeserializeLocator
1359 if (fnInnerFrameSizeLeft() < static_cast<int>(sizeof(std::uint32_t)))
1360 return R__FAIL("inner frame too short");
1361 std::uint32_t nElements;
1362 RNTupleLocator locator;
1363 bytes += DeserializeUInt32(bytes, nElements);
1364 result = DeserializeLocator(bytes, fnInnerFrameSizeLeft(), locator);
1365 if (!result)
1366 return R__FORWARD_ERROR(result);
1367 pageRange.fPageInfos.push_back({ClusterSize_t(nElements), locator});
1368 bytes += result.Unwrap();
1369 }
1370
// The column offset (64 bit) and the compression settings (32 bit) trail the page items
1371 if (fnInnerFrameSizeLeft() < static_cast<int>(sizeof(std::uint32_t) + sizeof(std::uint64_t)))
1372 return R__FAIL("page list frame too short");
1373 std::uint64_t columnOffset;
1374 bytes += DeserializeUInt64(bytes, columnOffset);
1375 std::uint32_t compressionSettings;
1376 bytes += DeserializeUInt32(bytes, compressionSettings);
1377
1378 clusterBuilder.CommitColumnRange(j, columnOffset, compressionSettings, pageRange);
// Jump to the end of the frame regardless of how many bytes were consumed
1379 bytes = innerFrame + innerFrameSize;
1380 }
1381
1382 clusters.emplace_back(std::move(clusterBuilder));
1383 bytes = outerFrame + outerFrameSize;
1384 }
1385
1386 return RResult<void>::Success();
1387}
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:295
#define R__FORWARD_RESULT(res)
Short-hand to return an RResult<T> value from a subroutine to the calling stack frame.
Definition RError.hxx:293
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:291
#define R__LOG_WARNING(...)
Definition RLogger.hxx:363
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define ROOT_RELEASE
Definition RVersion.h:17
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Definition TError.h:118
char name[80]
Definition TGX11.cxx:110
int type
Definition TGX11.cxx:121
The available trivial, native content types of a column.
The serialization context is used for the piecewise serialization of a descriptor.
DescriptorId_t GetMemClusterId(DescriptorId_t physId) const
const std::vector< RClusterGroup > & GetClusterGroups() const
DescriptorId_t GetPhysColumnId(DescriptorId_t memId) const
DescriptorId_t GetPhysFieldId(DescriptorId_t memId) const
DescriptorId_t GetMemColumnId(DescriptorId_t physId) const
A helper class for serializing and deserialization of the RNTuple binary format.
static RResult< std::uint32_t > DeserializeString(const void *buffer, std::uint32_t bufSize, std::string &val)
static std::uint32_t SerializeFeatureFlags(const std::vector< std::int64_t > &flags, void *buffer)
static std::uint32_t SerializePageListV1(void *buffer, const RNTupleDescriptor &desc, std::span< DescriptorId_t > physClusterIDs, const RContext &context)
static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer)
static RResult< std::uint32_t > DeserializeLocator(const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator)
static std::uint32_t SerializeCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32, void *buffer)
Writes a CRC32 checksum of the byte range given by data and length.
static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer)
static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val)
static RResult< void > DeserializePageListV1(const void *buffer, std::uint32_t bufSize, std::vector< RClusterDescriptorBuilder > &clusters)
static std::uint32_t SerializeString(const std::string &val, void *buffer)
static RResult< std::uint32_t > DeserializeEnvelope(const void *buffer, std::uint32_t bufSize)
static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val)
static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer)
static RContext SerializeHeaderV1(void *buffer, const RNTupleDescriptor &desc)
static RResult< void > DeserializeFooterV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeInt16(std::int16_t val, void *buffer)
static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer)
static std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
static std::uint32_t SerializeEnvelopePreamble(void *buffer)
Currently all envelopes have the same version number (1).
static RResult< std::uint16_t > DeserializeColumnType(const void *buffer, ROOT::Experimental::EColumnType &type)
static RResult< std::uint32_t > DeserializeClusterGroup(const void *buffer, std::uint32_t bufSize, RClusterGroup &clusterGroup)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val)
static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val)
static RResult< std::uint32_t > DeserializeEnvelopeLink(const void *buffer, std::uint32_t bufSize, REnvelopeLink &envelopeLink)
static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size, void *buffer)
static RResult< std::uint16_t > DeserializeFieldStructure(const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer)
static std::uint32_t SerializeRecordFramePreamble(void *buffer)
static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
static RResult< std::uint32_t > DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize, std::uint32_t &nitems)
static RResult< std::uint32_t > DeserializeFeatureFlags(const void *buffer, std::uint32_t bufSize, std::vector< std::int64_t > &flags)
static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer)
static std::uint32_t SerializeFramePostscript(void *frame, std::int32_t size)
static std::uint32_t SerializeFooterV1(void *buffer, const RNTupleDescriptor &desc, const RContext &context)
static std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
static RResult< void > VerifyCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32)
Expects a CRC32 checksum in the 4 bytes following data + length and verifies it.
static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer)
While we could just interpret the enums as ints, we make the translation explicit in order to avoid a...
static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
static RResult< void > DeserializeHeaderV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static RResult< std::uint32_t > DeserializeClusterSummary(const void *buffer, std::uint32_t bufSize, RClusterSummary &clusterSummary)
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > CommitColumnRange(DescriptorId_t columnId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & ColumnId(DescriptorId_t columnId)
Base class for all ROOT issued exceptions.
Definition RError.hxx:114
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & FieldVersion(const RNTupleVersion &fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & TypeVersion(const RNTupleVersion &typeVersion)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
Meta-data stored for every field of an ntuple.
A helper class for piece-wise construction of an RNTupleDescriptor.
void AddClusterSummary(Internal::RNTupleSerializer::RClusterSummary &clusterSummary)
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RNTupleVersion &version, const RColumnModel &model, std::uint32_t index)
void AddClusterGroup(Internal::RNTupleSerializer::RClusterGroup &clusterGroup)
void SetNTuple(const std::string_view name, const std::string_view description, const std::string_view author, const RNTupleVersion &version, const RNTupleUuid &uuid)
void AddField(const RFieldDescriptor &fieldDesc)
The on-storage meta-data of an ntuple.
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
For forward and backward compatibility, attach version information to the constituents of the file f...
std::uint32_t GetVersionUse() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:195
RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
RClusterSize ClusterSize_t
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::string RNTupleUuid
Every NTuple is identified by a UUID. TODO(jblomer): should this be a TUUID?
Records the partition of data into pages for a particular column in a particular cluster.
Generic information about the physical location of data.