Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleSerialize.cxx
Go to the documentation of this file.
1/// \file RNTupleSerialize.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2021-08-02
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
17#include <ROOT/RColumnModel.hxx>
18#include <ROOT/RError.hxx>
21
22#include <RVersion.h>
23#include <RZip.h> // for R__crc32
24
25#include <cstring> // for memcpy
26#include <deque>
27#include <set>
28#include <unordered_map>
29
30template <typename T>
32
33
34namespace {
36
37std::uint32_t SerializeFieldV1(
38 const ROOT::Experimental::RFieldDescriptor &fieldDesc, ROOT::Experimental::DescriptorId_t physParentId, void *buffer)
39{
40
41 auto base = reinterpret_cast<unsigned char *>(buffer);
42 auto pos = base;
43 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
44
45 pos += RNTupleSerializer::SerializeRecordFramePreamble(*where);
46
47 pos += RNTupleSerializer::SerializeUInt32(fieldDesc.GetFieldVersion(), *where);
48 pos += RNTupleSerializer::SerializeUInt32(fieldDesc.GetTypeVersion(), *where);
49 pos += RNTupleSerializer::SerializeUInt32(physParentId, *where);
50 pos += RNTupleSerializer::SerializeFieldStructure(fieldDesc.GetStructure(), *where);
51 if (fieldDesc.GetNRepetitions() > 0) {
52 pos += RNTupleSerializer::SerializeUInt16(RNTupleSerializer::kFlagRepetitiveField, *where);
53 pos += RNTupleSerializer::SerializeUInt64(fieldDesc.GetNRepetitions(), *where);
54 } else {
55 pos += RNTupleSerializer::SerializeUInt16(0, *where);
56 }
57 pos += RNTupleSerializer::SerializeString(fieldDesc.GetFieldName(), *where);
58 pos += RNTupleSerializer::SerializeString(fieldDesc.GetTypeName(), *where);
59 pos += RNTupleSerializer::SerializeString("" /* type alias */, *where);
60 pos += RNTupleSerializer::SerializeString(fieldDesc.GetFieldDescription(), *where);
61
62 auto size = pos - base;
63 RNTupleSerializer::SerializeFramePostscript(base, size);
64
65 return size;
66}
67
68std::uint32_t SerializeFieldTree(
71 void *buffer)
72{
73 auto base = reinterpret_cast<unsigned char *>(buffer);
74 auto pos = base;
75 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
76
77 std::deque<ROOT::Experimental::DescriptorId_t> idQueue{desc.GetFieldZeroId()};
78
79 while (!idQueue.empty()) {
80 auto parentId = idQueue.front();
81 idQueue.pop_front();
82
83 for (const auto &f : desc.GetFieldIterable(parentId)) {
84 auto physFieldId = context.MapFieldId(f.GetId());
85 auto physParentId = (parentId == desc.GetFieldZeroId()) ? physFieldId : context.GetPhysFieldId(parentId);
86 pos += SerializeFieldV1(f, physParentId, *where);
87 idQueue.push_back(f.GetId());
88 }
89 }
90
91 return pos - base;
92}
93
94RResult<std::uint32_t> DeserializeFieldV1(
95 const void *buffer,
96 std::uint32_t bufSize,
98{
99 using ENTupleStructure = ROOT::Experimental::ENTupleStructure;
100
101 auto base = reinterpret_cast<const unsigned char *>(buffer);
102 auto bytes = base;
103 std::uint32_t frameSize;
104 auto fnFrameSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
105 auto result = RNTupleSerializer::DeserializeFrameHeader(bytes, bufSize, frameSize);
106 if (!result)
107 return R__FORWARD_ERROR(result);
108 bytes += result.Unwrap();
109
110 std::uint32_t fieldVersion;
111 std::uint32_t typeVersion;
112 std::uint32_t parentId;
113 // initialize properly for call to SerializeFieldStructure()
114 ENTupleStructure structure{ENTupleStructure::kLeaf};
115 std::uint16_t flags;
116 if (fnFrameSizeLeft() < 3 * sizeof(std::uint32_t) +
117 RNTupleSerializer::SerializeFieldStructure(structure, nullptr) +
118 sizeof(std::uint16_t))
119 {
120 return R__FAIL("field record frame too short");
121 }
122 bytes += RNTupleSerializer::DeserializeUInt32(bytes, fieldVersion);
123 bytes += RNTupleSerializer::DeserializeUInt32(bytes, typeVersion);
124 bytes += RNTupleSerializer::DeserializeUInt32(bytes, parentId);
125 auto res16 = RNTupleSerializer::DeserializeFieldStructure(bytes, structure);
126 if (!res16)
127 return R__FORWARD_ERROR(res16);
128 bytes += res16.Unwrap();
129 bytes += RNTupleSerializer::DeserializeUInt16(bytes, flags);
130 fieldDesc.FieldVersion(fieldVersion).TypeVersion(typeVersion).ParentId(parentId).Structure(structure);
131
132 if (flags & RNTupleSerializer::kFlagRepetitiveField) {
133 if (fnFrameSizeLeft() < sizeof(std::uint64_t))
134 return R__FAIL("field record frame too short");
135 std::uint64_t nRepetitions;
136 bytes += RNTupleSerializer::DeserializeUInt64(bytes, nRepetitions);
137 fieldDesc.NRepetitions(nRepetitions);
138 }
139
140 std::string fieldName;
141 std::string typeName;
142 std::string aliasName; // so far unused
143 std::string description;
144 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), fieldName).Unwrap();
145 if (!result)
146 return R__FORWARD_ERROR(result);
147 bytes += result.Unwrap();
148 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), typeName).Unwrap();
149 if (!result)
150 return R__FORWARD_ERROR(result);
151 bytes += result.Unwrap();
152 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), aliasName).Unwrap();
153 if (!result)
154 return R__FORWARD_ERROR(result);
155 bytes += result.Unwrap();
156 result = RNTupleSerializer::DeserializeString(bytes, fnFrameSizeLeft(), description).Unwrap();
157 if (!result)
158 return R__FORWARD_ERROR(result);
159 bytes += result.Unwrap();
160 fieldDesc.FieldName(fieldName).TypeName(typeName).FieldDescription(description);
161
162 return frameSize;
163}
164
165std::uint32_t SerializeColumnListV1(
168 void *buffer)
169{
170 using RColumnElementBase = ROOT::Experimental::Detail::RColumnElementBase;
171
172 auto base = reinterpret_cast<unsigned char *>(buffer);
173 auto pos = base;
174 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
175
176 std::deque<ROOT::Experimental::DescriptorId_t> idQueue{desc.GetFieldZeroId()};
177
178 while (!idQueue.empty()) {
179 auto parentId = idQueue.front();
180 idQueue.pop_front();
181
182 for (const auto &c : desc.GetColumnIterable(parentId)) {
183 auto frame = pos;
184 pos += RNTupleSerializer::SerializeRecordFramePreamble(*where);
185
186 auto type = c.GetModel().GetType();
187 pos += RNTupleSerializer::SerializeColumnType(type, *where);
188 pos += RNTupleSerializer::SerializeUInt16(RColumnElementBase::GetBitsOnStorage(type), *where);
189 pos += RNTupleSerializer::SerializeUInt32(context.GetPhysFieldId(c.GetFieldId()), *where);
190 std::uint32_t flags = 0;
191 // TODO(jblomer): add support for descending columns in the column model
192 if (c.GetModel().GetIsSorted())
193 flags |= RNTupleSerializer::kFlagSortAscColumn;
194 // TODO(jblomer): fix for unsigned integer types
196 flags |= RNTupleSerializer::kFlagNonNegativeColumn;
197 pos += RNTupleSerializer::SerializeUInt32(flags, *where);
198
199 pos += RNTupleSerializer::SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
200
201 context.MapColumnId(c.GetId());
202 }
203
204 for (const auto &f : desc.GetFieldIterable(parentId))
205 idQueue.push_back(f.GetId());
206 }
207
208 return pos - base;
209}
210
211RResult<std::uint32_t> DeserializeColumnV1(
212 const void *buffer,
213 std::uint32_t bufSize,
215{
217
218 auto base = reinterpret_cast<const unsigned char *>(buffer);
219 auto bytes = base;
220 std::uint32_t frameSize;
221 auto fnFrameSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
222 auto result = RNTupleSerializer::DeserializeFrameHeader(bytes, bufSize, frameSize);
223 if (!result)
224 return R__FORWARD_ERROR(result);
225 bytes += result.Unwrap();
226
227 // Initialize properly for SerializeColumnType
228 EColumnType type{EColumnType::kIndex};
229 std::uint16_t bitsOnStorage;
230 std::uint32_t fieldId;
231 std::uint32_t flags;
232 if (fnFrameSizeLeft() < RNTupleSerializer::SerializeColumnType(type, nullptr) +
233 sizeof(std::uint16_t) + 2 * sizeof(std::uint32_t))
234 {
235 return R__FAIL("column record frame too short");
236 }
237 auto res16 = RNTupleSerializer::DeserializeColumnType(bytes, type);
238 if (!res16)
239 return R__FORWARD_ERROR(res16);
240 bytes += res16.Unwrap();
241 bytes += RNTupleSerializer::DeserializeUInt16(bytes, bitsOnStorage);
242 bytes += RNTupleSerializer::DeserializeUInt32(bytes, fieldId);
243 bytes += RNTupleSerializer::DeserializeUInt32(bytes, flags);
244
246 return R__FAIL("column element size mismatch");
247
248 const bool isSorted = (flags & (RNTupleSerializer::kFlagSortAscColumn | RNTupleSerializer::kFlagSortDesColumn));
249 columnDesc.FieldId(fieldId).Model({type, isSorted});
250
251 return frameSize;
252}
253
254std::uint32_t SerializeLocatorPayloadURI(const ROOT::Experimental::RNTupleLocator &locator, unsigned char *buffer)
255{
256 const auto &uri = locator.GetPosition<std::string>();
257 if (uri.length() >= (1 << 16))
258 throw ROOT::Experimental::RException(R__FAIL("locator too large"));
259 if (buffer)
260 memcpy(buffer, uri.data(), uri.length());
261 return uri.length();
262}
263
264void DeserializeLocatorPayloadURI(const unsigned char *buffer, std::uint32_t payloadSize,
266{
267 locator.fBytesOnStorage = 0;
268 auto &uri = locator.fPosition.emplace<std::string>();
269 uri.resize(payloadSize);
270 memcpy(uri.data(), buffer, payloadSize);
271}
272
273std::uint32_t SerializeLocatorPayloadObject64(const ROOT::Experimental::RNTupleLocator &locator, unsigned char *buffer)
274{
276 if (buffer) {
277 RNTupleSerializer::SerializeUInt32(locator.fBytesOnStorage, buffer);
278 RNTupleSerializer::SerializeUInt64(data.fLocation, buffer + sizeof(std::uint32_t));
279 }
280 return sizeof(std::uint32_t) + sizeof(std::uint64_t);
281}
282
283void DeserializeLocatorPayloadObject64(const unsigned char *buffer, ROOT::Experimental::RNTupleLocator &locator)
284{
286 RNTupleSerializer::DeserializeUInt32(buffer, locator.fBytesOnStorage);
287 RNTupleSerializer::DeserializeUInt64(buffer + sizeof(std::uint32_t), data.fLocation);
288}
289
290} // anonymous namespace
291
292
294 const unsigned char *data, std::uint32_t length, std::uint32_t &crc32, void *buffer)
295{
296 if (buffer != nullptr) {
297 crc32 = R__crc32(0, nullptr, 0);
298 crc32 = R__crc32(crc32, data, length);
299 SerializeUInt32(crc32, buffer);
300 }
301 return 4;
302}
303
305 const unsigned char *data, std::uint32_t length, std::uint32_t &crc32)
306{
307 auto checksumReal = R__crc32(0, nullptr, 0);
308 checksumReal = R__crc32(checksumReal, data, length);
309 DeserializeUInt32(data + length, crc32);
310 if (crc32 != checksumReal)
311 return R__FAIL("CRC32 checksum mismatch");
312 return RResult<void>::Success();
313}
314
315
317 const unsigned char *data, std::uint32_t length)
318{
319 std::uint32_t crc32;
320 return R__FORWARD_RESULT(VerifyCRC32(data, length, crc32));
321}
322
323
324std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt16(std::int16_t val, void *buffer)
325{
326 if (buffer != nullptr) {
327 auto bytes = reinterpret_cast<unsigned char *>(buffer);
328 bytes[0] = (val & 0x00FF);
329 bytes[1] = (val & 0xFF00) >> 8;
330 }
331 return 2;
332}
333
334std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt16(const void *buffer, std::int16_t &val)
335{
336 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
337 val = std::int16_t(bytes[0]) + (std::int16_t(bytes[1]) << 8);
338 return 2;
339}
340
341std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt16(std::uint16_t val, void *buffer)
342{
343 return SerializeInt16(val, buffer);
344}
345
346std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt16(const void *buffer, std::uint16_t &val)
347{
348 return DeserializeInt16(buffer, *reinterpret_cast<std::int16_t *>(&val));
349}
350
351std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt32(std::int32_t val, void *buffer)
352{
353 if (buffer != nullptr) {
354 auto bytes = reinterpret_cast<unsigned char *>(buffer);
355 bytes[0] = (val & 0x000000FF);
356 bytes[1] = (val & 0x0000FF00) >> 8;
357 bytes[2] = (val & 0x00FF0000) >> 16;
358 bytes[3] = (val & 0xFF000000) >> 24;
359 }
360 return 4;
361}
362
363std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt32(const void *buffer, std::int32_t &val)
364{
365 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
366 val = std::int32_t(bytes[0]) + (std::int32_t(bytes[1]) << 8) +
367 (std::int32_t(bytes[2]) << 16) + (std::int32_t(bytes[3]) << 24);
368 return 4;
369}
370
371std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt32(std::uint32_t val, void *buffer)
372{
373 return SerializeInt32(val, buffer);
374}
375
376std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt32(const void *buffer, std::uint32_t &val)
377{
378 return DeserializeInt32(buffer, *reinterpret_cast<std::int32_t *>(&val));
379}
380
381std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeInt64(std::int64_t val, void *buffer)
382{
383 if (buffer != nullptr) {
384 auto bytes = reinterpret_cast<unsigned char *>(buffer);
385 bytes[0] = (val & 0x00000000000000FF);
386 bytes[1] = (val & 0x000000000000FF00) >> 8;
387 bytes[2] = (val & 0x0000000000FF0000) >> 16;
388 bytes[3] = (val & 0x00000000FF000000) >> 24;
389 bytes[4] = (val & 0x000000FF00000000) >> 32;
390 bytes[5] = (val & 0x0000FF0000000000) >> 40;
391 bytes[6] = (val & 0x00FF000000000000) >> 48;
392 bytes[7] = (val & 0xFF00000000000000) >> 56;
393 }
394 return 8;
395}
396
397std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeInt64(const void *buffer, std::int64_t &val)
398{
399 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
400 val = std::int64_t(bytes[0]) + (std::int64_t(bytes[1]) << 8) +
401 (std::int64_t(bytes[2]) << 16) + (std::int64_t(bytes[3]) << 24) +
402 (std::int64_t(bytes[4]) << 32) + (std::int64_t(bytes[5]) << 40) +
403 (std::int64_t(bytes[6]) << 48) + (std::int64_t(bytes[7]) << 56);
404 return 8;
405}
406
407std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeUInt64(std::uint64_t val, void *buffer)
408{
409 return SerializeInt64(val, buffer);
410}
411
412std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::DeserializeUInt64(const void *buffer, std::uint64_t &val)
413{
414 return DeserializeInt64(buffer, *reinterpret_cast<std::int64_t *>(&val));
415}
416
417std::uint32_t ROOT::Experimental::Internal::RNTupleSerializer::SerializeString(const std::string &val, void *buffer)
418{
419 if (buffer) {
420 auto pos = reinterpret_cast<unsigned char *>(buffer);
421 pos += SerializeUInt32(val.length(), pos);
422 memcpy(pos, val.data(), val.length());
423 }
424 return sizeof(std::uint32_t) + val.length();
425}
426
428 const void *buffer, std::uint32_t bufSize, std::string &val)
429{
430 if (bufSize < sizeof(std::uint32_t))
431 return R__FAIL("string buffer too short");
432 bufSize -= sizeof(std::uint32_t);
433
434 auto base = reinterpret_cast<const unsigned char *>(buffer);
435 auto bytes = base;
436 std::uint32_t length;
437 bytes += DeserializeUInt32(buffer, length);
438 if (bufSize < length)
439 return R__FAIL("string buffer too short");
440
441 val.resize(length);
442 memcpy(&val[0], bytes, length);
443 return sizeof(std::uint32_t) + length;
444}
445
446
449{
451 switch (type) {
453 return SerializeUInt16(0x02, buffer);
455 return SerializeUInt16(0x03, buffer);
457 return SerializeUInt16(0x04, buffer);
459 return SerializeUInt16(0x05, buffer);
461 return SerializeUInt16(0x06, buffer);
463 return SerializeUInt16(0x07, buffer);
465 return SerializeUInt16(0x08, buffer);
467 return SerializeUInt16(0x09, buffer);
469 return SerializeUInt16(0x0A, buffer);
471 return SerializeUInt16(0x0B, buffer);
473 return SerializeUInt16(0x0C, buffer);
475 return SerializeUInt16(0x0D, buffer);
476 default:
477 throw RException(R__FAIL("ROOT bug: unexpected column type"));
478 }
479}
480
481
483 const void *buffer, ROOT::Experimental::EColumnType &type)
484{
486 std::uint16_t onDiskType;
487 auto result = DeserializeUInt16(buffer, onDiskType);
488 switch (onDiskType) {
489 case 0x02:
491 break;
492 case 0x03:
494 break;
495 case 0x04:
497 break;
498 case 0x05:
500 break;
501 case 0x06:
503 break;
504 case 0x07:
506 break;
507 case 0x08:
509 break;
510 case 0x09:
512 break;
513 case 0x0A:
515 break;
516 case 0x0B:
518 break;
519 case 0x0C:
521 break;
522 case 0x0D:
524 break;
525 default:
526 return R__FAIL("unexpected on-disk column type");
527 }
528 return result;
529}
530
531
533 ROOT::Experimental::ENTupleStructure structure, void *buffer)
534{
536 switch (structure) {
538 return SerializeUInt16(0x00, buffer);
540 return SerializeUInt16(0x01, buffer);
542 return SerializeUInt16(0x02, buffer);
544 return SerializeUInt16(0x03, buffer);
546 return SerializeUInt16(0x04, buffer);
547 default:
548 throw RException(R__FAIL("ROOT bug: unexpected field structure type"));
549 }
550}
551
552
554 const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
555{
557 std::uint16_t onDiskValue;
558 auto result = DeserializeUInt16(buffer, onDiskValue);
559 switch (onDiskValue) {
560 case 0x00:
561 structure = ENTupleStructure::kLeaf;
562 break;
563 case 0x01:
565 break;
566 case 0x02:
567 structure = ENTupleStructure::kRecord;
568 break;
569 case 0x03:
570 structure = ENTupleStructure::kVariant;
571 break;
572 case 0x04:
574 break;
575 default:
576 return R__FAIL("unexpected on-disk field structure value");
577 }
578 return result;
579}
580
581
/// Currently all envelopes have the same version number (1). At a later point, different envelope types
/// may have different version numbers.
585{
586 auto base = reinterpret_cast<unsigned char *>(buffer);
587 auto pos = base;
588 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
589
590 pos += SerializeUInt16(kEnvelopeCurrentVersion, *where);
591 pos += SerializeUInt16(kEnvelopeMinVersion, *where);
592 return pos - base;
593}
594
595
597 const unsigned char *envelope, std::uint32_t size, std::uint32_t &crc32, void *buffer)
598{
599 return SerializeCRC32(envelope, size, crc32, buffer);
600}
601
603 const unsigned char *envelope, std::uint32_t size, void *buffer)
604{
605 std::uint32_t crc32;
606 return SerializeEnvelopePostscript(envelope, size, crc32, buffer);
607}
608
/// Currently all envelopes have the same version number (1). At a later point, different envelope types
/// may have different version numbers.
612 const void *buffer, std::uint32_t bufSize, std::uint32_t &crc32)
613{
614 if (bufSize < (2 * sizeof(std::uint16_t) + sizeof(std::uint32_t)))
615 return R__FAIL("invalid envelope, too short");
616
617 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
618 auto base = bytes;
619
620 std::uint16_t protocolVersionAtWrite;
621 std::uint16_t protocolVersionMinRequired;
622 bytes += DeserializeUInt16(bytes, protocolVersionAtWrite);
623 // RNTuple compatible back to version 1 (but not to version 0)
624 if (protocolVersionAtWrite < 1)
625 return R__FAIL("The RNTuple format is too old (version 0)");
626
627 bytes += DeserializeUInt16(bytes, protocolVersionMinRequired);
628 if (protocolVersionMinRequired > kEnvelopeCurrentVersion) {
629 return R__FAIL(std::string("The RNTuple format is too new (version ") +
630 std::to_string(protocolVersionMinRequired) + ")");
631 }
632
633 // We defer the CRC32 check to the end to faciliate testing of forward/backward incompatibilities
634 auto result = VerifyCRC32(base, bufSize - 4, crc32);
635 if (!result)
636 return R__FORWARD_ERROR(result);
637
638 return sizeof(protocolVersionAtWrite) + sizeof(protocolVersionMinRequired);
639}
640
641
643 const void *buffer, std::uint32_t bufSize)
644{
645 std::uint32_t crc32;
646 return R__FORWARD_RESULT(DeserializeEnvelope(buffer, bufSize, crc32));
647}
648
649
651{
652 // Marker: multiply the final size with 1
653 return SerializeInt32(1, buffer);
654}
655
656
658 std::uint32_t nitems, void *buffer)
659{
660 if (nitems >= (1 << 28))
661 throw RException(R__FAIL("list frame too large: " + std::to_string(nitems)));
662
663 auto base = reinterpret_cast<unsigned char *>(buffer);
664 auto pos = base;
665 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
666
667 // Marker: multiply the final size with -1
668 pos += RNTupleSerializer::SerializeInt32(-1, *where);
669 pos += SerializeUInt32(nitems, *where);
670 return pos - base;
671}
672
674 void *frame, std::int32_t size)
675{
676 if (size < 0)
677 throw RException(R__FAIL("frame too large: " + std::to_string(size)));
678 if (size < static_cast<std::int32_t>(sizeof(std::int32_t)))
679 throw RException(R__FAIL("frame too short: " + std::to_string(size)));
680 if (frame) {
681 std::int32_t marker;
682 DeserializeInt32(frame, marker);
683 if ((marker < 0) && (size < static_cast<std::int32_t>(2 * sizeof(std::int32_t))))
684 throw RException(R__FAIL("frame too short: " + std::to_string(size)));
685
686 SerializeInt32(marker * size, frame);
687 }
688 return 0;
689}
690
692 const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize, std::uint32_t &nitems)
693{
694 if (bufSize < sizeof(std::int32_t))
695 return R__FAIL("frame too short");
696
697 std::int32_t *ssize = reinterpret_cast<std::int32_t *>(&frameSize);
698 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
699 bytes += DeserializeInt32(bytes, *ssize);
700 if (*ssize >= 0) {
701 // Record frame
702 nitems = 1;
703 if (frameSize < sizeof(std::int32_t))
704 return R__FAIL("corrupt record frame size");
705 } else {
706 // List frame
707 if (bufSize < 2 * sizeof(std::int32_t))
708 return R__FAIL("frame too short");
709 bytes += DeserializeUInt32(bytes, nitems);
710 nitems &= (2 << 28) - 1;
711 *ssize = -(*ssize);
712 if (frameSize < 2 * sizeof(std::int32_t))
713 return R__FAIL("corrupt list frame size");
714 }
715
716 if (bufSize < frameSize)
717 return R__FAIL("frame too short");
718
719 return bytes - reinterpret_cast<const unsigned char *>(buffer);
720}
721
723 const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize)
724{
725 std::uint32_t nitems;
726 return R__FORWARD_RESULT(DeserializeFrameHeader(buffer, bufSize, frameSize, nitems));
727}
728
730 const std::vector<std::int64_t> &flags, void *buffer)
731{
732 if (flags.empty())
733 return SerializeInt64(0, buffer);
734
735 if (buffer) {
736 auto bytes = reinterpret_cast<unsigned char *>(buffer);
737
738 for (unsigned i = 0; i < flags.size(); ++i) {
739 if (flags[i] < 0)
740 throw RException(R__FAIL("feature flag out of bounds"));
741
742 // The MSb indicates that another Int64 follows; set this bit to 1 for all except the last element
743 if (i == (flags.size() - 1))
744 SerializeInt64(flags[i], bytes);
745 else
746 bytes += SerializeInt64(flags[i] | 0x8000000000000000, bytes);
747 }
748 }
749 return (flags.size() * sizeof(std::int64_t));
750}
751
753 const void *buffer, std::uint32_t bufSize, std::vector<std::int64_t> &flags)
754{
755 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
756
757 flags.clear();
758 std::int64_t f;
759 do {
760 if (bufSize < sizeof(std::int64_t))
761 return R__FAIL("feature flag buffer too short");
762 bytes += DeserializeInt64(bytes, f);
763 bufSize -= sizeof(std::int64_t);
764 flags.emplace_back(f & ~0x8000000000000000);
765 } while (f < 0);
766
767 return (flags.size() * sizeof(std::int64_t));
768}
769
771 const RNTupleLocator &locator, void *buffer)
772{
773 std::uint32_t size = 0;
774 if (locator.fType == RNTupleLocator::kTypeFile) {
775 if (static_cast<std::int32_t>(locator.fBytesOnStorage) < 0)
776 throw RException(R__FAIL("locator too large"));
777 size += SerializeUInt32(locator.fBytesOnStorage, buffer);
778 size += SerializeUInt64(locator.GetPosition<std::uint64_t>(),
779 buffer ? reinterpret_cast<unsigned char *>(buffer) + size : nullptr);
780 return size;
781 }
782
783 auto payloadp = buffer ? reinterpret_cast<unsigned char *>(buffer) + sizeof(std::int32_t) : nullptr;
784 switch (locator.fType) {
785 case RNTupleLocator::kTypeURI: size += SerializeLocatorPayloadURI(locator, payloadp); break;
786 case RNTupleLocator::kTypeDAOS: size += SerializeLocatorPayloadObject64(locator, payloadp); break;
787 default: throw RException(R__FAIL("locator has unknown type"));
788 }
789 std::int32_t head = sizeof(std::int32_t) + size;
790 head |= locator.fReserved << 16;
791 head |= static_cast<int>(locator.fType & 0x7F) << 24;
792 head = -head;
794 return size;
795}
796
798 const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator)
799{
800 if (bufSize < sizeof(std::int32_t))
801 return R__FAIL("too short locator");
802
803 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
804 std::int32_t head;
805
806 bytes += DeserializeInt32(bytes, head);
807 bufSize -= sizeof(std::int32_t);
808 if (head < 0) {
809 head = -head;
810 const int type = head >> 24;
811 const std::uint32_t payloadSize = (static_cast<std::uint32_t>(head) & 0x0000FFFF) - sizeof(std::int32_t);
812 if (bufSize < payloadSize)
813 return R__FAIL("too short locator");
814 locator.fType = static_cast<RNTupleLocator::ELocatorType>(type);
815 locator.fReserved = static_cast<std::uint32_t>(head >> 16) & 0xFF;
816 switch (type) {
817 case RNTupleLocator::kTypeURI: DeserializeLocatorPayloadURI(bytes, payloadSize, locator); break;
818 case RNTupleLocator::kTypeDAOS: DeserializeLocatorPayloadObject64(bytes, locator); break;
819 default: return R__FAIL("unsupported locator type: " + std::to_string(type));
820 }
821 bytes += payloadSize;
822 } else {
823 if (bufSize < sizeof(std::uint64_t))
824 return R__FAIL("too short locator");
825 auto &offset = locator.fPosition.emplace<std::uint64_t>();
827 bytes += DeserializeUInt64(bytes, offset);
828 locator.fBytesOnStorage = head;
829 }
830
831 return bytes - reinterpret_cast<const unsigned char *>(buffer);
832}
833
835 const REnvelopeLink &envelopeLink, void *buffer)
836{
837 auto size = SerializeUInt32(envelopeLink.fUnzippedSize, buffer);
838 size += SerializeLocator(envelopeLink.fLocator,
839 buffer ? reinterpret_cast<unsigned char *>(buffer) + size : nullptr);
840 return size;
841}
842
844 const void *buffer, std::uint32_t bufSize, REnvelopeLink &envelopeLink)
845{
846 if (bufSize < sizeof(std::int32_t))
847 return R__FAIL("too short envelope link");
848
849 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
850 bytes += DeserializeUInt32(bytes, envelopeLink.fUnzippedSize);
851 bufSize -= sizeof(std::uint32_t);
852 auto result = DeserializeLocator(bytes, bufSize, envelopeLink.fLocator);
853 if (!result)
854 return R__FORWARD_ERROR(result);
855 bytes += result.Unwrap();
856 return bytes - reinterpret_cast<const unsigned char *>(buffer);
857}
858
859
861 const RClusterSummary &clusterSummary, void *buffer)
862{
863 auto base = reinterpret_cast<unsigned char *>(buffer);
864 auto pos = base;
865 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
866
867 auto frame = pos;
868 pos += SerializeRecordFramePreamble(*where);
869 pos += SerializeUInt64(clusterSummary.fFirstEntry, *where);
870 if (clusterSummary.fColumnGroupID >= 0) {
871 pos += SerializeInt64(-static_cast<int64_t>(clusterSummary.fNEntries), *where);
872 pos += SerializeUInt32(clusterSummary.fColumnGroupID, *where);
873 } else {
874 pos += SerializeInt64(static_cast<int64_t>(clusterSummary.fNEntries), *where);
875 }
876 auto size = pos - frame;
877 pos += SerializeFramePostscript(frame, size);
878 return size;
879}
880
881
883 const void *buffer, std::uint32_t bufSize, RClusterSummary &clusterSummary)
884{
885 auto base = reinterpret_cast<const unsigned char *>(buffer);
886 auto bytes = base;
887 std::uint32_t frameSize;
888 auto result = DeserializeFrameHeader(bytes, bufSize, frameSize);
889 if (!result)
890 return R__FORWARD_ERROR(result);
891 bytes += result.Unwrap();
892
893 auto fnBufSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
894 if (fnBufSizeLeft() < 2 * sizeof(std::uint64_t))
895 return R__FAIL("too short cluster summary");
896
897 bytes += DeserializeUInt64(bytes, clusterSummary.fFirstEntry);
898 std::int64_t nEntries;
899 bytes += DeserializeInt64(bytes, nEntries);
900
901 if (nEntries < 0) {
902 if (fnBufSizeLeft() < sizeof(std::uint32_t))
903 return R__FAIL("too short cluster summary");
904 clusterSummary.fNEntries = -nEntries;
905 std::uint32_t columnGroupID;
906 bytes += DeserializeUInt32(bytes, columnGroupID);
907 clusterSummary.fColumnGroupID = columnGroupID;
908 } else {
909 clusterSummary.fNEntries = nEntries;
910 clusterSummary.fColumnGroupID = -1;
911 }
912
913 return frameSize;
914}
915
916
918 const RClusterGroup &clusterGroup, void *buffer)
919{
920 auto base = reinterpret_cast<unsigned char *>(buffer);
921 auto pos = base;
922 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
923
924 auto frame = pos;
925 pos += SerializeRecordFramePreamble(*where);
926 pos += SerializeUInt32(clusterGroup.fNClusters, *where);
927 pos += SerializeEnvelopeLink(clusterGroup.fPageListEnvelopeLink, *where);
928 auto size = pos - frame;
929 pos += SerializeFramePostscript(frame, size);
930 return size;
931}
932
933
935 const void *buffer, std::uint32_t bufSize, RClusterGroup &clusterGroup)
936{
937 auto base = reinterpret_cast<const unsigned char *>(buffer);
938 auto bytes = base;
939
940 std::uint32_t frameSize;
941 auto result = DeserializeFrameHeader(bytes, bufSize, frameSize);
942 if (!result)
943 return R__FORWARD_ERROR(result);
944 bytes += result.Unwrap();
945
946 auto fnFrameSizeLeft = [&]() { return frameSize - static_cast<std::uint32_t>(bytes - base); };
947 if (fnFrameSizeLeft() < sizeof(std::uint32_t))
948 return R__FAIL("too short cluster group");
949
950 bytes += DeserializeUInt32(bytes, clusterGroup.fNClusters);
951 result = DeserializeEnvelopeLink(bytes, fnFrameSizeLeft(), clusterGroup.fPageListEnvelopeLink);
952 if (!result)
953 return R__FORWARD_ERROR(result);
954
955 return frameSize;
956}
957
958
/// Serializes the header envelope of `desc` and returns the RContext that carries the header size and CRC32.
/// NOTE(review): the line holding the return type and function name is missing from this listing; the parameter
/// list and the returned `context` suggest RNTupleSerializer::SerializeHeaderV1 -- confirm against the header.
///
/// Written in order: envelope preamble, (empty) feature flags, release candidate tag, ntuple name, ntuple
/// description, writer string ("ROOT v" + ROOT_RELEASE), four list frames (fields, columns, alias columns,
/// extra type information) and the envelope postscript.
///
/// If `buffer` is nullptr, `where` points at `buffer` itself, so every Serialize* call receives nullptr while
/// `pos` still accumulates the returned lengths (presumably a size-only pass -- confirm in the Serialize* helpers).
/// Otherwise `where` aliases `pos`, so each call receives the current write position.
961 void *buffer, const ROOT::Experimental::RNTupleDescriptor &desc)
962{
963 RContext context;
964
965 auto base = reinterpret_cast<unsigned char *>(buffer);
966 auto pos = base;
967 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
968
969 pos += SerializeEnvelopePreamble(*where);
970 // So far we don't make use of feature flags
971 pos += SerializeFeatureFlags(std::vector<std::int64_t>(), *where);
972 pos += SerializeUInt32(kReleaseCandidateTag, *where);
973 pos += SerializeString(desc.GetName(), *where);
974 pos += SerializeString(desc.GetDescription(), *where);
975 pos += SerializeString(std::string("ROOT v") + ROOT_RELEASE, *where);
976
// Field list frame. The item count excludes the zero field, hence GetNFields() - 1.
977 auto frame = pos;
978 R__ASSERT(desc.GetNFields() > 0); // we must have at least a zero field
979 pos += SerializeListFramePreamble(desc.GetNFields() - 1, *where);
980 pos += SerializeFieldTree(desc, context, *where);
// The frame start is only handed over when a real buffer is written; the frame length is pos - frame.
981 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
982
// Column list frame, one item per column of the descriptor.
983 frame = pos;
984 pos += SerializeListFramePreamble(desc.GetNColumns(), *where);
985 pos += SerializeColumnListV1(desc, context, *where);
986 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
987
988 // We don't use alias columns yet
989 frame = pos;
990 pos += SerializeListFramePreamble(0, *where);
991 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
992
993 // We don't use extra type information yet
994 frame = pos;
995 pos += SerializeListFramePreamble(0, *where);
996 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
997
// `size` is the number of bytes produced so far; the postscript call adds its own length.
// `crc32` is presumably filled by SerializeEnvelopePostscript (out-parameter) -- confirm.
998 std::uint32_t size = pos - base;
999 std::uint32_t crc32 = 0;
1000 size += SerializeEnvelopePostscript(base, size, crc32, *where);
1001
// Size and checksum are stored in the context that is returned to the caller.
1002 context.SetHeaderSize(size);
1003 context.SetHeaderCRC32(crc32);
1004 return context;
1005}
1006
/// Serializes the page list envelope for the clusters named in `physClusterIDs` and returns its total size
/// in bytes, envelope postscript included.
/// NOTE(review): the line holding the return type and function name is missing from this listing; the parameter
/// list suggests RNTupleSerializer::SerializePageListV1 -- confirm against the header.
///
/// Three nested list frames are produced:
///   - top-most frame: one item per entry of `physClusterIDs`
///   - outer frame (per cluster): one item per column of the cluster, in ascending physical column id order
///   - inner frame (per column): one item per page (element count + locator), followed by the column's
///     first element index and compression settings
///
/// If `buffer` is nullptr, all Serialize* calls receive nullptr and only the lengths are accumulated in `pos`.
1008 void *buffer, const RNTupleDescriptor &desc, std::span<DescriptorId_t> physClusterIDs, const RContext &context)
1009{
1010 auto base = reinterpret_cast<unsigned char *>(buffer);
1011 auto pos = base;
1012 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
1013
1014 pos += SerializeEnvelopePreamble(*where);
1015 auto topMostFrame = pos;
1016 pos += SerializeListFramePreamble(physClusterIDs.size(), *where);
1017
1018 for (auto clusterId : physClusterIDs) {
// The ids in physClusterIDs are translated through the context before the descriptor lookup.
1019 const auto &clusterDesc = desc.GetClusterDescriptor(context.GetMemClusterId(clusterId));
1020 // Get an ordered set of physical column ids
1021 std::set<DescriptorId_t> physColumnIds;
1022 for (auto column : clusterDesc.GetColumnIds())
1023 physColumnIds.insert(context.GetPhysColumnId(column));
1024
1025 auto outerFrame = pos;
1026 pos += SerializeListFramePreamble(physColumnIds.size(), *where);
1027 for (auto physId : physColumnIds) {
// Iteration follows the physical id order; the cluster descriptor is queried with the translated id.
1028 auto memId = context.GetMemColumnId(physId);
1029 const auto &columnRange = clusterDesc.GetColumnRange(memId);
1030 const auto &pageRange = clusterDesc.GetPageRange(memId);
1031
1032 auto innerFrame = pos;
1033 pos += SerializeListFramePreamble(pageRange.fPageInfos.size(), *where);
1034
1035 for (const auto &pi : pageRange.fPageInfos) {
1036 pos += SerializeUInt32(pi.fNElements, *where);
1037 pos += SerializeLocator(pi.fLocator, *where);
1038 }
// Per-column trailer written after the page items, still inside the inner frame.
1039 pos += SerializeUInt64(columnRange.fFirstElementIndex, *where);
1040 pos += SerializeUInt32(columnRange.fCompressionSettings, *where);
1041
1042 pos += SerializeFramePostscript(buffer ? innerFrame : nullptr, pos - innerFrame);
1043 }
1044 pos += SerializeFramePostscript(buffer ? outerFrame : nullptr, pos - outerFrame);
1045 }
1046
1047 pos += SerializeFramePostscript(buffer ? topMostFrame : nullptr, pos - topMostFrame);
// Bytes produced so far plus the length reported by the envelope postscript.
1048 std::uint32_t size = pos - base;
1049 size += SerializeEnvelopePostscript(base, size, *where);
1050 return size;
1051}
1052
/// Serializes the footer envelope of `desc` and returns its total size in bytes, envelope postscript included.
/// NOTE(review): the line holding the return type and function name is missing from this listing; the parameter
/// list suggests RNTupleSerializer::SerializeFooterV1 -- confirm against the header.
///
/// Written in order: envelope preamble, (empty) feature flags, the header CRC32 taken from `context`, and five
/// list frames: extension headers (empty), column groups (empty), cluster summaries, cluster groups and
/// meta-data blocks (empty), followed by the envelope postscript.
///
/// If `buffer` is nullptr, all Serialize* calls receive nullptr and only the lengths are accumulated in `pos`.
1054 void *buffer, const ROOT::Experimental::RNTupleDescriptor &desc, const RContext &context)
1055{
1056 auto base = reinterpret_cast<unsigned char *>(buffer);
1057 auto pos = base;
1058 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
1059
1060 pos += SerializeEnvelopePreamble(*where);
1061
1062 // So far we don't make use of feature flags
1063 pos += SerializeFeatureFlags(std::vector<std::int64_t>(), *where);
// The footer repeats the header checksum stored in the context.
1064 pos += SerializeUInt32(context.GetHeaderCRC32(), *where);
1065
1066 // So far no support for extension headers
1067 auto frame = pos;
1068 pos += SerializeListFramePreamble(0, *where);
1069 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1070
1071 // So far no support for shared clusters (no column groups)
1072 frame = pos;
1073 pos += SerializeListFramePreamble(0, *where);
1074 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1075
1076 // Cluster summaries
1077 const auto nClusterGroups = desc.GetNClusterGroups();
// The summary frame holds one item per cluster, so the count is summed over all cluster groups first.
1078 unsigned int nClusters = 0;
1079 for (const auto &cgDesc : desc.GetClusterGroupIterable())
1080 nClusters += cgDesc.GetNClusters();
1081 frame = pos;
1082 pos += SerializeListFramePreamble(nClusters, *where);
// Summaries are emitted group by group, in the order of each group's cluster id list.
1083 for (unsigned int i = 0; i < nClusterGroups; ++i) {
1084 const auto &cgDesc = desc.GetClusterGroupDescriptor(context.GetMemClusterGroupId(i));
1085 const auto nClustersInGroup = cgDesc.GetNClusters();
1086 const auto &clusterIds = cgDesc.GetClusterIds();
1087 for (unsigned int j = 0; j < nClustersInGroup; ++j) {
1088 const auto &clusterDesc = desc.GetClusterDescriptor(clusterIds[j]);
// NOTE(review): the third initializer (-1) is presumably the column group id, i.e. "no column group" -- confirm.
1089 RClusterSummary summary{clusterDesc.GetFirstEntryIndex(), clusterDesc.GetNEntries(), -1};
1090 pos += SerializeClusterSummary(summary, *where);
1091 }
1092 }
1093 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1094
1095 // Cluster groups
1096 frame = pos;
1097 pos += SerializeListFramePreamble(nClusterGroups, *where);
1098 for (unsigned int i = 0; i < nClusterGroups; ++i) {
1099 const auto &cgDesc = desc.GetClusterGroupDescriptor(context.GetMemClusterGroupId(i));
// Each group records its cluster count and a link (unzipped length + locator) to its page list envelope.
1100 RClusterGroup clusterGroup;
1101 clusterGroup.fNClusters = cgDesc.GetNClusters();
1102 clusterGroup.fPageListEnvelopeLink.fUnzippedSize = cgDesc.GetPageListLength();
1103 clusterGroup.fPageListEnvelopeLink.fLocator = cgDesc.GetPageListLocator();
1104 pos += SerializeClusterGroup(clusterGroup, *where);
1105 }
1106 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1107
1108 // So far no support for meta-data
1109 frame = pos;
1110 pos += SerializeListFramePreamble(0, *where);
1111 pos += SerializeFramePostscript(buffer ? frame : nullptr, pos - frame);
1112
// Bytes produced so far plus the length reported by the envelope postscript.
1113 std::uint32_t size = pos - base;
1114 size += SerializeEnvelopePostscript(base, size, *where);
1115 return size;
1116}
1117
/// Parses a header envelope from `buffer` (`bufSize` bytes) and feeds the result into `descBuilder`.
/// Returns RResult<void>::Success() on success; the first failing sub-step is forwarded as the error.
/// NOTE(review): the line holding the return type and function name, and the declaration of `result`, are
/// missing from this listing; the parameter list suggests RNTupleSerializer::DeserializeHeaderV1 -- confirm.
///
/// Read in order: envelope (yields the header CRC32), feature flags, release candidate tag, ntuple name,
/// description and writer string, then the field, column, alias column and extra type information list frames.
1119 const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
1120{
1121 auto base = reinterpret_cast<const unsigned char *>(buffer);
1122 auto bytes = base;
// Remaining bytes of the input buffer, measured from the current read position.
1123 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
1125
1126 std::uint32_t crc32{0};
1127 result = DeserializeEnvelope(bytes, fnBufSizeLeft(), crc32);
1128 if (!result)
1129 return R__FORWARD_ERROR(result);
1130 bytes += result.Unwrap();
// Stored in the builder; the footer deserialization compares its own CRC32 field with GetHeaderCRC32().
1131 descBuilder.SetHeaderCRC32(crc32);
1132
1133 std::vector<std::int64_t> featureFlags;
1134 result = DeserializeFeatureFlags(bytes, fnBufSizeLeft(), featureFlags);
1135 if (!result)
1136 return R__FORWARD_ERROR(result);
1137 bytes += result.Unwrap();
// Non-zero feature flags only produce a warning; parsing continues.
1138 for (auto f: featureFlags) {
1139 if (f)
1140 R__LOG_WARNING(NTupleLog()) << "Unsupported feature flag! " << f;
1141 }
1142
1143 std::uint32_t rcTag;
1144 if (fnBufSizeLeft() < static_cast<int>(sizeof(std::uint32_t)))
1145 return R__FAIL("header too short");
1146 bytes += DeserializeUInt32(bytes, rcTag);
1147 if (rcTag > 0) {
1148 R__LOG_WARNING(NTupleLog()) << "Pre-release format version: RC " << rcTag;
1149 }
1150
1151 std::string name;
1152 std::string description;
1153 std::string writer;
1154 result = DeserializeString(bytes, fnBufSizeLeft(), name);
1155 if (!result)
1156 return R__FORWARD_ERROR(result);
1157 bytes += result.Unwrap();
1158 result = DeserializeString(bytes, fnBufSizeLeft(), description);
1159 if (!result)
1160 return R__FORWARD_ERROR(result);
1161 bytes += result.Unwrap();
// The writer string is read to advance the read position; it is not passed to the builder.
1162 result = DeserializeString(bytes, fnBufSizeLeft(), writer);
1163 if (!result)
1164 return R__FORWARD_ERROR(result);
1165 bytes += result.Unwrap();
1166 descBuilder.SetNTuple(name, description);
1167
// `frame` and `frameSize` are reused for each list frame below; the lambda captures both by reference.
1168 std::uint32_t frameSize;
1169 auto frame = bytes;
1170 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - frame); };
1171
1172 std::uint32_t nFields;
1173 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nFields);
1174 if (!result)
1175 return R__FORWARD_ERROR(result);
1176 bytes += result.Unwrap();
1177 // Zero field
1178 descBuilder.AddField(RFieldDescriptorBuilder()
1179 .FieldId(kZeroFieldId)
1180 .Structure(ENTupleStructure::kRecord)
1181 .MakeDescriptor()
1182 .Unwrap());
// Field ids are assigned from the position of the field in the frame.
1183 for (std::uint32_t fieldId = 0; fieldId < nFields; ++fieldId) {
1184 RFieldDescriptorBuilder fieldBuilder;
1185 result = DeserializeFieldV1(bytes, fnFrameSizeLeft(), fieldBuilder);
1186 if (!result)
1187 return R__FORWARD_ERROR(result);
1188 bytes += result.Unwrap();
// A field that names itself as parent is attached to the zero field instead.
1189 if (fieldId == fieldBuilder.GetParentId())
1190 fieldBuilder.ParentId(kZeroFieldId);
1191 auto fieldDesc = fieldBuilder.FieldId(fieldId).MakeDescriptor();
1192 if (!fieldDesc)
1193 return R__FORWARD_ERROR(fieldDesc);
// The parent id is read before Unwrap() hands the descriptor to the builder.
1194 auto parentId = fieldDesc.Inspect().GetParentId();
1195 descBuilder.AddField(fieldDesc.Unwrap());
1196 auto resVoid = descBuilder.AddFieldLink(parentId, fieldId);
1197 if (!resVoid)
1198 return R__FORWARD_ERROR(resVoid);
1199 }
// Jump to the end of the frame regardless of how many bytes the items consumed.
1200 bytes = frame + frameSize;
1201
1202 std::uint32_t nColumns;
1203 frame = bytes;
1204 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nColumns);
1205 if (!result)
1206 return R__FORWARD_ERROR(result);
1207 bytes += result.Unwrap();
// Per field id, the highest column index handed out so far.
1208 std::unordered_map<DescriptorId_t, std::uint32_t> maxIndexes;
1209 for (std::uint32_t columnId = 0; columnId < nColumns; ++columnId) {
1210 RColumnDescriptorBuilder columnBuilder;
1211 result = DeserializeColumnV1(bytes, fnFrameSizeLeft(), columnBuilder);
1212 if (!result)
1213 return R__FORWARD_ERROR(result);
1214 bytes += result.Unwrap();
1215
// The column index within its field is derived from the order of appearance: 0 for the first column
// of a field, previous index + 1 for every further column of the same field.
1216 std::uint32_t idx = 0;
1217 const auto fieldId = columnBuilder.GetFieldId();
1218 auto maxIdx = maxIndexes.find(fieldId);
1219 if (maxIdx != maxIndexes.end())
1220 idx = maxIdx->second + 1;
1221 maxIndexes[fieldId] = idx;
1222
1223 auto columnDesc = columnBuilder.Index(idx).ColumnId(columnId).MakeDescriptor();
1224 if (!columnDesc)
1225 return R__FORWARD_ERROR(columnDesc);
1226 auto resVoid = descBuilder.AddColumn(columnDesc.Unwrap());
1227 if (!resVoid)
1228 return R__FORWARD_ERROR(resVoid);
1229 }
1230 bytes = frame + frameSize;
1231
// Alias columns: only the frame header is read; a non-empty list produces a warning.
1232 std::uint32_t nAliasColumns;
1233 frame = bytes;
1234 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nAliasColumns);
1235 if (!result)
1236 return R__FORWARD_ERROR(result);
1237 bytes += result.Unwrap();
1238 if (nAliasColumns > 0)
1239 R__LOG_WARNING(NTupleLog()) << "Alias columns are still unsupported! ";
1240
// Extra type information: only the frame header is read; a non-empty list produces a warning.
// NOTE(review): unlike the field and column frames, `bytes` is not moved to frame + frameSize after the alias
// column frame, so this header is read right behind the previous frame header -- fine for empty frames,
// confirm the intent for non-empty ones.
1241 std::uint32_t nTypeInfo;
1242 frame = bytes;
1243 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nTypeInfo);
1244 if (!result)
1245 return R__FORWARD_ERROR(result);
1246 bytes += result.Unwrap();
1247 if (nTypeInfo > 0)
1248 R__LOG_WARNING(NTupleLog()) << "Extra type information is still unsupported! ";
1249
1250 return RResult<void>::Success();
1251}
1252
1253
/// Parses a footer envelope from `buffer` (`bufSize` bytes) and adds cluster summaries and cluster groups to
/// `descBuilder`. Returns RResult<void>::Success() on success; the first failing sub-step is forwarded.
/// NOTE(review): the line holding the return type and function name, and the declaration of `result`, are
/// missing from this listing; the parameter list suggests RNTupleSerializer::DeserializeFooterV1 -- confirm.
///
/// Read in order: envelope, feature flags, header CRC32 (must equal descBuilder.GetHeaderCRC32()), then the
/// list frames for extension headers, column groups, cluster summaries, cluster groups and meta-data blocks.
1255 const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
1256{
1257 auto base = reinterpret_cast<const unsigned char *>(buffer);
1258 auto bytes = base;
// Remaining bytes of the input buffer, measured from the current read position.
1259 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
1261
1262 result = DeserializeEnvelope(bytes, fnBufSizeLeft());
1263 if (!result)
1264 return R__FORWARD_ERROR(result);
1265 bytes += result.Unwrap();
1266
1267 std::vector<std::int64_t> featureFlags;
1268 result = DeserializeFeatureFlags(bytes, fnBufSizeLeft(), featureFlags);
1269 if (!result)
1270 return R__FORWARD_ERROR(result);
1271 bytes += result.Unwrap();
// Non-zero feature flags only produce a warning; parsing continues.
1272 for (auto f: featureFlags) {
1273 if (f)
1274 R__LOG_WARNING(NTupleLog()) << "Unsupported feature flag! " << f;
1275 }
1276
// The footer must carry the same CRC32 that the builder recorded for the header.
1277 std::uint32_t crc32{0};
1278 if (fnBufSizeLeft() < static_cast<int>(sizeof(std::uint32_t)))
1279 return R__FAIL("footer too short");
1280 bytes += DeserializeUInt32(bytes, crc32);
1281 if (crc32 != descBuilder.GetHeaderCRC32())
1282 return R__FAIL("CRC32 mismatch between header and footer");
1283
// `frame` and `frameSize` are reused for each list frame below; the lambda captures both by reference.
1284 std::uint32_t frameSize;
1285 auto frame = bytes;
1286 auto fnFrameSizeLeft = [&]() { return frameSize - (bytes - frame); };
1287
// Extension headers: a non-empty list only produces a warning; the frame is skipped as a whole.
1288 std::uint32_t nXHeaders;
1289 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nXHeaders);
1290 if (!result)
1291 return R__FORWARD_ERROR(result);
1292 if (nXHeaders > 0)
1293 R__LOG_WARNING(NTupleLog()) << "extension headers are still unsupported";
1294 bytes = frame + frameSize;
1295
// Column groups: a non-empty list is a hard error.
1296 std::uint32_t nColumnGroups;
1297 frame = bytes;
1298 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nColumnGroups);
1299 if (!result)
1300 return R__FORWARD_ERROR(result);
1301 if (nColumnGroups > 0)
1302 return R__FAIL("sharded clusters are still unsupported");
1303 bytes = frame + frameSize;
1304
// Cluster summaries: the cluster id is the position of the summary in the frame.
1305 std::uint32_t nClusterSummaries;
1306 frame = bytes;
1307 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterSummaries);
1308 if (!result)
1309 return R__FORWARD_ERROR(result);
1310 bytes += result.Unwrap();
1311 for (std::uint32_t clusterId = 0; clusterId < nClusterSummaries; ++clusterId) {
1312 RClusterSummary clusterSummary;
1313 result = DeserializeClusterSummary(bytes, fnFrameSizeLeft(), clusterSummary);
1314 if (!result)
1315 return R__FORWARD_ERROR(result);
1316 bytes += result.Unwrap();
// A non-negative column group id is rejected.
1317 if (clusterSummary.fColumnGroupID >= 0)
1318 return R__FAIL("sharded clusters are still unsupported");
1319 descBuilder.AddClusterSummary(clusterId, clusterSummary.fFirstEntry, clusterSummary.fNEntries);
1320 }
1321 bytes = frame + frameSize;
1322
1323 std::uint32_t nClusterGroups;
1324 frame = bytes;
1325 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nClusterGroups);
1326 if (!result)
1327 return R__FORWARD_ERROR(result);
1328 bytes += result.Unwrap();
// Running cluster id: each group receives the next fNClusters consecutive ids.
1329 std::uint64_t clusterId = 0;
1330 for (std::uint32_t groupId = 0; groupId < nClusterGroups; ++groupId) {
1331 RClusterGroup clusterGroup;
1332 result = DeserializeClusterGroup(bytes, fnFrameSizeLeft(), clusterGroup);
1333 if (!result)
1334 return R__FORWARD_ERROR(result);
1335 bytes += result.Unwrap();
1336
// NOTE(review): the continuation of the builder call chain that follows ClusterGroupId(groupId) is missing
// from this listing (the statement below has no terminator here) -- consult the original source.
1338 RClusterGroupDescriptorBuilder clusterGroupBuilder;
1339 clusterGroupBuilder.ClusterGroupId(groupId)
1342 for (std::uint64_t i = 0; i < clusterGroup.fNClusters; ++i)
1343 clusterGroupBuilder.AddCluster(clusterId + i);
1344 clusterId += clusterGroup.fNClusters;
1345 descBuilder.AddClusterGroup(std::move(clusterGroupBuilder));
1346 }
1347 bytes = frame + frameSize;
1348
// Meta-data blocks: a non-empty list only produces a warning; the frame is skipped as a whole.
1349 std::uint32_t nMDBlocks;
1350 frame = bytes;
1351 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), frameSize, nMDBlocks);
1352 if (!result)
1353 return R__FORWARD_ERROR(result);
1354 if (nMDBlocks > 0)
1355 R__LOG_WARNING(NTupleLog()) << "meta-data blocks are still unsupported";
1356 bytes = frame + frameSize;
1357
1358 return RResult<void>::Success();
1359}
1360
1361
/// Parses a page list envelope from `buffer` (`bufSize` bytes) and commits one column range per column to the
/// matching entry of `clusters`. Returns RResult<void>::Success() on success; the first failing sub-step is
/// forwarded as the error.
/// NOTE(review): the line holding the return type and function name, and the declarations of `result` and
/// `pageRange`, are missing from this listing; the parameter list suggests
/// RNTupleSerializer::DeserializePageListV1 -- confirm against the header.
///
/// Expected nesting: top-most frame (one item per cluster) > outer frame (one item per column) > inner frame
/// (one item per page: element count + locator), with each inner frame followed by a 64 bit column offset and
/// 32 bit compression settings.
1363 const void *buffer, std::uint32_t bufSize, std::vector<RClusterDescriptorBuilder> &clusters)
1364{
1365 auto base = reinterpret_cast<const unsigned char *>(buffer);
1366 auto bytes = base;
// Remaining bytes of the input buffer, measured from the current read position.
1367 auto fnBufSizeLeft = [&]() { return bufSize - (bytes - base); };
1369
1370 result = DeserializeEnvelope(bytes, fnBufSizeLeft());
1371 if (!result)
1372 return R__FORWARD_ERROR(result);
1373 bytes += result.Unwrap();
1374
1375 std::uint32_t topMostFrameSize;
1376 auto topMostFrame = bytes;
1377 auto fnTopMostFrameSizeLeft = [&]() { return topMostFrameSize - (bytes - topMostFrame); };
1378
1379 std::uint32_t nClusters;
1380 result = DeserializeFrameHeader(bytes, fnBufSizeLeft(), topMostFrameSize, nClusters);
1381 if (!result)
1382 return R__FORWARD_ERROR(result);
1383 bytes += result.Unwrap();
1384
// The caller supplies one builder per cluster; this check also keeps clusters[i] below within bounds.
1385 if (nClusters != clusters.size())
1386 return R__FAIL("mismatch of page list and cluster summaries");
1387
1388 for (std::uint32_t i = 0; i < nClusters; ++i) {
1389 std::uint32_t outerFrameSize;
1390 auto outerFrame = bytes;
1391 auto fnOuterFrameSizeLeft = [&]() { return outerFrameSize - (bytes - outerFrame); };
1392
1393 std::uint32_t nColumns;
1394 result = DeserializeFrameHeader(bytes, fnTopMostFrameSizeLeft(), outerFrameSize, nColumns);
1395 if (!result)
1396 return R__FORWARD_ERROR(result);
1397 bytes += result.Unwrap();
1398
1399 for (std::uint32_t j = 0; j < nColumns; ++j) {
1400 std::uint32_t innerFrameSize;
1401 auto innerFrame = bytes;
1402 auto fnInnerFrameSizeLeft = [&]() { return innerFrameSize - (bytes - innerFrame); };
1403
1404 std::uint32_t nPages;
1405 result = DeserializeFrameHeader(bytes, fnOuterFrameSizeLeft(), innerFrameSize, nPages);
1406 if (!result)
1407 return R__FORWARD_ERROR(result);
1408 bytes += result.Unwrap();
1409
// The column id is the position of the column within the cluster's frame.
1411 pageRange.fColumnId = j;
1412 for (std::uint32_t k = 0; k < nPages; ++k) {
// Make sure the element count fits before reading it; the locator is size-checked by DeserializeLocator.
1413 if (fnInnerFrameSizeLeft() < static_cast<int>(sizeof(std::uint32_t)))
1414 return R__FAIL("inner frame too short");
1415 std::uint32_t nElements;
1416 RNTupleLocator locator;
1417 bytes += DeserializeUInt32(bytes, nElements);
1418 result = DeserializeLocator(bytes, fnInnerFrameSizeLeft(), locator);
1419 if (!result)
1420 return R__FORWARD_ERROR(result);
1421 pageRange.fPageInfos.push_back({ClusterSize_t(nElements), locator});
1422 bytes += result.Unwrap();
1423 }
1424
// Trailer after the page items: 64 bit column offset followed by 32 bit compression settings.
1425 if (fnInnerFrameSizeLeft() < static_cast<int>(sizeof(std::uint32_t) + sizeof(std::uint64_t)))
1426 return R__FAIL("page list frame too short");
1427 std::uint64_t columnOffset;
1428 bytes += DeserializeUInt64(bytes, columnOffset);
1429 std::uint32_t compressionSettings;
1430 bytes += DeserializeUInt32(bytes, compressionSettings);
1431
1432 clusters[i].CommitColumnRange(j, columnOffset, compressionSettings, pageRange);
// Jump to the end of the frame regardless of how many bytes the items consumed.
1433 bytes = innerFrame + innerFrameSize;
1434 }
1435
1436 bytes = outerFrame + outerFrameSize;
1437 }
1438
1439 return RResult<void>::Success();
1440}
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:307
#define R__FORWARD_RESULT(res)
Short-hand to return an RResult<T> value from a subroutine to the calling stack frame.
Definition RError.hxx:305
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:303
#define R__LOG_WARNING(...)
Definition RLogger.hxx:363
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define ROOT_RELEASE
Definition RVersion.h:17
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Definition TError.h:117
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t nitems
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t bytes
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
The available trivial, native content types of a column.
The serialization context is used for the piecewise serialization of a descriptor.
DescriptorId_t GetMemClusterId(DescriptorId_t physId) const
DescriptorId_t GetPhysColumnId(DescriptorId_t memId) const
DescriptorId_t GetPhysFieldId(DescriptorId_t memId) const
DescriptorId_t GetMemClusterGroupId(DescriptorId_t physId) const
DescriptorId_t GetMemColumnId(DescriptorId_t physId) const
A helper class for serialization and deserialization of the RNTuple binary format.
static RResult< std::uint32_t > DeserializeString(const void *buffer, std::uint32_t bufSize, std::string &val)
static std::uint32_t SerializeFeatureFlags(const std::vector< std::int64_t > &flags, void *buffer)
static std::uint32_t SerializePageListV1(void *buffer, const RNTupleDescriptor &desc, std::span< DescriptorId_t > physClusterIDs, const RContext &context)
static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer)
static RResult< std::uint32_t > DeserializeLocator(const void *buffer, std::uint32_t bufSize, RNTupleLocator &locator)
static std::uint32_t SerializeCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32, void *buffer)
Writes a CRC32 checksum of the byte range given by data and length.
static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer)
static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val)
static RResult< void > DeserializePageListV1(const void *buffer, std::uint32_t bufSize, std::vector< RClusterDescriptorBuilder > &clusters)
static std::uint32_t SerializeString(const std::string &val, void *buffer)
static RResult< std::uint32_t > DeserializeEnvelope(const void *buffer, std::uint32_t bufSize)
static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val)
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val)
static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer)
static RContext SerializeHeaderV1(void *buffer, const RNTupleDescriptor &desc)
static RResult< void > DeserializeFooterV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static std::uint32_t SerializeInt16(std::int16_t val, void *buffer)
static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer)
static std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
static std::uint32_t SerializeEnvelopePreamble(void *buffer)
Currently all envelopes have the same version number (1).
static RResult< std::uint16_t > DeserializeColumnType(const void *buffer, ROOT::Experimental::EColumnType &type)
static RResult< std::uint32_t > DeserializeClusterGroup(const void *buffer, std::uint32_t bufSize, RClusterGroup &clusterGroup)
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val)
static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val)
static RResult< std::uint32_t > DeserializeEnvelopeLink(const void *buffer, std::uint32_t bufSize, REnvelopeLink &envelopeLink)
static std::uint32_t SerializeEnvelopePostscript(const unsigned char *envelope, std::uint32_t size, void *buffer)
static RResult< std::uint16_t > DeserializeFieldStructure(const void *buffer, ROOT::Experimental::ENTupleStructure &structure)
static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer)
static std::uint32_t SerializeRecordFramePreamble(void *buffer)
static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
static RResult< std::uint32_t > DeserializeFrameHeader(const void *buffer, std::uint32_t bufSize, std::uint32_t &frameSize, std::uint32_t &nitems)
static RResult< std::uint32_t > DeserializeFeatureFlags(const void *buffer, std::uint32_t bufSize, std::vector< std::int64_t > &flags)
static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer)
static std::uint32_t SerializeFramePostscript(void *frame, std::int32_t size)
static std::uint32_t SerializeFooterV1(void *buffer, const RNTupleDescriptor &desc, const RContext &context)
static std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
static RResult< void > VerifyCRC32(const unsigned char *data, std::uint32_t length, std::uint32_t &crc32)
Expects a CRC32 checksum in the 4 bytes following data + length and verifies it.
static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer)
While we could just interpret the enums as ints, we make the translation explicit in order to avoid a...
static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
static RResult< void > DeserializeHeaderV1(const void *buffer, std::uint32_t bufSize, RNTupleDescriptorBuilder &descBuilder)
static RResult< std::uint32_t > DeserializeClusterSummary(const void *buffer, std::uint32_t bufSize, RClusterSummary &clusterSummary)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & PageListLength(std::uint32_t pageListLength)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & ColumnId(DescriptorId_t columnId)
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
Meta-data stored for every field of an ntuple.
A helper class for piece-wise construction of an RNTupleDescriptor.
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
RResult< void > AddClusterSummary(DescriptorId_t clusterId, std::uint64_t firstEntry, std::uint64_t nEntries)
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void SetNTuple(const std::string_view name, const std::string_view description)
void AddClusterGroup(RClusterGroupDescriptorBuilder &&clusterGroup)
void AddField(const RFieldDescriptor &fieldDesc)
The on-storage meta-data of an ntuple.
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:207
RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
RClusterSize ClusterSize_t
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
Records the partition of data into pages for a particular column in a particular cluster.
RNTupleLocator payload that is common for object stores using 64bit location information.
Generic information about the physical location of data.
ELocatorType
Values for the Type field in non-disk locators; see doc/specifications.md for details.
std::uint8_t fReserved
Reserved for use by concrete storage backends.
ELocatorType fType
For non-disk locators, the value for the Type field.
std::variant< std::uint64_t, std::string, RNTupleLocatorObject64 > fPosition
Simple on-disk locators consisting of a 64-bit offset use variant type uint64_t; extended locators ha...