Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-10-04
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include <ROOT/RError.hxx>
17#include <ROOT/RField.hxx>
19#include <ROOT/RNTupleModel.hxx>
20#include <ROOT/RNTupleUtil.hxx>
21#include <ROOT/RStringView.hxx>
22
23#include <RZip.h>
24#include <TError.h>
25
26#include <algorithm>
27#include <cstdint>
28#include <deque>
29#include <iostream>
30#include <set>
31#include <utility>
32
33namespace {
34
35/// The machine-independent serialization of meta-data wraps the header and footer as well as sub structures in
36/// frames. The frame layout is
37///
38/// -----------------------------------------------------------
39/// | TYPE | DESCRIPTION |
40/// |----------------------------------------------------------
41/// | std::uint16_t | Version used to write the frame |
42/// | std::uint16_t | Minimum version for reading the frame |
43/// | std::uint32_t | Length of the frame incl. preamble |
44/// -----------------------------------------------------------
45///
46/// In addition, the header and footer store a 4 byte CRC32 checksum of the frame immediately after the frame.
47/// The footer also repeats the frame size just before the CRC32 checksum. That means, one can read the last 8 bytes
48/// to determine the footer length, and the first 8 bytes to determine the header length.
49///
50/// Within the frames, integers of different lengths are stored in a machine-independent representation. Strings and
51/// vectors store the number of items followed by the items. Time stamps are stored in number of seconds since the
52/// UNIX epoch.
53
55
56std::uint32_t SerializeClusterSize(ROOT::Experimental::ClusterSize_t val, void *buffer)
57{
58 return SerializeUInt32(val, buffer);
59}
60
61std::uint32_t DeserializeClusterSize(const void *buffer, ROOT::Experimental::ClusterSize_t *val)
62{
63 std::uint32_t size;
64 auto nbytes = DeserializeUInt32(buffer, &size);
65 *val = size;
66 return nbytes;
67}
68
69std::uint32_t SerializeLocator(const ROOT::Experimental::RNTupleLocator &val, void *buffer)
70{
71 // In order to keep the meta-data small, we don't wrap the locator in a frame
72 if (buffer != nullptr) {
73 auto pos = reinterpret_cast<unsigned char *>(buffer);
74 pos += SerializeInt64(val.fPosition, pos);
75 pos += SerializeUInt32(val.fBytesOnStorage, pos);
76 pos += SerializeString(val.fUrl, pos);
77 }
78 return SerializeString(val.fUrl, nullptr) + 12;
79}
80
81std::uint32_t DeserializeLocator(const void *buffer, ROOT::Experimental::RNTupleLocator *val)
82{
83 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
84 bytes += DeserializeInt64(bytes, &val->fPosition);
85 bytes += DeserializeUInt32(bytes, &val->fBytesOnStorage);
86 bytes += DeserializeString(bytes, &val->fUrl);
87 return SerializeString(val->fUrl, nullptr) + 12;
88}
89
90std::uint32_t SerializeFrame(std::uint16_t protocolVersionCurrent, std::uint16_t protocolVersionMin, void *buffer,
91 void **ptrSize)
92{
93 if (buffer != nullptr) {
94 auto pos = reinterpret_cast<unsigned char *>(buffer);
95 pos += SerializeUInt16(protocolVersionCurrent, pos); // The protocol version used to write the structure
96 pos += SerializeUInt16(protocolVersionMin, pos); // The minimum protocol version required to read the data
97 *ptrSize = pos;
98 pos += SerializeUInt32(0, pos); // placeholder for the size of the frame
99 }
100 return 8;
101}
102
103std::uint32_t DeserializeFrame(std::uint16_t protocolVersion, const void *buffer, std::uint32_t *size)
104{
105 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
106 std::uint16_t protocolVersionAtWrite;
107 std::uint16_t protocolVersionMinRequired;
108 bytes += DeserializeUInt16(bytes, &protocolVersionAtWrite);
109 bytes += DeserializeUInt16(bytes, &protocolVersionMinRequired);
110 if (protocolVersion < protocolVersionMinRequired) {
111 throw ROOT::Experimental::RException(R__FAIL("RNTuple version too new (version "
112 + std::to_string(protocolVersionMinRequired)
113 + "), version <= " + std::to_string(protocolVersion) + " required"));
114 }
115 bytes += DeserializeUInt32(bytes, size);
116 return 8;
117}
118
119std::uint32_t SerializeVersion(const ROOT::Experimental::RNTupleVersion &val, void *buffer)
120{
121 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
122 auto pos = base;
123 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
124
125 void *ptrSize = nullptr;
126 pos += SerializeFrame(0, 0, *where, &ptrSize);
127
128 pos += SerializeUInt32(val.GetVersionUse(), *where);
129 pos += SerializeUInt32(val.GetVersionMin(), *where);
130 pos += SerializeUInt64(val.GetFlags(), *where);
131
132 auto size = pos - base;
133 SerializeUInt32(size, ptrSize);
134 return size;
135}
136
137std::uint32_t DeserializeVersion(const void *buffer, ROOT::Experimental::RNTupleVersion *version)
138{
139 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
140 std::uint32_t frameSize;
141 bytes += DeserializeFrame(0, bytes, &frameSize);
142
143 std::uint32_t versionUse;
144 std::uint32_t versionMin;
145 std::uint64_t flags;
146 bytes += DeserializeUInt32(bytes, &versionUse);
147 bytes += DeserializeUInt32(bytes, &versionMin);
148 bytes += DeserializeUInt64(bytes, &flags);
149 *version = ROOT::Experimental::RNTupleVersion(versionUse, versionMin, flags);
150
151 return frameSize;
152}
153
154std::uint32_t SerializeUuid(const ROOT::Experimental::RNTupleUuid &val, void *buffer)
155{
156 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
157 auto pos = base;
158 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
159
160 void *ptrSize = nullptr;
161 pos += SerializeFrame(0, 0, *where, &ptrSize);
162
163 pos += SerializeString(val, *where);
164
165 auto size = pos - base;
166 SerializeUInt32(size, ptrSize);
167 return size;
168}
169
170std::uint32_t DeserializeUuid(const void *buffer, ROOT::Experimental::RNTupleUuid *uuid)
171{
172 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
173 std::uint32_t frameSize;
174 bytes += DeserializeFrame(0, bytes, &frameSize);
175
176 bytes += DeserializeString(bytes, uuid);
177
178 return frameSize;
179}
180
181std::uint32_t SerializeColumnModel(const ROOT::Experimental::RColumnModel &val, void *buffer)
182{
183 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
184 auto pos = base;
185 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
186
187 void *ptrSize = nullptr;
188 pos += SerializeFrame(0, 0, *where, &ptrSize);
189
190 pos += SerializeInt32(static_cast<int>(val.GetType()), *where);
191 pos += SerializeInt32(static_cast<int>(val.GetIsSorted()), *where);
192
193 auto size = pos - base;
194 SerializeUInt32(size, ptrSize);
195 return size;
196}
197
198std::uint32_t DeserializeColumnModel(const void *buffer, ROOT::Experimental::RColumnModel *columnModel)
199{
200 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
201 std::uint32_t frameSize;
202 bytes += DeserializeFrame(0, bytes, &frameSize);
203
204 std::int32_t type;
205 std::int32_t isSorted;
206 bytes += DeserializeInt32(bytes, &type);
207 bytes += DeserializeInt32(bytes, &isSorted);
208 *columnModel = ROOT::Experimental::RColumnModel(static_cast<ROOT::Experimental::EColumnType>(type), isSorted);
209
210 return frameSize;
211}
212
213std::uint32_t SerializeTimeStamp(const std::chrono::system_clock::time_point &val, void *buffer)
214{
215 return SerializeInt64(std::chrono::system_clock::to_time_t(val), buffer);
216}
217
218std::uint32_t DeserializeTimeStamp(const void *buffer, std::chrono::system_clock::time_point *timeStamp)
219{
220 std::int64_t secSinceUnixEpoch;
221 auto size = DeserializeInt64(buffer, &secSinceUnixEpoch);
222 *timeStamp = std::chrono::system_clock::from_time_t(secSinceUnixEpoch);
223 return size;
224}
225
226std::uint32_t SerializeColumnRange(const ROOT::Experimental::RClusterDescriptor::RColumnRange &val, void *buffer)
227{
228 // To keep the cluster footers small, we don't put a frame around individual column ranges.
229 if (buffer != nullptr) {
230 auto pos = reinterpret_cast<unsigned char *>(buffer);
231 // The column id is stored in SerializeFooter() for the column range and the page range altogether
232 pos += SerializeUInt64(val.fFirstElementIndex, pos);
233 pos += SerializeClusterSize(val.fNElements, pos);
234 pos += SerializeInt64(val.fCompressionSettings, pos);
235 }
236 return 20;
237}
238
239std::uint32_t DeserializeColumnRange(const void *buffer,
241{
242 auto bytes = reinterpret_cast<const unsigned char *>(buffer);
243 // The column id is set elsewhere (see AddClustersFromFooter())
244 bytes += DeserializeUInt64(bytes, &columnRange->fFirstElementIndex);
245 bytes += DeserializeClusterSize(bytes, &columnRange->fNElements);
246 bytes += DeserializeInt64(bytes, &columnRange->fCompressionSettings);
247 return 20;
248}
249
250std::uint32_t SerializePageInfo(const ROOT::Experimental::RClusterDescriptor::RPageRange::RPageInfo &val, void *buffer)
251{
252 // To keep the cluster footers small, we don't put a frame around individual page infos.
253 if (buffer != nullptr) {
254 auto pos = reinterpret_cast<unsigned char *>(buffer);
255 // The column id is stored in SerializeFooter() for the column range and the page range altogether
256 pos += SerializeClusterSize(val.fNElements, pos);
257 pos += SerializeLocator(val.fLocator, pos);
258 }
259 return 4 + SerializeLocator(val.fLocator, nullptr);
260}
261
262std::uint32_t DeserializePageInfo(const void *buffer,
264{
265 auto base = reinterpret_cast<const unsigned char *>(buffer);
266 auto bytes = base;
267 // The column id is set elsewhere (see AddClustersFromFooter())
268 bytes += DeserializeClusterSize(bytes, &pageInfo->fNElements);
269 bytes += DeserializeLocator(bytes, &pageInfo->fLocator);
270 return bytes - base;
271}
272
273std::uint32_t SerializeCrc32(const unsigned char *data, std::uint32_t length, void *buffer)
274{
275 auto checksum = R__crc32(0, nullptr, 0);
276 if (buffer != nullptr) {
277 checksum = R__crc32(checksum, data, length);
278 SerializeUInt32(checksum, buffer);
279 }
280 return 4;
281}
282
283void VerifyCrc32(const unsigned char *data, std::uint32_t length)
284{
285 auto checksumReal = R__crc32(0, nullptr, 0);
286 checksumReal = R__crc32(checksumReal, data, length);
287 std::uint32_t checksumFound;
288 DeserializeUInt32(data + length, &checksumFound);
289 if (checksumFound != checksumReal)
290 throw ROOT::Experimental::RException(R__FAIL("CRC32 checksum mismatch"));
291}
292
293std::uint32_t SerializeField(const ROOT::Experimental::RFieldDescriptor &val, void *buffer)
294{
295 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
296 auto pos = base;
297 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
298
299 void *ptrSize = nullptr;
302
303 pos += SerializeUInt64(val.GetId(), *where);
304 pos += SerializeVersion(val.GetFieldVersion(), *where);
305 pos += SerializeVersion(val.GetTypeVersion(), *where);
306 pos += SerializeString(val.GetFieldName(), *where);
307 pos += SerializeString(val.GetFieldDescription(), *where);
308 pos += SerializeString(val.GetTypeName(), *where);
309 pos += SerializeUInt64(val.GetNRepetitions(), *where);
310 pos += SerializeUInt32(static_cast<int>(val.GetStructure()), *where);
311 pos += SerializeUInt64(val.GetParentId(), *where);
312 pos += SerializeUInt32(val.GetLinkIds().size(), *where);
313 for (const auto& l : val.GetLinkIds())
314 pos += SerializeUInt64(l, *where);
315
316 auto size = pos - base;
317 SerializeUInt32(size, ptrSize);
318 return size;
319}
320
321std::uint32_t SerializeColumn(const ROOT::Experimental::RColumnDescriptor &val, void *buffer)
322{
323 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
324 auto pos = base;
325 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
326
327 void *ptrSize = nullptr;
330
331 pos += SerializeUInt64(val.GetId(), *where);
332 pos += SerializeVersion(val.GetVersion(), *where);
333 pos += SerializeColumnModel(val.GetModel(), *where);
334 pos += SerializeUInt64(val.GetFieldId(), *where);
335 pos += SerializeUInt32(val.GetIndex(), *where);
336
337 auto size = pos - base;
338 SerializeUInt32(size, ptrSize);
339 return size;
340}
341
342std::uint32_t SerializeClusterSummary(const ROOT::Experimental::RClusterDescriptor &val, void *buffer)
343{
344 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
345 auto pos = base;
346 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
347
348 void *ptrSize = nullptr;
351
352 pos += SerializeUInt64(val.GetId(), *where);
353 pos += SerializeVersion(val.GetVersion(), *where);
354 pos += SerializeUInt64(val.GetFirstEntryIndex(), *where);
355 pos += SerializeUInt64(val.GetNEntries(), *where);
356 pos += SerializeLocator({0, 1, ""}, *where); // reserved for later use.
357
358 auto size = pos - base;
359 SerializeUInt32(size, ptrSize);
360 return size;
361}
362
363} // anonymous namespace
364
365
367{
368 return fFieldId == other.fFieldId &&
369 fFieldVersion == other.fFieldVersion &&
370 fTypeVersion == other.fTypeVersion &&
371 fFieldName == other.fFieldName &&
373 fTypeName == other.fTypeName &&
374 fNRepetitions == other.fNRepetitions &&
375 fStructure == other.fStructure &&
376 fParentId == other.fParentId &&
377 fLinkIds == other.fLinkIds;
378}
379
382{
383 RFieldDescriptor clone;
384 clone.fFieldId = fFieldId;
385 clone.fFieldVersion = fFieldVersion;
386 clone.fTypeVersion = fTypeVersion;
387 clone.fFieldName = fFieldName;
388 clone.fFieldDescription = fFieldDescription;
389 clone.fTypeName = fTypeName;
390 clone.fNRepetitions = fNRepetitions;
391 clone.fStructure = fStructure;
392 clone.fParentId = fParentId;
393 clone.fLinkIds = fLinkIds;
394 return clone;
395}
396
397std::unique_ptr<ROOT::Experimental::Detail::RFieldBase>
399{
400 if (GetTypeName().empty() && GetStructure() == ENTupleStructure::kCollection) {
401 // For untyped collections, we have no class available to collect all the sub fields.
402 // Therefore, we create an untyped record field as an artificial binder for the collection items.
403 std::vector<std::unique_ptr<Detail::RFieldBase>> memberFields;
404 for (auto id : fLinkIds) {
405 const auto &memberDesc = ntplDesc.GetFieldDescriptor(id);
406 memberFields.emplace_back(memberDesc.CreateField(ntplDesc));
407 }
408 auto recordField = std::make_unique<RRecordField>("_0", memberFields);
409 auto collectionField = std::make_unique<RVectorField>(GetFieldName(), std::move(recordField));
410 collectionField->SetOnDiskId(fFieldId);
411 return collectionField;
412 }
413
414 auto field = Detail::RFieldBase::Create(GetFieldName(), GetTypeName()).Unwrap();
415 field->SetOnDiskId(fFieldId);
416 for (auto &f : *field)
417 f.SetOnDiskId(ntplDesc.FindFieldId(f.GetName(), f.GetParent()->GetOnDiskId()));
418 return field;
419}
420
421
422////////////////////////////////////////////////////////////////////////////////
423
424
426{
427 return fColumnId == other.fColumnId &&
428 fVersion == other.fVersion &&
429 fModel == other.fModel &&
430 fFieldId == other.fFieldId &&
431 fIndex == other.fIndex;
432}
433
434
437{
438 RColumnDescriptor clone;
439 clone.fColumnId = fColumnId;
440 clone.fVersion = fVersion;
441 clone.fModel = fModel;
442 clone.fFieldId = fFieldId;
443 clone.fIndex = fIndex;
444 return clone;
445}
446
447
448////////////////////////////////////////////////////////////////////////////////
449
450
453{
454 // TODO(jblomer): binary search
455 RPageInfo pageInfo;
456 decltype(idxInCluster) firstInPage = 0;
457 NTupleSize_t pageNo = 0;
458 for (const auto &pi : fPageInfos) {
459 if (firstInPage + pi.fNElements > idxInCluster) {
460 pageInfo = pi;
461 break;
462 }
463 firstInPage += pi.fNElements;
464 ++pageNo;
465 }
466 R__ASSERT(firstInPage <= idxInCluster);
467 R__ASSERT((firstInPage + pageInfo.fNElements) > idxInCluster);
468 return RPageInfoExtended{pageInfo, firstInPage, pageNo};
469}
470
471
473{
474 return fClusterId == other.fClusterId &&
475 fVersion == other.fVersion &&
476 fFirstEntryIndex == other.fFirstEntryIndex &&
477 fNEntries == other.fNEntries &&
478 fColumnRanges == other.fColumnRanges &&
479 fPageRanges == other.fPageRanges;
480}
481
482
483std::unordered_set<ROOT::Experimental::DescriptorId_t> ROOT::Experimental::RClusterDescriptor::GetColumnIds() const
484{
485 std::unordered_set<DescriptorId_t> result;
486 for (const auto &x : fColumnRanges)
487 result.emplace(x.first);
488 return result;
489}
490
491
493{
494 return fColumnRanges.find(columnId) != fColumnRanges.end();
495}
496
497
499{
500 std::uint64_t nbytes = 0;
501 for (const auto &pr : fPageRanges) {
502 for (const auto &pi : pr.second.fPageInfos) {
503 nbytes += pi.fLocator.fBytesOnStorage;
504 }
505 }
506 return nbytes;
507}
508
509
510////////////////////////////////////////////////////////////////////////////////
511
512
514{
515 return fName == other.fName &&
516 fDescription == other.fDescription &&
517 fAuthor == other.fAuthor &&
518 fCustodian == other.fCustodian &&
519 fTimeStampData == other.fTimeStampData &&
520 fTimeStampWritten == other.fTimeStampWritten &&
521 fVersion == other.fVersion &&
522 fOwnUuid == other.fOwnUuid &&
523 fGroupUuid == other.fGroupUuid &&
524 fFieldDescriptors == other.fFieldDescriptors &&
525 fColumnDescriptors == other.fColumnDescriptors &&
526 fClusterDescriptors == other.fClusterDescriptors;
527}
528
529
531{
532 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
533 auto pos = base;
534 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
535
536 void *ptrSize = nullptr;
537 pos += SerializeFrame(
539 pos += SerializeUInt64(0, *where); // reserved; can be at some point used, e.g., for compression flags
540
541 pos += SerializeString(fName, *where);
542 pos += SerializeString(fDescription, *where);
543 pos += SerializeString(fAuthor, *where);
544 pos += SerializeString(fCustodian, *where);
545 pos += SerializeTimeStamp(fTimeStampData, *where);
546 pos += SerializeTimeStamp(fTimeStampWritten, *where);
547 pos += SerializeVersion(fVersion, *where);
548 pos += SerializeUuid(fOwnUuid, *where);
549 pos += SerializeUuid(fGroupUuid, *where);
550 pos += SerializeUInt32(fFieldDescriptors.size(), *where);
551 for (const auto& f : fFieldDescriptors) {
552 pos += SerializeField(f.second, *where);
553 }
554 pos += SerializeUInt32(fColumnDescriptors.size(), *where);
555 for (const auto& c : fColumnDescriptors) {
556 pos += SerializeColumn(c.second, *where);
557 }
558
559 std::uint32_t size = pos - base;
560 SerializeUInt32(size, ptrSize);
561 size += SerializeCrc32(base, size, *where);
562
563 return size;
564}
565
567{
568 auto base = reinterpret_cast<unsigned char *>((buffer != nullptr) ? buffer : 0);
569 auto pos = base;
570 void** where = (buffer == nullptr) ? &buffer : reinterpret_cast<void**>(&pos);
571
572 void *ptrSize = nullptr;
573 pos += SerializeFrame(
575 pos += SerializeUInt64(0, *where); // reserved; can be at some point used, e.g., for compression flags
576
577 pos += SerializeUInt64(fClusterDescriptors.size(), *where);
578 for (const auto& cluster : fClusterDescriptors) {
579 pos += SerializeUuid(fOwnUuid, *where); // in order to verify that header and footer belong together
580 pos += SerializeClusterSummary(cluster.second, *where);
581
582 pos += SerializeUInt32(fColumnDescriptors.size(), *where);
583 for (const auto& column : fColumnDescriptors) {
584 auto columnId = column.first;
585 pos += SerializeUInt64(columnId, *where);
586
587 const auto &columnRange = cluster.second.GetColumnRange(columnId);
588 R__ASSERT(columnRange.fColumnId == columnId);
589 pos += SerializeColumnRange(columnRange, *where);
590
591 const auto &pageRange = cluster.second.GetPageRange(columnId);
592 R__ASSERT(pageRange.fColumnId == columnId);
593 auto nPages = pageRange.fPageInfos.size();
594 pos += SerializeUInt32(nPages, *where);
595 for (unsigned int i = 0; i < nPages; ++i) {
596 pos += SerializePageInfo(pageRange.fPageInfos[i], *where);
597 }
598 }
599 }
600
601 // The next 16 bytes make the ntuple's postscript
602 pos += SerializeUInt16(kFrameVersionCurrent, *where);
603 pos += SerializeUInt16(kFrameVersionMin, *where);
604 // Add the CRC32 bytes to the header and footer sizes
605 pos += SerializeUInt32(GetHeaderSize(), *where);
606 std::uint32_t size = pos - base + 4;
607 pos += SerializeUInt32(size + 4, *where);
608 size += SerializeCrc32(base, size, *where);
609
610 return size;
611}
612
613
615 const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter)
616{
617 auto pos = reinterpret_cast<const unsigned char *>(postscript);
618 std::uint16_t dummy;
619 pos += DeserializeUInt16(pos, &dummy);
620 pos += DeserializeUInt16(pos, &dummy);
621 pos += DeserializeUInt32(pos, &szHeader);
622 pos += DeserializeUInt32(pos, &szFooter);
623}
624
625
627{
628 NTupleSize_t result = 0;
629 for (const auto &cd : fClusterDescriptors) {
630 result = std::max(result, cd.second.GetFirstEntryIndex() + cd.second.GetNEntries());
631 }
632 return result;
633}
634
636{
637 NTupleSize_t result = 0;
638 for (const auto &cd : fClusterDescriptors) {
639 if (!cd.second.ContainsColumn(columnId))
640 continue;
641 auto columnRange = cd.second.GetColumnRange(columnId);
642 result = std::max(result, columnRange.fFirstElementIndex + columnRange.fNElements);
643 }
644 return result;
645}
646
647
650{
651 std::string leafName(fieldName);
652 auto posDot = leafName.find_last_of('.');
653 if (posDot != std::string::npos) {
654 auto parentName = leafName.substr(0, posDot);
655 leafName = leafName.substr(posDot + 1);
656 parentId = FindFieldId(parentName, parentId);
657 }
658 for (const auto &fd : fFieldDescriptors) {
659 if (fd.second.GetParentId() == parentId && fd.second.GetFieldName() == leafName)
660 return fd.second.GetId();
661 }
663}
664
665
667{
668 if (fieldId == kInvalidDescriptorId)
669 return "";
670
671 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
672 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
673 if (prefix.empty())
674 return fieldDescriptor.GetFieldName();
675 return prefix + "." + fieldDescriptor.GetFieldName();
676}
677
678
681{
682 return FindFieldId("", kInvalidDescriptorId);
683}
684
685
688{
689 return FindFieldId(fieldName, GetFieldZeroId());
690}
691
692
695{
696 for (const auto &cd : fColumnDescriptors) {
697 if (cd.second.GetFieldId() == fieldId && cd.second.GetIndex() == columnIndex)
698 return cd.second.GetId();
699 }
701}
702
703
706{
707 // TODO(jblomer): binary search?
708 for (const auto &cd : fClusterDescriptors) {
709 if (!cd.second.ContainsColumn(columnId))
710 continue;
711 auto columnRange = cd.second.GetColumnRange(columnId);
712 if (columnRange.Contains(index))
713 return cd.second.GetId();
714 }
716}
717
718
719// TODO(jblomer): fix for cases of sharded clusters
722{
723 const auto &clusterDesc = GetClusterDescriptor(clusterId);
724 auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
725 // TODO(jblomer): binary search?
726 for (const auto &cd : fClusterDescriptors) {
727 if (cd.second.GetFirstEntryIndex() == firstEntryInNextCluster)
728 return cd.second.GetId();
729 }
731}
732
733
734// TODO(jblomer): fix for cases of sharded clusters
737{
738 const auto &clusterDesc = GetClusterDescriptor(clusterId);
739 // TODO(jblomer): binary search?
740 for (const auto &cd : fClusterDescriptors) {
741 if (cd.second.GetFirstEntryIndex() + cd.second.GetNEntries() == clusterDesc.GetFirstEntryIndex())
742 return cd.second.GetId();
743 }
745}
746
747
748std::unique_ptr<ROOT::Experimental::RNTupleModel> ROOT::Experimental::RNTupleDescriptor::GenerateModel() const
749{
750 auto model = std::make_unique<RNTupleModel>();
751 model->GetFieldZero()->SetOnDiskId(GetFieldZeroId());
752 for (const auto &topDesc : GetTopLevelFields())
753 model->AddField(topDesc.CreateField(*this));
754 return model;
755}
756
757
758////////////////////////////////////////////////////////////////////////////////
759
760
763 DescriptorId_t columnId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings,
764 const RClusterDescriptor::RPageRange &pageRange)
765{
766 if (columnId != pageRange.fColumnId)
767 return R__FAIL("column ID mismatch");
768 if (fCluster.fPageRanges.count(columnId) > 0)
769 return R__FAIL("column ID conflict");
770 RClusterDescriptor::RColumnRange columnRange{columnId, firstElementIndex, RClusterSize(0)};
771 columnRange.fCompressionSettings = compressionSettings;
772 for (const auto &pi : pageRange.fPageInfos) {
773 columnRange.fNElements += pi.fNElements;
774 }
775 fCluster.fPageRanges[columnId] = pageRange.Clone();
776 fCluster.fColumnRanges[columnId] = columnRange;
777 return RResult<void>::Success();
778}
779
780
783{
784 if (fCluster.fClusterId == kInvalidDescriptorId)
785 return R__FAIL("unset cluster ID");
786 if (fCluster.fNEntries == 0)
787 return R__FAIL("empty cluster");
788 for (const auto &pr : fCluster.fPageRanges) {
789 if (fCluster.fColumnRanges.count(pr.first) == 0) {
790 return R__FAIL("missing column range");
791 }
792 }
793 RClusterDescriptor result;
794 std::swap(result, fCluster);
795 return result;
796}
797
798
799////////////////////////////////////////////////////////////////////////////////
800
801
804 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
805 return R__FAIL("field with id '" + std::to_string(fieldId) + "' doesn't exist");
806 return RResult<void>::Success();
807}
808
811 // Reuse field name validity check
812 auto validName = Detail::RFieldBase::EnsureValidFieldName(fDescriptor.GetName());
813 if (!validName) {
814 return R__FORWARD_ERROR(validName);
815 }
816 // open-ended list of invariant checks
817 for (const auto& key_val: fDescriptor.fFieldDescriptors) {
818 const auto& id = key_val.first;
819 const auto& desc = key_val.second;
820 // parent not properly set
821 if (id != DescriptorId_t(0) && desc.GetParentId() == kInvalidDescriptorId) {
822 return R__FAIL("field with id '" + std::to_string(id) + "' has an invalid parent id");
823 }
824 }
825 return RResult<void>::Success();
826}
827
829{
830 RNTupleDescriptor result;
831 std::swap(result, fDescriptor);
832 return result;
833}
834
836{
837 auto pos = reinterpret_cast<unsigned char *>(headerBuffer);
838 auto base = pos;
839
840 std::uint32_t frameSize;
841 pos += DeserializeFrame(RNTupleDescriptor::kFrameVersionCurrent, base, &frameSize);
842 VerifyCrc32(base, frameSize);
843 std::uint64_t reserved;
844 pos += DeserializeUInt64(pos, &reserved);
845
846 pos += DeserializeString(pos, &fDescriptor.fName);
847 pos += DeserializeString(pos, &fDescriptor.fDescription);
848 pos += DeserializeString(pos, &fDescriptor.fAuthor);
849 pos += DeserializeString(pos, &fDescriptor.fCustodian);
850 pos += DeserializeTimeStamp(pos, &fDescriptor.fTimeStampData);
851 pos += DeserializeTimeStamp(pos, &fDescriptor.fTimeStampWritten);
852 pos += DeserializeVersion(pos, &fDescriptor.fVersion);
853 pos += DeserializeUuid(pos, &fDescriptor.fOwnUuid);
854 pos += DeserializeUuid(pos, &fDescriptor.fGroupUuid);
855
856 std::uint32_t nFields;
857 pos += DeserializeUInt32(pos, &nFields);
858 for (std::uint32_t i = 0; i < nFields; ++i) {
859 auto fieldBase = pos;
860 pos += DeserializeFrame(RFieldDescriptor::kFrameVersionCurrent, fieldBase, &frameSize);
861
863 pos += DeserializeUInt64(pos, &f.fFieldId);
864 pos += DeserializeVersion(pos, &f.fFieldVersion);
865 pos += DeserializeVersion(pos, &f.fTypeVersion);
866 pos += DeserializeString(pos, &f.fFieldName);
867 pos += DeserializeString(pos, &f.fFieldDescription);
868 pos += DeserializeString(pos, &f.fTypeName);
869 pos += DeserializeUInt64(pos, &f.fNRepetitions);
870 std::int32_t structure;
871 pos += DeserializeInt32(pos, &structure);
872 f.fStructure = static_cast<ENTupleStructure>(structure);
873 pos += DeserializeUInt64(pos, &f.fParentId);
874
875 std::uint32_t nLinks;
876 pos += DeserializeUInt32(pos, &nLinks);
877 f.fLinkIds.resize(nLinks);
878 for (std::uint32_t j = 0; j < nLinks; ++j) {
879 pos += DeserializeUInt64(pos, &f.fLinkIds[j]);
880 }
881
882 pos = fieldBase + frameSize;
883 fDescriptor.fFieldDescriptors.emplace(f.fFieldId, std::move(f));
884 }
885
886 std::uint32_t nColumns;
887 pos += DeserializeUInt32(pos, &nColumns);
888 for (std::uint32_t i = 0; i < nColumns; ++i) {
889 auto columnBase = pos;
890 pos += DeserializeFrame(RColumnDescriptor::kFrameVersionCurrent, columnBase, &frameSize);
891
893 pos += DeserializeUInt64(pos, &c.fColumnId);
894 pos += DeserializeVersion(pos, &c.fVersion);
895 pos += DeserializeColumnModel(pos, &c.fModel);
896 pos += DeserializeUInt64(pos, &c.fFieldId);
897 pos += DeserializeUInt32(pos, &c.fIndex);
898
899 pos = columnBase + frameSize;
900 fDescriptor.fColumnDescriptors.emplace(c.fColumnId, std::move(c));
901 }
902}
903
905 auto pos = reinterpret_cast<unsigned char *>(footerBuffer);
906 auto base = pos;
907
908 std::uint32_t frameSize;
909 pos += DeserializeFrame(RNTupleDescriptor::kFrameVersionCurrent, pos, &frameSize);
910 VerifyCrc32(base, frameSize);
911 std::uint64_t reserved;
912 pos += DeserializeUInt64(pos, &reserved);
913
914 std::uint64_t nClusters;
915 pos += DeserializeUInt64(pos, &nClusters);
916 for (std::uint64_t i = 0; i < nClusters; ++i) {
917 RNTupleUuid uuid;
918 pos += DeserializeUuid(pos, &uuid);
919 R__ASSERT(uuid == fDescriptor.fOwnUuid);
920 auto clusterBase = pos;
921 pos += DeserializeFrame(RClusterDescriptor::kFrameVersionCurrent, clusterBase, &frameSize);
922
923 std::uint64_t clusterId;
924 RNTupleVersion version;
925 std::uint64_t firstEntry;
926 std::uint64_t nEntries;
927 pos += DeserializeUInt64(pos, &clusterId);
928 pos += DeserializeVersion(pos, &version);
929 pos += DeserializeUInt64(pos, &firstEntry);
930 pos += DeserializeUInt64(pos, &nEntries);
931 AddCluster(clusterId, version, firstEntry, ROOT::Experimental::ClusterSize_t(nEntries));
932 RNTupleLocator locator;
933 pos += DeserializeLocator(pos, &locator); // unused
934
935 pos = clusterBase + frameSize;
936
937 std::uint32_t nColumns;
938 pos += DeserializeUInt32(pos, &nColumns);
939 for (std::uint32_t j = 0; j < nColumns; ++j) {
940 uint64_t columnId;
941 pos += DeserializeUInt64(pos, &columnId);
942
944 columnRange.fColumnId = columnId;
945 pos += DeserializeColumnRange(pos, &columnRange);
946 AddClusterColumnRange(clusterId, columnRange);
947
949 pageRange.fColumnId = columnId;
950 std::uint32_t nPages;
951 pos += DeserializeUInt32(pos, &nPages);
952 for (unsigned int k = 0; k < nPages; ++k) {
954 pos += DeserializePageInfo(pos, &pageInfo);
955 pageRange.fPageInfos.emplace_back(pageInfo);
956 }
957 AddClusterPageRange(clusterId, std::move(pageRange));
958 }
959 }
960}
961
963 const std::string_view name, const std::string_view description, const std::string_view author,
964 const RNTupleVersion &version, const RNTupleUuid &uuid)
965{
966 fDescriptor.fName = std::string(name);
967 fDescriptor.fDescription = std::string(description);
968 fDescriptor.fAuthor = std::string(author);
969 fDescriptor.fVersion = version;
970 fDescriptor.fOwnUuid = uuid;
971 fDescriptor.fGroupUuid = uuid;
972}
973
976{
977 if (fColumn.GetId() == kInvalidDescriptorId)
978 return R__FAIL("invalid column id");
979 if (fColumn.GetModel().GetType() == EColumnType::kUnknown)
980 return R__FAIL("invalid column model");
981 if (fColumn.GetFieldId() == kInvalidDescriptorId)
982 return R__FAIL("invalid field id, dangling column");
983 return fColumn.Clone();
984}
985
987 const RFieldDescriptor& fieldDesc) : fField(fieldDesc.Clone())
988{
990 fField.fLinkIds = {};
991}
992
995 RFieldDescriptorBuilder fieldDesc;
996 fieldDesc.FieldVersion(field.GetFieldVersion())
998 .FieldName(field.GetName())
1000 .TypeName(field.GetType())
1001 .Structure(field.GetStructure())
1002 .NRepetitions(field.GetNRepetitions());
1003 return fieldDesc;
1004}
1005
1008 if (fField.GetId() == kInvalidDescriptorId) {
1009 return R__FAIL("invalid field id");
1010 }
1011 if (fField.GetStructure() == ENTupleStructure::kInvalid) {
1012 return R__FAIL("invalid field structure");
1013 }
1014 // FieldZero is usually named "" and would be a false positive here
1015 if (fField.GetParentId() != kInvalidDescriptorId) {
1016 auto validName = Detail::RFieldBase::EnsureValidFieldName(fField.GetFieldName());
1017 if (!validName) {
1018 return R__FORWARD_ERROR(validName);
1019 }
1020 }
1021 return fField.Clone();
1022}
1023
1025 fDescriptor.fFieldDescriptors.emplace(fieldDesc.GetId(), fieldDesc.Clone());
1026}
1027
1030{
1031 auto fieldExists = RResult<void>::Success();
1032 if (!(fieldExists = EnsureFieldExists(fieldId)))
1033 return R__FORWARD_ERROR(fieldExists);
1034 if (!(fieldExists = EnsureFieldExists(linkId)))
1035 return R__FAIL("child field with id '" + std::to_string(linkId) + "' doesn't exist in NTuple");
1036
1037 if (linkId == fDescriptor.GetFieldZeroId()) {
1038 return R__FAIL("cannot make FieldZero a child field");
1039 }
1040 // fail if field already has another valid parent
1041 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
1042 if ((parentId != kInvalidDescriptorId) && (parentId != fieldId)) {
1043 return R__FAIL("field '" + std::to_string(linkId) + "' already has a parent ('" +
1044 std::to_string(parentId) + ")");
1045 }
1046 if (fieldId == linkId) {
1047 return R__FAIL("cannot make field '" + std::to_string(fieldId) + "' a child of itself");
1048 }
1049 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
1050 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
1051 return RResult<void>::Success();
1052}
1053
1055 DescriptorId_t columnId, DescriptorId_t fieldId, const RNTupleVersion &version, const RColumnModel &model,
1056 std::uint32_t index)
1057{
1059 c.fColumnId = columnId;
1060 c.fFieldId = fieldId;
1061 c.fVersion = version;
1062 c.fModel = model;
1063 c.fIndex = index;
1064 fDescriptor.fColumnDescriptors.emplace(columnId, std::move(c));
1065}
1066
1067
1070{
1071 const auto fieldId = columnDesc.GetFieldId();
1072 const auto index = columnDesc.GetIndex();
1073
1074 auto fieldExists = EnsureFieldExists(fieldId);
1075 if (!fieldExists)
1076 return R__FORWARD_ERROR(fieldExists);
1077 if (fDescriptor.FindColumnId(fieldId, index) != kInvalidDescriptorId) {
1078 return R__FAIL("column index clash");
1079 }
1080 if (index > 0) {
1081 if (fDescriptor.FindColumnId(fieldId, index - 1) == kInvalidDescriptorId)
1082 return R__FAIL("out of bounds column index");
1083 }
1084
1085 fDescriptor.fColumnDescriptors.emplace(columnDesc.GetId(), std::move(columnDesc));
1086
1087 return RResult<void>::Success();
1088}
1089
1090
1092 DescriptorId_t clusterId, RNTupleVersion version, NTupleSize_t firstEntryIndex, ClusterSize_t nEntries)
1093{
1095 c.fClusterId = clusterId;
1096 c.fVersion = version;
1097 c.fFirstEntryIndex = firstEntryIndex;
1098 c.fNEntries = nEntries;
1099 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(c));
1100}
1101
1103 DescriptorId_t clusterId, const RClusterDescriptor::RColumnRange &columnRange)
1104{
1105 fDescriptor.fClusterDescriptors[clusterId].fColumnRanges[columnRange.fColumnId] = columnRange;
1106}
1107
1109 DescriptorId_t clusterId, RClusterDescriptor::RPageRange &&pageRange)
1110{
1111 fDescriptor.fClusterDescriptors[clusterId].fPageRanges.emplace(pageRange.fColumnId, std::move(pageRange));
1112}
1113
1116{
1117 fClusterSummaries.push_back(clusterSummary);
1118}
1119
1122{
1123 fClusterGroups.push_back(clusterGroup);
1124}
1125
1128 DescriptorId_t clusterId, RClusterDescriptorBuilder &&partialCluster)
1129{
1130 if (clusterId >= fClusterSummaries.size())
1131 return R__FAIL("unknown cluster id");
1132 if (fDescriptor.fClusterDescriptors.count(clusterId) != 0)
1133 return R__FAIL("cluster clash");
1134 const auto &summary = fClusterSummaries.at(clusterId);
1135 partialCluster.ClusterId(clusterId)
1136 .FirstEntryIndex(summary.fFirstEntry)
1137 .NEntries(summary.fNEntries);
1138 auto cluster = partialCluster.MoveDescriptor();
1139 if (!cluster)
1140 return R__FORWARD_ERROR(cluster);
1141 fDescriptor.fClusterDescriptors.emplace(clusterId, cluster.Unwrap());
1142 return RResult<void>::Success();
1143}
1144
1146{
1147 fDescriptor.fName = "";
1148 fDescriptor.fVersion = RNTupleVersion();
1149 fDescriptor.fFieldDescriptors.clear();
1150 fDescriptor.fColumnDescriptors.clear();
1151 fDescriptor.fClusterDescriptors.clear();
1152}
typedef void(GLAPIENTRYP _GLUfuncptr)(void)
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:295
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:291
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Definition TError.h:118
char name[80]
Definition TGX11.cxx:110
int type
Definition TGX11.cxx:121
std::string GetDescription() const
Get the field's description.
Definition RField.hxx:244
std::size_t GetNRepetitions() const
Definition RField.hxx:238
virtual RNTupleVersion GetTypeVersion() const
Indicates an evolution of the C++ type itself.
Definition RField.hxx:258
static RResult< void > EnsureValidFieldName(std::string_view fieldName)
Check whether a given string is a valid field name.
Definition RField.cxx:232
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName)
Factory method to resurrect a field from the stored on-disk type information.
Definition RField.cxx:149
virtual RNTupleVersion GetFieldVersion() const
Indicates an evolution of the mapping scheme from C++ type to columns.
Definition RField.hxx:256
ENTupleStructure GetStructure() const
Definition RField.hxx:237
The available trivial, native content types of a column.
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > CommitColumnRange(DescriptorId_t columnId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RResult< RClusterDescriptor > MoveDescriptor()
Attempt to make a cluster descriptor.
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RNTupleVersion fVersion
Future versions of the cluster descriptor might add more meta-data, e.g. a semantic checksum.
bool ContainsColumn(DescriptorId_t columnId) const
static constexpr std::uint16_t kFrameVersionMin
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
std::unordered_set< DescriptorId_t > GetColumnIds() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
Meta-data stored for every column of an ntuple.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnModel fModel
Contains the column type and whether it is sorted.
static constexpr std::uint16_t kFrameVersionMin
RNTupleVersion fVersion
Versions can change, e.g., when new column types are added.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Holds the static meta-data of a column in a tree.
Base class for all ROOT issued exceptions.
Definition RError.hxx:114
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
static RFieldDescriptorBuilder FromField(const Detail::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RFieldDescriptorBuilder & FieldVersion(const RNTupleVersion &fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & TypeVersion(const RNTupleVersion &typeVersion)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
RNTupleVersion fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::unique_ptr< Detail::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string fFieldDescription
Free text set by the user.
static constexpr std::uint16_t kFrameVersionMin
std::string fFieldName
The leaf name, not including parent fields.
const std::vector< DescriptorId_t > & GetLinkIds() const
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RNTupleVersion fTypeVersion
The version of the C++ type itself.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
void AddClusterSummary(Internal::RNTupleSerializer::RClusterSummary &clusterSummary)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
void AddCluster(DescriptorId_t clusterId, RNTupleVersion version, NTupleSize_t firstEntryIndex, ClusterSize_t nEntries)
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RNTupleVersion &version, const RColumnModel &model, std::uint32_t index)
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
void AddClusterColumnRange(DescriptorId_t clusterId, const RClusterDescriptor::RColumnRange &columnRange)
void AddClusterGroup(Internal::RNTupleSerializer::RClusterGroup &clusterGroup)
void SetNTuple(const std::string_view name, const std::string_view description, const std::string_view author, const RNTupleVersion &version, const RNTupleUuid &uuid)
void AddClusterPageRange(DescriptorId_t clusterId, RClusterDescriptor::RPageRange &&pageRange)
void AddField(const RFieldDescriptor &fieldDesc)
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
RNTupleUuid fGroupUuid
Column sets that are created as derived sets from existing NTuples share the same group id.
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
std::chrono::system_clock::time_point fTimeStampWritten
The time stamp of writing the data to storage, which gets updated when re-written.
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
std::uint32_t SerializeHeader(void *buffer) const
We deliberately do not use ROOT's built-in serialization in order to allow for use of RNTuple's witho...
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::string fName
The ntuple name needs to be unique in a given storage location (file)
std::uint32_t SerializeFooter(void *buffer) const
Serializes cluster meta data. Returns the number of bytes and fills buffer if it is not nullptr.
std::string fAuthor
The origin of the data.
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
static constexpr std::uint16_t kFrameVersionMin
RNTupleVersion fVersion
The version evolves with the ntuple summary meta-data.
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::string fCustodian
The party currently responsible for storing the data.
DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t columnId) const
static void LocateMetadata(const void *postscript, std::uint32_t &szHeader, std::uint32_t &szFooter)
Given kNBytesPostscript bytes, extract the header and footer lengths in bytes.
std::string fDescription
Free text from the user.
static constexpr std::uint16_t kFrameVersionCurrent
In order to handle changes to the serialization routine in future ntuple versions.
RNTupleUuid fOwnUuid
Every NTuple gets a unique identifier.
std::chrono::system_clock::time_point fTimeStampData
The time stamp of the ntuple data (immutable)
DescriptorId_t FindClusterId(DescriptorId_t columnId, NTupleSize_t index) const
For forward and backward compatibility, attach version information to the constituents of the file f...
std::uint32_t GetVersionUse() const
std::uint32_t GetVersionMin() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:195
struct void * fTypeName
Definition cppyy.h:9
Double_t x[n]
Definition legend1.C:17
Machine-independent serialization functions for fundamental types.
std::uint32_t DeserializeInt64(const void *buffer, std::int64_t *val)
std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t *val)
std::uint32_t SerializeInt64(std::int64_t val, void *buffer)
std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer)
std::uint32_t SerializeInt32(std::int32_t val, void *buffer)
std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer)
std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t *val)
std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
std::uint32_t DeserializeString(const void *buffer, std::string *val)
std::uint32_t DeserializeInt32(const void *buffer, std::int32_t *val)
std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t *val)
std::uint32_t SerializeString(const std::string &val, void *buffer)
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
std::string RNTupleUuid
Every NTuple is identified by a UUID. TODO(jblomer): should this be a TUUID?
constexpr DescriptorId_t kInvalidDescriptorId
The window of element indexes of a particular column in a particular cluster.
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
We do not need to store the element size / uncompressed page size because we know to which column the...
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(RClusterSize::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Generic information about the physical location of data.
auto * l
Definition textangle.C:4