Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-10-04
6
7/*************************************************************************
8 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#include <ROOT/RError.hxx>
16#include <ROOT/RFieldBase.hxx>
17#include <ROOT/RNTuple.hxx>
19#include <ROOT/RNTupleModel.hxx>
20#include <ROOT/RNTupleUtil.hxx>
21#include <ROOT/RPage.hxx>
22#include <string_view>
23
24#include <RZip.h>
25#include <TError.h>
26
27#include <algorithm>
28#include <cstdint>
29#include <deque>
30#include <functional>
31#include <iostream>
32#include <set>
33#include <utility>
34
36
38{
39 return fFieldId == other.fFieldId && fFieldVersion == other.fFieldVersion && fTypeVersion == other.fTypeVersion &&
40 fFieldName == other.fFieldName && fFieldDescription == other.fFieldDescription &&
41 fTypeName == other.fTypeName && fTypeAlias == other.fTypeAlias && fNRepetitions == other.fNRepetitions &&
42 fStructure == other.fStructure && fParentId == other.fParentId &&
43 fProjectionSourceId == other.fProjectionSourceId && fLinkIds == other.fLinkIds &&
44 fLogicalColumnIds == other.fLogicalColumnIds && other.fTypeChecksum == other.fTypeChecksum;
45}
46
48{
49 RFieldDescriptor clone;
50 clone.fFieldId = fFieldId;
51 clone.fFieldVersion = fFieldVersion;
52 clone.fTypeVersion = fTypeVersion;
53 clone.fFieldName = fFieldName;
54 clone.fFieldDescription = fFieldDescription;
55 clone.fTypeName = fTypeName;
56 clone.fTypeAlias = fTypeAlias;
57 clone.fNRepetitions = fNRepetitions;
58 clone.fStructure = fStructure;
59 clone.fParentId = fParentId;
60 clone.fProjectionSourceId = fProjectionSourceId;
61 clone.fLinkIds = fLinkIds;
62 clone.fColumnCardinality = fColumnCardinality;
63 clone.fLogicalColumnIds = fLogicalColumnIds;
64 clone.fTypeChecksum = fTypeChecksum;
65 return clone;
66}
67
68std::unique_ptr<ROOT::RFieldBase>
70{
71 if (GetStructure() == ROOT::ENTupleStructure::kStreamer) {
72 auto streamerField = std::make_unique<ROOT::RStreamerField>(GetFieldName(), GetTypeName());
73 streamerField->SetOnDiskId(fFieldId);
74 return streamerField;
75 }
76
77 // The structure may be unknown if the descriptor comes from a deserialized field with an unknown structural role.
78 // For forward compatibility, we allow this case and return an InvalidField.
79 if (GetStructure() == ROOT::ENTupleStructure::kUnknown) {
80 if (options.GetReturnInvalidOnError()) {
81 auto invalidField = std::make_unique<ROOT::RInvalidField>(GetFieldName(), GetTypeName(), "",
83 invalidField->SetOnDiskId(fFieldId);
84 return invalidField;
85 } else {
86 throw RException(R__FAIL("unexpected on-disk field structure value for field \"" + GetFieldName() + "\""));
87 }
88 }
89
90 // Untyped records and collections
91 if (GetTypeName().empty()) {
92 switch (GetStructure()) {
94 std::vector<std::unique_ptr<ROOT::RFieldBase>> memberFields;
95 memberFields.reserve(fLinkIds.size());
96 for (auto id : fLinkIds) {
97 const auto &memberDesc = ntplDesc.GetFieldDescriptor(id);
98 auto field = memberDesc.CreateField(ntplDesc, options);
100 return field;
101 memberFields.emplace_back(std::move(field));
102 }
103 auto recordField = std::make_unique<ROOT::RRecordField>(GetFieldName(), std::move(memberFields));
104 recordField->SetOnDiskId(fFieldId);
105 return recordField;
106 }
108 if (fLinkIds.size() != 1) {
109 throw RException(R__FAIL("unsupported untyped collection for field \"" + GetFieldName() + "\""));
110 }
111 auto itemField = ntplDesc.GetFieldDescriptor(fLinkIds[0]).CreateField(ntplDesc, options);
113 return itemField;
114 auto collectionField = ROOT::RVectorField::CreateUntyped(GetFieldName(), std::move(itemField));
115 collectionField->SetOnDiskId(fFieldId);
116 return collectionField;
117 }
118 default: throw RException(R__FAIL("unsupported untyped field structure for field \"" + GetFieldName() + "\""));
119 }
120 }
121
122 try {
123 const auto &fieldName = GetFieldName();
124 const auto &typeName = GetTypeAlias().empty() ? GetTypeName() : GetTypeAlias();
125 // NOTE: Unwrap() here may throw an exception, hence the try block.
126 // If options.fReturnInvalidOnError is false we just rethrow it, otherwise we return an InvalidField wrapping the
127 // error.
128 auto field = ROOT::Internal::CallFieldBaseCreate(fieldName, typeName, options, &ntplDesc, fFieldId).Unwrap();
129 field->SetOnDiskId(fFieldId);
130
131 for (auto &subfield : *field) {
132 const auto subfieldId = ntplDesc.FindFieldId(subfield.GetFieldName(), subfield.GetParent()->GetOnDiskId());
133 subfield.SetOnDiskId(subfieldId);
135 auto &invalidField = static_cast<ROOT::RInvalidField &>(subfield);
136 // A subfield being invalid "infects" its entire ancestry.
137 return invalidField.Clone(fieldName);
138 }
139 }
140
141 return field;
142 } catch (const RException &ex) {
143 if (options.GetReturnInvalidOnError())
144 return std::make_unique<ROOT::RInvalidField>(GetFieldName(), GetTypeName(), ex.GetError().GetReport(),
146 else
147 throw ex;
148 }
149}
150
152{
154 return false;
155
156 // Skip untyped structs
157 if (fTypeName.empty())
158 return false;
159
160 if (fStructure == ROOT::ENTupleStructure::kRecord) {
161 if (fTypeName.compare(0, 10, "std::pair<") == 0)
162 return false;
163 if (fTypeName.compare(0, 11, "std::tuple<") == 0)
164 return false;
165 }
166
167 return true;
168}
169
170////////////////////////////////////////////////////////////////////////////////
171
173{
174 return fLogicalColumnId == other.fLogicalColumnId && fPhysicalColumnId == other.fPhysicalColumnId &&
175 fBitsOnStorage == other.fBitsOnStorage && fType == other.fType && fFieldId == other.fFieldId &&
176 fIndex == other.fIndex && fRepresentationIndex == other.fRepresentationIndex &&
177 fValueRange == other.fValueRange;
178}
179
181{
182 RColumnDescriptor clone;
183 clone.fLogicalColumnId = fLogicalColumnId;
184 clone.fPhysicalColumnId = fPhysicalColumnId;
185 clone.fBitsOnStorage = fBitsOnStorage;
186 clone.fType = fType;
187 clone.fFieldId = fFieldId;
188 clone.fIndex = fIndex;
189 clone.fFirstElementIndex = fFirstElementIndex;
190 clone.fRepresentationIndex = fRepresentationIndex;
191 clone.fValueRange = fValueRange;
192 return clone;
193}
194
195////////////////////////////////////////////////////////////////////////////////
196
199{
200 const auto N = fCumulativeNElements.size();
201 R__ASSERT(N > 0);
202 R__ASSERT(N == fPageInfos.size());
203
204 std::size_t left = 0;
205 std::size_t right = N - 1;
206 std::size_t midpoint = N;
207 while (left <= right) {
208 midpoint = (left + right) / 2;
209 if (fCumulativeNElements[midpoint] <= idxInCluster) {
210 left = midpoint + 1;
211 continue;
212 }
213
214 if ((midpoint == 0) || (fCumulativeNElements[midpoint - 1] <= idxInCluster))
215 break;
216
217 right = midpoint - 1;
218 }
220
221 auto pageInfo = fPageInfos[midpoint];
222 decltype(idxInCluster) firstInPage = (midpoint == 0) ? 0 : fCumulativeNElements[midpoint - 1];
224 R__ASSERT((firstInPage + pageInfo.GetNElements()) > idxInCluster);
226}
227
228std::size_t
231 std::size_t pageSize)
232{
233 R__ASSERT(fPhysicalColumnId == columnRange.GetPhysicalColumnId());
234 R__ASSERT(!columnRange.IsSuppressed());
235
236 const auto nElements =
237 std::accumulate(fPageInfos.begin(), fPageInfos.end(), 0U,
238 [](std::size_t n, const auto &pageInfo) { return n + pageInfo.GetNElements(); });
239 const auto nElementsRequired = static_cast<std::uint64_t>(columnRange.GetNElements());
240
242 return 0U;
243 R__ASSERT((nElementsRequired > nElements) && "invalid attempt to shrink RPageRange");
244
245 std::vector<RPageInfo> pageInfos;
246 // Synthesize new `RPageInfo`s as needed
247 const std::uint64_t nElementsPerPage = pageSize / element.GetSize();
251 pageInfo.SetNElements(std::min(nElementsPerPage, nRemainingElements));
254 locator.SetNBytesOnStorage(element.GetPackedSize(pageInfo.GetNElements()));
255 pageInfo.SetLocator(locator);
256 pageInfos.emplace_back(pageInfo);
257 nRemainingElements -= pageInfo.GetNElements();
258 }
259
260 pageInfos.insert(pageInfos.end(), std::make_move_iterator(fPageInfos.begin()),
261 std::make_move_iterator(fPageInfos.end()));
262 std::swap(fPageInfos, pageInfos);
264}
265
267{
268 return fClusterId == other.fClusterId && fFirstEntryIndex == other.fFirstEntryIndex &&
269 fNEntries == other.fNEntries && fColumnRanges == other.fColumnRanges && fPageRanges == other.fPageRanges;
270}
271
273{
274 std::uint64_t nbytes = 0;
275 for (const auto &pr : fPageRanges) {
276 for (const auto &pi : pr.second.GetPageInfos()) {
277 nbytes += pi.GetLocator().GetNBytesOnStorage();
278 }
279 }
280 return nbytes;
281}
282
284{
285 RClusterDescriptor clone;
286 clone.fClusterId = fClusterId;
287 clone.fFirstEntryIndex = fFirstEntryIndex;
288 clone.fNEntries = fNEntries;
289 clone.fColumnRanges = fColumnRanges;
290 for (const auto &d : fPageRanges)
291 clone.fPageRanges.emplace(d.first, d.second.Clone());
292 return clone;
293}
294
295////////////////////////////////////////////////////////////////////////////////
296
298{
299 return fContentId == other.fContentId && fTypeName == other.fTypeName && fTypeVersion == other.fTypeVersion;
300}
301
303{
305 clone.fContentId = fContentId;
306 clone.fTypeVersion = fTypeVersion;
307 clone.fTypeName = fTypeName;
308 clone.fContent = fContent;
309 return clone;
310}
311
312////////////////////////////////////////////////////////////////////////////////
313
315{
316 // clang-format off
317 return fName == other.fName &&
318 fDescription == other.fDescription &&
319 fNEntries == other.fNEntries &&
320 fGeneration == other.fGeneration &&
321 fFieldZeroId == other.fFieldZeroId &&
322 fFieldDescriptors == other.fFieldDescriptors &&
323 fColumnDescriptors == other.fColumnDescriptors &&
324 fClusterGroupDescriptors == other.fClusterGroupDescriptors &&
325 fClusterDescriptors == other.fClusterDescriptors;
326 // clang-format on
327}
328
330{
332 for (const auto &cd : fClusterDescriptors) {
333 if (!cd.second.ContainsColumn(physicalColumnId))
334 continue;
335 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
336 result = std::max(result, columnRange.GetFirstElementIndex() + columnRange.GetNElements());
337 }
338 return result;
339}
340
343{
344 std::string leafName(fieldName);
345 auto posDot = leafName.find_last_of('.');
346 if (posDot != std::string::npos) {
347 auto parentName = leafName.substr(0, posDot);
348 leafName = leafName.substr(posDot + 1);
349 parentId = FindFieldId(parentName, parentId);
350 }
351 auto itrFieldDesc = fFieldDescriptors.find(parentId);
352 if (itrFieldDesc == fFieldDescriptors.end())
354 for (const auto linkId : itrFieldDesc->second.GetLinkIds()) {
355 if (fFieldDescriptors.at(linkId).GetFieldName() == leafName)
356 return linkId;
357 }
359}
360
362{
364 return "";
365
366 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
367 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
368 if (prefix.empty())
369 return fieldDescriptor.GetFieldName();
370 return prefix + "." + fieldDescriptor.GetFieldName();
371}
372
374{
375 std::string typeName = fieldDesc.GetTypeName();
376
377 // ROOT v6.34, with spec versions before 1.0.0.1, did not properly renormalize the type name.
378 R__ASSERT(fVersionEpoch == 1);
379 if (fVersionMajor == 0 && fVersionMinor == 0 && fVersionPatch < 1) {
380 typeName = ROOT::Internal::GetRenormalizedTypeName(typeName);
381 }
382
383 return typeName;
384}
385
387{
388 return FindFieldId(fieldName, GetFieldZeroId());
389}
390
392 std::uint32_t columnIndex,
393 std::uint16_t representationIndex) const
394{
395 auto itr = fFieldDescriptors.find(fieldId);
396 if (itr == fFieldDescriptors.cend())
398 if (columnIndex >= itr->second.GetColumnCardinality())
400 const auto idx = representationIndex * itr->second.GetColumnCardinality() + columnIndex;
401 if (itr->second.GetLogicalColumnIds().size() <= idx)
403 return itr->second.GetLogicalColumnIds()[idx];
404}
405
407 std::uint32_t columnIndex,
408 std::uint16_t representationIndex) const
409{
410 auto logicalId = FindLogicalColumnId(fieldId, columnIndex, representationIndex);
413 return GetColumnDescriptor(logicalId).GetPhysicalId();
414}
415
418{
419 if (GetNClusterGroups() == 0)
421
422 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
423
424 std::size_t cgLeft = 0;
425 std::size_t cgRight = GetNClusterGroups() - 1;
426 while (cgLeft <= cgRight) {
427 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
428 const auto &clusterIds = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]).GetClusterIds();
429 R__ASSERT(!clusterIds.empty());
430
431 const auto &clusterDesc = GetClusterDescriptor(clusterIds.front());
432 // this may happen if the RNTuple has an empty schema
433 if (!clusterDesc.ContainsColumn(physicalColumnId))
435
436 const auto firstElementInGroup = clusterDesc.GetColumnRange(physicalColumnId).GetFirstElementIndex();
438 // Look into the lower half of cluster groups
440 cgRight = cgMidpoint - 1;
441 continue;
442 }
443
444 const auto &lastColumnRange = GetClusterDescriptor(clusterIds.back()).GetColumnRange(physicalColumnId);
445 if ((lastColumnRange.GetFirstElementIndex() + lastColumnRange.GetNElements()) <= index) {
446 // Look into the upper half of cluster groups
447 cgLeft = cgMidpoint + 1;
448 continue;
449 }
450
451 // Binary search in the current cluster group; since we already checked the element range boundaries,
452 // the element must be in that cluster group.
453 std::size_t clusterLeft = 0;
454 std::size_t clusterRight = clusterIds.size() - 1;
455 while (clusterLeft <= clusterRight) {
456 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
458 const auto &columnRange = GetClusterDescriptor(clusterId).GetColumnRange(physicalColumnId);
459
460 if (columnRange.Contains(index))
461 return clusterId;
462
463 if (columnRange.GetFirstElementIndex() > index) {
466 continue;
467 }
468
469 if (columnRange.GetFirstElementIndex() + columnRange.GetNElements() <= index) {
471 continue;
472 }
473 }
474 R__ASSERT(false);
475 }
477}
478
480{
481 if (GetNClusterGroups() == 0)
483
484 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
485
486 std::size_t cgLeft = 0;
487 std::size_t cgRight = GetNClusterGroups() - 1;
488 while (cgLeft <= cgRight) {
489 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
490 const auto &cgDesc = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]);
491
492 if (cgDesc.GetMinEntry() > entryIdx) {
494 cgRight = cgMidpoint - 1;
495 continue;
496 }
497
498 if (cgDesc.GetMinEntry() + cgDesc.GetEntrySpan() <= entryIdx) {
499 cgLeft = cgMidpoint + 1;
500 continue;
501 }
502
503 // Binary search in the current cluster group; since we already checked the element range boundaries,
504 // the element must be in that cluster group.
505 const auto &clusterIds = cgDesc.GetClusterIds();
506 R__ASSERT(!clusterIds.empty());
507 std::size_t clusterLeft = 0;
508 std::size_t clusterRight = clusterIds.size() - 1;
509 while (clusterLeft <= clusterRight) {
510 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
511 const auto &clusterDesc = GetClusterDescriptor(clusterIds[clusterMidpoint]);
512
513 if (clusterDesc.GetFirstEntryIndex() > entryIdx) {
516 continue;
517 }
518
519 if (clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries() <= entryIdx) {
521 continue;
522 }
523
525 }
526 R__ASSERT(false);
527 }
529}
530
532{
533 // TODO(jblomer): we may want to shortcut the common case and check if clusterId + 1 contains
534 // firstEntryInNextCluster. This shortcut would currently always trigger. We do not want, however, to depend
535 // on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure that the
536 // binary search code path remains tested.
537 const auto &clusterDesc = GetClusterDescriptor(clusterId);
538 const auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
539 return FindClusterId(firstEntryInNextCluster);
540}
541
543{
544 // TODO(jblomer): we may want to shortcut the common case and check if clusterId - 1 contains
545 // firstEntryInNextCluster. This shortcut would currently always trigger. We do not want, however, to depend
546 // on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure that the
547 // binary search code path remains tested.
548 const auto &clusterDesc = GetClusterDescriptor(clusterId);
549 if (clusterDesc.GetFirstEntryIndex() == 0)
551 return FindClusterId(clusterDesc.GetFirstEntryIndex() - 1);
552}
553
554std::vector<ROOT::DescriptorId_t>
556{
557 auto fieldZeroId = desc.GetFieldZeroId();
558
559 std::vector<ROOT::DescriptorId_t> fields;
560 for (const auto fieldId : fFieldIdsOrder) {
561 if (desc.GetFieldDescriptor(fieldId).GetParentId() == fieldZeroId)
562 fields.emplace_back(fieldId);
563 }
564 return fields;
565}
566
572
574 : fNTuple(ntuple)
575{
576 std::deque<ROOT::DescriptorId_t> fieldIdQueue{ntuple.GetFieldZeroId()};
577
578 while (!fieldIdQueue.empty()) {
579 auto currFieldId = fieldIdQueue.front();
580 fieldIdQueue.pop_front();
581
582 const auto &columns = ntuple.GetFieldDescriptor(currFieldId).GetLogicalColumnIds();
583 fColumns.insert(fColumns.end(), columns.begin(), columns.end());
584
585 for (const auto &field : ntuple.GetFieldIterable(currFieldId)) {
586 auto fieldId = field.GetId();
587 fieldIdQueue.push_back(fieldId);
588 }
589 }
590}
591
592std::vector<std::uint64_t> ROOT::RNTupleDescriptor::GetFeatureFlags() const
593{
594 std::vector<std::uint64_t> result;
595 unsigned int base = 0;
596 std::uint64_t flags = 0;
597 for (auto f : fFeatureFlags) {
598 if ((f > 0) && ((f % 64) == 0))
599 throw RException(R__FAIL("invalid feature flag: " + std::to_string(f)));
600 while (f > base + 64) {
601 result.emplace_back(flags);
602 flags = 0;
603 base += 64;
604 }
605 f -= base;
606 flags |= 1 << f;
607 }
608 result.emplace_back(flags);
609 return result;
610}
611
613 std::vector<RClusterDescriptor> &clusterDescs)
614{
616 if (iter == fClusterGroupDescriptors.end())
617 return R__FAIL("invalid attempt to add details of unknown cluster group");
618 if (iter->second.HasClusterDetails())
619 return R__FAIL("invalid attempt to re-populate cluster group details");
620 if (iter->second.GetNClusters() != clusterDescs.size())
621 return R__FAIL("mismatch of number of clusters");
622
623 std::vector<ROOT::DescriptorId_t> clusterIds;
624 for (unsigned i = 0; i < clusterDescs.size(); ++i) {
625 clusterIds.emplace_back(clusterDescs[i].GetId());
626 auto [_, success] = fClusterDescriptors.emplace(clusterIds.back(), std::move(clusterDescs[i]));
627 if (!success) {
628 return R__FAIL("invalid attempt to re-populate existing cluster");
629 }
630 }
632 return fClusterDescriptors[a].GetFirstEntryIndex() < fClusterDescriptors[b].GetFirstEntryIndex();
633 });
635 cgBuilder.AddSortedClusters(clusterIds);
636 iter->second = cgBuilder.MoveDescriptor().Unwrap();
637 return RResult<void>::Success();
638}
639
641{
643 if (iter == fClusterGroupDescriptors.end())
644 return R__FAIL("invalid attempt to drop cluster details of unknown cluster group");
645 if (!iter->second.HasClusterDetails())
646 return R__FAIL("invalid attempt to drop details of cluster group summary");
647
648 for (auto clusterId : iter->second.GetClusterIds())
650 iter->second = iter->second.CloneSummary();
651 return RResult<void>::Success();
652}
653
654std::unique_ptr<ROOT::RNTupleModel> ROOT::RNTupleDescriptor::CreateModel(const RCreateModelOptions &options) const
655{
656 // Collect all top-level fields that have invalid columns (recursively): by default if we find any we throw an
657 // exception; if we are in ForwardCompatible mode, we proceed but skip all of those top-level fields.
658 std::unordered_set<ROOT::DescriptorId_t> invalidFields;
659 for (const auto &colDesc : GetColumnIterable()) {
661 auto fieldId = colDesc.GetFieldId();
662 while (1) {
663 const auto &field = GetFieldDescriptor(fieldId);
664 if (field.GetParentId() == GetFieldZeroId())
665 break;
666 fieldId = field.GetParentId();
667 }
668 invalidFields.insert(fieldId);
669
670 // No need to look for all invalid fields if we're gonna error out anyway
671 if (!options.GetForwardCompatible())
672 break;
673 }
674 }
675
676 if (!options.GetForwardCompatible() && !invalidFields.empty())
678 "cannot create Model: descriptor contains unknown column types. Use 'SetForwardCompatible(true)' on the "
679 "RCreateModelOptions to create a partial model containing only the fields made up by known columns."));
680
681 auto fieldZero = std::make_unique<ROOT::RFieldZero>();
682 fieldZero->SetOnDiskId(GetFieldZeroId());
683 auto model = options.GetCreateBare() ? RNTupleModel::CreateBare(std::move(fieldZero))
684 : RNTupleModel::Create(std::move(fieldZero));
686 createFieldOpts.SetReturnInvalidOnError(options.GetForwardCompatible());
687 createFieldOpts.SetEmulateUnknownTypes(options.GetEmulateUnknownTypes());
688 for (const auto &topDesc : GetTopLevelFields()) {
689 if (invalidFields.count(topDesc.GetId()) > 0) {
690 // Field contains invalid columns: skip it
691 continue;
692 }
693
694 auto field = topDesc.CreateField(*this, createFieldOpts);
695
696 // If we got an InvalidField here, figure out if it's a hard error or if the field must simply be skipped.
697 // The only case where it's not a hard error is if the field has an unknown structure, as that case is
698 // covered by the ForwardCompatible flag (note that if the flag is off we would not get here
699 // in the first place, so we don't need to check for that flag again).
700 if (field->GetTraits() & ROOT::RFieldBase::kTraitInvalidField) {
701 const auto &invalid = static_cast<const RInvalidField &>(*field);
702 const auto cat = invalid.GetCategory();
704 if (mustThrow)
705 throw invalid.GetError();
706
707 // Not a hard error: skip the field and go on.
708 continue;
709 }
710
711 if (options.GetReconstructProjections() && topDesc.IsProjectedField()) {
712 model->AddProjectedField(std::move(field), [this](const std::string &targetName) -> std::string {
713 return GetQualifiedFieldName(GetFieldDescriptor(FindFieldId(targetName)).GetProjectionSourceId());
714 });
715 } else {
716 model->AddField(std::move(field));
717 }
718 }
719 model->Freeze();
720 return model;
721}
722
724{
725 RNTupleDescriptor clone;
726 clone.fName = fName;
731 // OnDiskHeaderSize, OnDiskHeaderXxHash3 not copied because they may come from a merged header + extension header
732 // and therefore do not represent the actual source's header.
733 // OnDiskFooterSize not copied because it contains information beyond the schema, for example the clustering.
734
735 for (const auto &d : fFieldDescriptors)
736 clone.fFieldDescriptors.emplace(d.first, d.second.Clone());
737 for (const auto &d : fColumnDescriptors)
738 clone.fColumnDescriptors.emplace(d.first, d.second.Clone());
739
740 for (const auto &d : fExtraTypeInfoDescriptors)
741 clone.fExtraTypeInfoDescriptors.emplace_back(d.Clone());
743 clone.fHeaderExtension = std::make_unique<RHeaderExtension>(*fHeaderExtension);
744
745 return clone;
746}
747
749{
751
756
760 clone.fNEntries = fNEntries;
761 clone.fNClusters = fNClusters;
762 clone.fGeneration = fGeneration;
763 for (const auto &d : fClusterGroupDescriptors)
764 clone.fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
766 for (const auto &d : fClusterDescriptors)
767 clone.fClusterDescriptors.emplace(d.first, d.second.Clone());
768 return clone;
769}
770
771////////////////////////////////////////////////////////////////////////////////
772
774{
775 return fClusterGroupId == other.fClusterGroupId && fClusterIds == other.fClusterIds &&
776 fMinEntry == other.fMinEntry && fEntrySpan == other.fEntrySpan && fNClusters == other.fNClusters;
777}
778
780{
782 clone.fClusterGroupId = fClusterGroupId;
783 clone.fPageListLocator = fPageListLocator;
784 clone.fPageListLength = fPageListLength;
785 clone.fMinEntry = fMinEntry;
786 clone.fEntrySpan = fEntrySpan;
787 clone.fNClusters = fNClusters;
788 return clone;
789}
790
792{
793 RClusterGroupDescriptor clone = CloneSummary();
794 clone.fClusterIds = fClusterIds;
795 return clone;
796}
797
798////////////////////////////////////////////////////////////////////////////////
799
802 std::uint64_t firstElementIndex,
803 std::uint32_t compressionSettings,
805{
806 if (physicalId != pageRange.fPhysicalColumnId)
807 return R__FAIL("column ID mismatch");
808 if (fCluster.fColumnRanges.count(physicalId) > 0)
809 return R__FAIL("column ID conflict");
811 for (const auto &pi : pageRange.fPageInfos) {
812 columnRange.IncrementNElements(pi.GetNElements());
813 }
814 fCluster.fPageRanges[physicalId] = pageRange.Clone();
815 fCluster.fColumnRanges[physicalId] = columnRange;
816 return RResult<void>::Success();
817}
818
821{
822 if (fCluster.fColumnRanges.count(physicalId) > 0)
823 return R__FAIL("column ID conflict");
824
826 columnRange.SetPhysicalColumnId(physicalId);
827 columnRange.SetIsSuppressed(true);
828 fCluster.fColumnRanges[physicalId] = columnRange;
829 return RResult<void>::Success();
830}
831
834{
835 for (auto &[_, columnRange] : fCluster.fColumnRanges) {
836 if (!columnRange.IsSuppressed())
837 continue;
838 R__ASSERT(columnRange.GetFirstElementIndex() == ROOT::kInvalidNTupleIndex);
839
840 const auto &columnDesc = desc.GetColumnDescriptor(columnRange.GetPhysicalColumnId());
841 const auto &fieldDesc = desc.GetFieldDescriptor(columnDesc.GetFieldId());
842 // We expect only few columns and column representations per field, so we do a linear search
843 for (const auto otherColumnLogicalId : fieldDesc.GetLogicalColumnIds()) {
845 if (otherColumnDesc.GetRepresentationIndex() == columnDesc.GetRepresentationIndex())
846 continue;
847 if (otherColumnDesc.GetIndex() != columnDesc.GetIndex())
848 continue;
849
850 // Found corresponding column of a different column representation
851 const auto &otherColumnRange = fCluster.GetColumnRange(otherColumnDesc.GetPhysicalId());
852 if (otherColumnRange.IsSuppressed())
853 continue;
854
855 columnRange.SetFirstElementIndex(otherColumnRange.GetFirstElementIndex());
856 columnRange.SetNElements(otherColumnRange.GetNElements());
857 break;
858 }
859
860 if (columnRange.GetFirstElementIndex() == ROOT::kInvalidNTupleIndex) {
861 return R__FAIL(std::string("cannot find non-suppressed column for column ID ") +
862 std::to_string(columnRange.GetPhysicalColumnId()) +
863 ", cluster ID: " + std::to_string(fCluster.GetId()));
864 }
865 }
866 return RResult<void>::Success();
867}
868
871{
872 /// Carries out a depth-first traversal of a field subtree rooted at `rootFieldId`. For each field, `visitField` is
873 /// called passing the field ID and the number of overall repetitions, taking into account the repetitions of each
874 /// parent field in the hierarchy.
876 const auto &visitField, const auto &enterSubtree) -> void {
878 for (const auto &f : desc.GetFieldIterable(rootFieldId)) {
879 const std::uint64_t nRepetitions = std::max(f.GetNRepetitions(), std::uint64_t{1U}) * nRepetitionsAtThisLevel;
881 }
882 };
883
884 // Extended columns can only be part of the header extension
885 if (!desc.GetHeaderExtension())
886 return *this;
887
888 // Ensure that all columns in the header extension have their associated `R(Column|Page)Range`
889 // Extended columns can be attached both to fields of the regular header and to fields of the extension header
890 for (const auto &topLevelField : desc.GetTopLevelFields()) {
892 topLevelField.GetId(), std::max(topLevelField.GetNRepetitions(), std::uint64_t{1U}),
893 [&](ROOT::DescriptorId_t fieldId, std::uint64_t nRepetitions) {
894 for (const auto &c : desc.GetColumnIterable(fieldId)) {
895 const ROOT::DescriptorId_t physicalId = c.GetPhysicalId();
896 auto &columnRange = fCluster.fColumnRanges[physicalId];
897
898 // Initialize a RColumnRange for `physicalId` if it was not there. Columns that were created during model
899 // extension won't have on-disk metadata for the clusters that were already committed before the model
900 // was extended. Therefore, these need to be synthetically initialized upon reading.
901 if (columnRange.GetPhysicalColumnId() == ROOT::kInvalidDescriptorId) {
902 columnRange.SetPhysicalColumnId(physicalId);
903 columnRange.SetFirstElementIndex(0);
904 columnRange.SetNElements(0);
905 columnRange.SetIsSuppressed(c.IsSuppressedDeferredColumn());
906 }
907 // Fixup the RColumnRange and RPageRange in deferred columns. We know what the first element index and
908 // number of elements should have been if the column was not deferred; fix those and let
909 // `ExtendToFitColumnRange()` synthesize RPageInfos accordingly.
910 // Note that a deferred column (i.e, whose first element index is > 0) already met the criteria of
911 // `ROOT::RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the
912 // field zero excluding subfields of collection and variant fields.
913 if (c.IsDeferredColumn()) {
914 columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions);
915 columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions);
916 if (!columnRange.IsSuppressed()) {
917 auto &pageRange = fCluster.fPageRanges[physicalId];
918 pageRange.fPhysicalColumnId = physicalId;
919 const auto element = ROOT::Internal::RColumnElementBase::Generate<void>(c.GetType());
920 pageRange.ExtendToFitColumnRange(columnRange, *element, ROOT::Internal::RPage::kPageZeroSize);
921 }
922 } else if (!columnRange.IsSuppressed()) {
923 fCluster.fPageRanges[physicalId].fPhysicalColumnId = physicalId;
924 }
925 }
926 },
928 }
929 return *this;
930}
931
933{
934 if (fCluster.fClusterId == ROOT::kInvalidDescriptorId)
935 return R__FAIL("unset cluster ID");
936 if (fCluster.fNEntries == 0)
937 return R__FAIL("empty cluster");
938 for (auto &pr : fCluster.fPageRanges) {
939 if (fCluster.fColumnRanges.count(pr.first) == 0) {
940 return R__FAIL("missing column range");
941 }
942 pr.second.fCumulativeNElements.clear();
943 pr.second.fCumulativeNElements.reserve(pr.second.fPageInfos.size());
945 for (const auto &pi : pr.second.fPageInfos) {
946 sum += pi.GetNElements();
947 pr.second.fCumulativeNElements.emplace_back(sum);
948 }
949 }
951 std::swap(result, fCluster);
952 return result;
953}
954
955////////////////////////////////////////////////////////////////////////////////
956
959{
961 builder.ClusterGroupId(clusterGroupDesc.GetId())
962 .PageListLocator(clusterGroupDesc.GetPageListLocator())
963 .PageListLength(clusterGroupDesc.GetPageListLength())
964 .MinEntry(clusterGroupDesc.GetMinEntry())
965 .EntrySpan(clusterGroupDesc.GetEntrySpan())
966 .NClusters(clusterGroupDesc.GetNClusters());
967 return builder;
968}
969
971{
972 if (fClusterGroup.fClusterGroupId == ROOT::kInvalidDescriptorId)
973 return R__FAIL("unset cluster group ID");
975 std::swap(result, fClusterGroup);
976 return result;
977}
978
979////////////////////////////////////////////////////////////////////////////////
980
982{
983 if (fExtraTypeInfo.fContentId == EExtraTypeInfoIds::kInvalid)
984 throw RException(R__FAIL("invalid extra type info content id"));
986 std::swap(result, fExtraTypeInfo);
987 return result;
988}
989
990////////////////////////////////////////////////////////////////////////////////
991
993{
     // Succeeds if a field descriptor with the given ID has been registered in the descriptor
     // under construction; otherwise returns an error result naming the missing ID.
994 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
995 return R__FAIL("field with id '" + std::to_string(fieldId) + "' doesn't exist");
996 return RResult<void>::Success();
997}
998
1000{
     // Checks the descriptor under construction for consistency and returns an error result on
     // the first violation found: version epoch, RNTuple name, parent links of all fields, and
     // completeness of the last column representation of every field.
1001 if (fDescriptor.fVersionEpoch != RNTuple::kVersionEpoch) {
1002 return R__FAIL("unset or unsupported RNTuple epoch version");
1003 }
1004
1005 // Reuse field name validity check
1006 auto validName = ROOT::Internal::EnsureValidNameForRNTuple(fDescriptor.GetName(), "Field");
1007 if (!validName) {
     // NOTE(review): the body of this branch (original line 1008) is missing from this listing;
     // presumably it forwards the error held by `validName` -- confirm.
1009 }
1010
1011 for (const auto &[fieldId, fieldDesc] : fDescriptor.fFieldDescriptors) {
1012 // parent not properly set?
1013 if (fieldId != fDescriptor.GetFieldZeroId() && fieldDesc.GetParentId() == ROOT::kInvalidDescriptorId) {
1014 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has an invalid parent id");
1015 }
1016
1017 // Same number of columns in every column representation?
1018 const auto columnCardinality = fieldDesc.GetColumnCardinality();
     // Fields without columns have nothing further to check.
1019 if (columnCardinality == 0)
1020 continue;
1021
1022 // In AddColumn, we already checked that all but the last representation are complete.
1023 // Check that the last column representation is complete, i.e. has all columns.
1024 const auto &logicalColumnIds = fieldDesc.GetLogicalColumnIds();
1025 const auto nColumns = logicalColumnIds.size();
1026 // If we have only a single column representation, the following condition is true by construction
     // NOTE(review): AddColumn sets the cardinality to the column count of the first representation,
     // so with a single complete representation nColumns == columnCardinality and this shortcut
     // (nColumns + 1) would not trigger; the check below still passes in that case. Confirm whether
     // `nColumns == columnCardinality` was intended.
1027 if ((nColumns + 1) == columnCardinality)
1028 continue;
1029
     // The last registered column must be the final column of its representation.
1030 const auto &lastColumn = fDescriptor.GetColumnDescriptor(logicalColumnIds.back());
1031 if (lastColumn.GetIndex() + 1 != columnCardinality)
1032 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has incomplete column representations");
1033 }
1034
1035 return RResult<void>::Success();
1036}
1037
1039{
     // Validates the descriptor under construction (throwing if validation fails), builds the list
     // of cluster group IDs sorted by their minimum entry, and hands the descriptor out.
1040 EnsureValidDescriptor().ThrowOnError();
1041 fDescriptor.fSortedClusterGroupIds.reserve(fDescriptor.fClusterGroupDescriptors.size());
1042 for (const auto &[id, _] : fDescriptor.fClusterGroupDescriptors)
1043 fDescriptor.fSortedClusterGroupIds.emplace_back(id);
     // NOTE(review): the comparator's lambda header (original line 1045) is missing from this
     // listing; the visible body orders IDs `a` and `b` by ascending GetMinEntry().
1044 std::sort(fDescriptor.fSortedClusterGroupIds.begin(), fDescriptor.fSortedClusterGroupIds.end(),
1046 return fDescriptor.fClusterGroupDescriptors[a].GetMinEntry() <
1047 fDescriptor.fClusterGroupDescriptors[b].GetMinEntry();
1048 });
     // NOTE(review): the declaration of `result` (original line 1049) is missing from this listing.
1050 std::swap(result, fDescriptor);
1051 return result;
1052}
1053
1055 std::uint16_t versionMinor, std::uint16_t versionPatch)
1056{
     // Stores the four version components in the descriptor under construction.
     // NOTE(review): the condition guarding the throw below (original line 1057) is missing from
     // this listing; judging by the message it rejects an unsupported epoch -- confirm.
1058 throw RException(R__FAIL("unsupported RNTuple epoch version: " + std::to_string(versionEpoch)));
1059 }
1060 fDescriptor.fVersionEpoch = versionEpoch;
1061 fDescriptor.fVersionMajor = versionMajor;
1062 fDescriptor.fVersionMinor = versionMinor;
1063 fDescriptor.fVersionPatch = versionPatch;
1064}
1065
1067{
     // Stamps the descriptor with the version constants compiled into RNTuple
     // (epoch, major, minor, patch).
     // (Signature line not visible; presumably SetVersionForWriting -- confirm.)
1068 fDescriptor.fVersionEpoch = RNTuple::kVersionEpoch;
1069 fDescriptor.fVersionMajor = RNTuple::kVersionMajor;
1070 fDescriptor.fVersionMinor = RNTuple::kVersionMinor;
1071 fDescriptor.fVersionPatch = RNTuple::kVersionPatch;
1072}
1073
1075 const std::string_view description)
1076{
     // Sets the name and description of the descriptor under construction. The string views
     // are copied into owned std::string members. No name validation happens here.
1077 fDescriptor.fName = std::string(name);
1078 fDescriptor.fDescription = std::string(description);
1079}
1080
1082{
     // Records a feature flag in the descriptor's flag set.
     // Flags that are a multiple of 64 (including 0) are rejected with an RException.
     // NOTE(review): why multiples of 64 are invalid is not visible in this file -- verify
     // against the RNTuple format specification.
1083 if (flag % 64 == 0)
1084 throw RException(R__FAIL("invalid feature flag: " + std::to_string(flag)));
1085 fDescriptor.fFeatureFlags.insert(flag);
1086}
1087
1089{
     // Validates the column descriptor under construction and returns a copy of it.
     // Returns an error result if the logical ID, physical ID, or owning field ID is unset, or if
     // the storage bit width is outside the valid range for a known column type.
1090 if (fColumn.GetLogicalId() == ROOT::kInvalidDescriptorId)
1091 return R__FAIL("invalid logical column id");
1092 if (fColumn.GetPhysicalId() == ROOT::kInvalidDescriptorId)
1093 return R__FAIL("invalid physical column id");
1094 if (fColumn.GetFieldId() == ROOT::kInvalidDescriptorId)
1095 return R__FAIL("invalid field id, dangling column");
1096
1097 // NOTE: if the column type is unknown we don't want to fail, as we might be reading an RNTuple
1098 // created with a future version of ROOT. In this case we just skip the valid bit range check,
1099 // as we have no idea what the valid range is.
1100 // In general, reading the metadata of an unknown column is fine, it becomes an error only when
1101 // we try to read the actual data contained in it.
1102 if (fColumn.GetType() != ENTupleColumnType::kUnknown) {
1103 const auto [minBits, maxBits] = ROOT::Internal::RColumnElementBase::GetValidBitRange(fColumn.GetType());
1104 if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits)
1105 return R__FAIL("invalid column bit width");
1106 }
1107
     // The builder keeps its state; the caller receives a clone.
1108 return fColumn.Clone();
1109}
1110
1118
1121{
     // Creates a field descriptor builder pre-filled from a live field: versions, name,
     // description, type name and alias, structure, and repetition count.
     // NOTE(review): the declaration of `fieldDesc` (original line 1122) is missing from this listing.
1123 fieldDesc.FieldVersion(field.GetFieldVersion())
1124 .TypeVersion(field.GetTypeVersion())
1125 .FieldName(field.GetFieldName())
1126 .FieldDescription(field.GetDescription())
1127 .TypeName(field.GetTypeName())
1128 .TypeAlias(field.GetTypeAlias())
1129 .Structure(field.GetStructure())
1130 .NRepetitions(field.GetNRepetitions());
     // NOTE(review): original line 1131 is missing from this listing; presumably a condition that
     // makes the checksum assignment below apply only to fields carrying a type checksum -- confirm.
1132 fieldDesc.TypeChecksum(field.GetTypeChecksum());
1133 return fieldDesc;
1134}
1135
1137{
     // Validates the field descriptor under construction and returns a copy of it.
     // Returns an error result if the field ID or structure is unset, or, for fields that have
     // a parent, if the name is invalid or empty.
1138 if (fField.GetId() == ROOT::kInvalidDescriptorId) {
1139 return R__FAIL("invalid field id");
1140 }
1141 if (fField.GetStructure() == ROOT::ENTupleStructure::kInvalid) {
1142 return R__FAIL("invalid field structure");
1143 }
1144 // FieldZero is usually named "" and would be a false positive here
1145 if (fField.GetParentId() != ROOT::kInvalidDescriptorId) {
1146 auto validName = ROOT::Internal::EnsureValidNameForRNTuple(fField.GetFieldName(), "Field");
1147 if (!validName) {
     // NOTE(review): the body of this branch (original line 1148) is missing from this listing;
     // presumably it forwards the error held by `validName` -- confirm.
1149 }
1150 if (fField.GetFieldName().empty()) {
1151 return R__FAIL("name cannot be empty string \"\"");
1152 }
1153 }
     // The builder keeps its state; the caller receives a clone.
1154 return fField.Clone();
1155}
1156
1158{
     // Registers a clone of the given field descriptor under its ID. If a header extension
     // is active, the field is also marked as belonging to it.
     // NOTE(review): emplace() does not overwrite an existing entry, so a clashing field ID
     // is silently ignored here (no error is reported).
1159 fDescriptor.fFieldDescriptors.emplace(fieldDesc.GetId(), fieldDesc.Clone());
1160 if (fDescriptor.fHeaderExtension)
1161 fDescriptor.fHeaderExtension->MarkExtendedField(fieldDesc);
     // A nameless field without a parent is taken to be the root ("field zero").
1162 if (fieldDesc.GetFieldName().empty() && fieldDesc.GetParentId() == ROOT::kInvalidDescriptorId) {
1163 fDescriptor.fFieldZeroId = fieldDesc.GetId();
1164 }
1165}
1166
1169{
     // Makes field `linkId` a child of field `fieldId`. Both fields must already be registered.
     // Returns an error result if either field is missing, if the child is field zero, if the
     // child already has a parent, or if a field would become its own child.
     // NOTE(review): several lines are missing from this listing: the declaration of `fieldExists`
     // (original line 1170), the return for a missing parent (1172), and the condition guarding
     // the "already has a parent" error (1181).
1171 if (!(fieldExists = EnsureFieldExists(fieldId)))
1173 if (!(fieldExists = EnsureFieldExists(linkId)))
1174 return R__FAIL("child field with id '" + std::to_string(linkId) + "' doesn't exist in NTuple");
1175
1176 if (linkId == fDescriptor.GetFieldZeroId()) {
1177 return R__FAIL("cannot make FieldZero a child field");
1178 }
1179 // fail if field already has another valid parent
1180 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
1182 return R__FAIL("field '" + std::to_string(linkId) + "' already has a parent ('" + std::to_string(parentId) + ")");
1183 }
1184 if (fieldId == linkId) {
1185 return R__FAIL("cannot make field '" + std::to_string(fieldId) + "' a child of itself");
1186 }
     // Record the relationship in both directions: child -> parent and parent -> children.
1187 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
1188 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
1189 return RResult<void>::Success();
1190}
1191
1194{
     // Marks field `targetId` as a projection of field `sourceId`. Both fields must already be
     // registered. Returns an error result if either is missing, if the target is field zero,
     // if source and target are the same, if the source is itself a projected field, or if the
     // target already projects a different source.
     // NOTE(review): the declaration of `fieldExists` (original line 1195) and the return for a
     // missing source field (1197) are missing from this listing.
1196 if (!(fieldExists = EnsureFieldExists(sourceId)))
1198 if (!(fieldExists = EnsureFieldExists(targetId)))
1199 return R__FAIL("projected field with id '" + std::to_string(targetId) + "' doesn't exist in NTuple");
1200
1201 if (targetId == fDescriptor.GetFieldZeroId()) {
1202 return R__FAIL("cannot make FieldZero a projected field");
1203 }
1204 if (sourceId == targetId) {
1205 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of itself");
1206 }
     // Chains of projections are not allowed: the source must be a non-projected field.
1207 if (fDescriptor.fFieldDescriptors.at(sourceId).IsProjectedField()) {
1208 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of an already projected field");
1209 }
1210 // fail if target field already has another valid projection source
     // (re-adding the same source is accepted and simply rewrites the same value below)
1211 auto &targetDesc = fDescriptor.fFieldDescriptors.at(targetId);
1212 if (targetDesc.IsProjectedField() && targetDesc.GetProjectionSourceId() != sourceId) {
1213 return R__FAIL("field '" + std::to_string(targetId) + "' has already a projection source ('" +
1214 std::to_string(targetDesc.GetProjectionSourceId()) + ")");
1215 }
1216 fDescriptor.fFieldDescriptors.at(targetId).fProjectionSourceId = sourceId;
1217 return RResult<void>::Success();
1218}
1219
1221{
     // Registers a column descriptor and attaches it to its field. Columns must be added in
     // order: within a representation by ascending column index, and representations by
     // ascending representation index. Returns an error result if the owning field is unknown
     // or if any of the ordering/consistency checks below fails.
1222 const auto fieldId = columnDesc.GetFieldId();
1223 const auto columnIndex = columnDesc.GetIndex();
1224 const auto representationIndex = columnDesc.GetRepresentationIndex();
1225
1226 auto fieldExists = EnsureFieldExists(fieldId);
1227 if (!fieldExists) {
     // NOTE(review): the body of this branch (original line 1228) is missing from this listing;
     // presumably it forwards the error held by `fieldExists` -- confirm.
1229 }
1230 auto &fieldDesc = fDescriptor.fFieldDescriptors.find(fieldId)->second;
1231
     // An alias column must have the same type as the physical column it refers to.
1232 if (columnDesc.IsAliasColumn()) {
1233 if (columnDesc.GetType() != fDescriptor.GetColumnDescriptor(columnDesc.GetPhysicalId()).GetType())
1234 return R__FAIL("alias column type mismatch");
1235 }
     // Reject a second column at the same (field, column index, representation index) slot.
1236 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex, representationIndex) != ROOT::kInvalidDescriptorId) {
1237 return R__FAIL("column index clash");
1238 }
     // No gaps: the preceding column index of this representation must already exist.
1239 if (columnIndex > 0) {
1240 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex - 1, representationIndex) == ROOT::kInvalidDescriptorId)
1241 return R__FAIL("out of bounds column index");
1242 }
1243 if (representationIndex > 0) {
     // No gaps between representations: the previous representation must already exist.
1244 if (fDescriptor.FindLogicalColumnId(fieldId, 0, representationIndex - 1) == ROOT::kInvalidDescriptorId) {
1245 return R__FAIL("out of bounds representation index");
1246 }
1247 if (columnIndex == 0) {
     // Starting a new representation: the previous one must be complete, i.e. its last
     // column (index fColumnCardinality - 1) must exist.
     // NOTE(review): the tail of the condition below (original line 1250) is missing from this listing.
1248 assert(fieldDesc.fColumnCardinality > 0);
1249 if (fDescriptor.FindLogicalColumnId(fieldId, fieldDesc.fColumnCardinality - 1, representationIndex - 1) ==
1251 return R__FAIL("incomplete column representations");
1252 }
1253 } else {
     // Later representations may not have more columns than the first one.
1254 if (columnIndex >= fieldDesc.fColumnCardinality)
1255 return R__FAIL("irregular column representations");
1256 }
1257 } else {
1258 // This will set the column cardinality to the number of columns of the first representation
1259 fieldDesc.fColumnCardinality = columnIndex + 1;
1260 }
1261
1262 const auto logicalId = columnDesc.GetLogicalId();
1263 fieldDesc.fLogicalColumnIds.emplace_back(logicalId);
1264
     // Alias columns do not count as physical columns.
1265 if (!columnDesc.IsAliasColumn())
1266 fDescriptor.fNPhysicalColumns++;
1267 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(columnDesc));
     // NOTE(review): `columnDesc` is read below after being passed through std::move above.
     // If the emplace succeeded, MarkExtendedColumn() sees a moved-from object. Consider
     // marking the extension before the move, or passing the stored descriptor instead.
1268 if (fDescriptor.fHeaderExtension)
1269 fDescriptor.fHeaderExtension->MarkExtendedColumn(columnDesc);
1270
1271 return RResult<void>::Success();
1272}
1273
1275{
     // Registers a cluster group descriptor and updates the descriptor-wide totals.
     // Returns an error result if a cluster group with the same ID is already registered.
1276 const auto id = clusterGroup.GetId();
1277 if (fDescriptor.fClusterGroupDescriptors.count(id) > 0)
1278 return R__FAIL("cluster group id clash");
     // The entry count becomes the largest end-of-range (min entry + span) seen so far;
     // the cluster count is accumulated across groups.
1279 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, clusterGroup.GetMinEntry() + clusterGroup.GetEntrySpan());
1280 fDescriptor.fNClusters += clusterGroup.GetNClusters();
1281 fDescriptor.fClusterGroupDescriptors.emplace(id, std::move(clusterGroup));
1282 return RResult<void>::Success();
1283}
1284
1286{
     // Clears the name, description, field/column/cluster/cluster-group descriptors, and the
     // header extension of the descriptor under construction.
     // NOTE(review): only the members listed below are touched. Counters such as fNEntries,
     // fNClusters and fNPhysicalColumns, the feature flags, and the extra type info list are
     // not reset by the visible code -- confirm this is intended.
1287 fDescriptor.fName = "";
1288 fDescriptor.fDescription = "";
1289 fDescriptor.fFieldDescriptors.clear();
1290 fDescriptor.fColumnDescriptors.clear();
1291 fDescriptor.fClusterDescriptors.clear();
1292 fDescriptor.fClusterGroupDescriptors.clear();
1293 fDescriptor.fHeaderExtension.reset();
1294}
1295
1300
1302{
     // Creates the header extension object if there is none yet. Calling this again is a no-op.
     // AddField() and AddColumn() mark what they add as extended once the extension exists.
1303 if (!fDescriptor.fHeaderExtension)
1304 fDescriptor.fHeaderExtension = std::make_unique<RNTupleDescriptor::RHeaderExtension>();
1305}
1306
1308{
     // Adds `offset` to the logical ID of every alias column, re-keying it in the column map and
     // updating the owning field's list of logical column IDs.
     // The loop treats IDs in [GetNPhysicalColumns(), GetNLogicalColumns()) as the alias columns
     // and asserts that each one really is an alias.
1309 if (fDescriptor.GetNLogicalColumns() == 0)
1310 return;
     // With zero physical columns the unsigned loop condition `id >= 0` below would never
     // become false, so this case is ruled out up front.
1311 R__ASSERT(fDescriptor.GetNPhysicalColumns() > 0);
1312
     // Walk from the highest ID downwards so that a shifted ID never collides with an alias
     // column that has not been moved yet (asserted before re-insertion below).
1313 for (ROOT::DescriptorId_t id = fDescriptor.GetNLogicalColumns() - 1; id >= fDescriptor.GetNPhysicalColumns(); --id) {
1314 auto c = fDescriptor.fColumnDescriptors[id].Clone();
1315 R__ASSERT(c.IsAliasColumn());
1316 R__ASSERT(id == c.GetLogicalId());
1317 fDescriptor.fColumnDescriptors.erase(id);
     // Patch the owning field's reference to this column (first match only).
1318 for (auto &link : fDescriptor.fFieldDescriptors[c.fFieldId].fLogicalColumnIds) {
1319 if (link == c.fLogicalColumnId) {
1320 link += offset;
1321 break;
1322 }
1323 }
1324 c.fLogicalColumnId += offset;
1325 R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0);
1326 fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c));
1327 }
1328}
1329
1331{
     // Registers a cluster descriptor under its ID, taking ownership of it.
     // Returns an error result if a cluster with the same ID is already registered.
1332 auto clusterId = clusterDesc.GetId();
1333 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
1334 return R__FAIL("cluster id clash");
1335 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
1336 return RResult<void>::Success();
1337}
1338
1341{
     // Appends an extra type info descriptor, taking ownership of it.
     // Returns an error result if an equal descriptor (per operator==) is already stored.
1342 // Make sure we have no duplicates
1343 if (std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1344 extraTypeInfoDesc) != fDescriptor.fExtraTypeInfoDescriptors.end()) {
1345 return R__FAIL("extra type info duplicates");
1346 }
1347 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1348 return RResult<void>::Success();
1349}
1350
1352{
     // Stores the given extra type info descriptor: overwrites the matching stored entry if
     // std::find locates one, otherwise appends it.
     // NOTE(review): the last argument of the std::find call (original line 1354) is missing
     // from this listing, so what counts as a "match" cannot be confirmed from here.
1353 auto it = std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1355 if (it != fDescriptor.fExtraTypeInfoDescriptors.end())
1356 *it = std::move(extraTypeInfoDesc);
1357 else
1358 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1359}
1360
1365
1371
1378
1381{
     // Convenience overload: looks up the field descriptor for `fieldId` and delegates to the
     // overload taking a field descriptor. (Signature line not visible in this listing.)
1382 return GetFieldIterable(GetFieldDescriptor(fieldId));
1383}
1384
1391
1393{
     // Returns an iterable over the direct children of field zero.
     // (Signature line not visible in this listing; presumably GetTopLevelFields() -- confirm.)
1394 return GetFieldIterable(GetFieldZeroId());
1395}
1396
1398 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const
1399{
     // Same as the overload without arguments, but forwards `comparator` (a predicate over two
     // field IDs) to GetFieldIterable. How the comparator is applied is defined there.
1400 return GetFieldIterable(GetFieldZeroId(), comparator);
1401}
1402
1407
1413
1419
1424
1429
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:303
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
char name[80]
Definition TGX11.cxx:110
void cd(Int_t id=-1)
#define _(A, B)
Definition cfortran.h:108
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
A column element encapsulates the translation between basic C++ types and their column representation...
static std::pair< std::uint16_t, std::uint16_t > GetValidBitRange(ROOT::ENTupleColumnType type)
Most types have a fixed on-disk bit width.
RResult< RExtraTypeInfoDescriptor > MoveDescriptor()
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live RNTuple field.
void SetNTuple(const std::string_view name, const std::string_view description)
void SetSchemaFromExisting(const RNTupleDescriptor &descriptor)
Copies the "schema" part of descriptor into the builder's descriptor.
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
RResult< void > AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId)
void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
void ShiftAliasColumns(std::uint32_t offset)
Shift column IDs of alias columns by offset
void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor, std::uint16_t versionPatch)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId)
void Reset()
Clears the clusters, fields, and columns stored so far and returns to a pristine RNTupleDescriptor.
void AddField(const RFieldDescriptor &fieldDesc)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > EnsureFieldExists(ROOT::DescriptorId_t fieldId) const
A helper class for serializing and deserialization of the RNTuple binary format.
The window of element indexes of a particular column in a particular cluster.
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange.
Metadata for RNTuple clusters.
ROOT::NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges.
std::unordered_map< ROOT::DescriptorId_t, RColumnRange > fColumnRanges
ROOT::DescriptorId_t fClusterId
RClusterDescriptor Clone() const
bool operator==(const RClusterDescriptor &other) const
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
std::unordered_map< ROOT::DescriptorId_t, RPageRange > fPageRanges
std::uint64_t GetNBytesOnStorage() const
Clusters are bundled in cluster groups.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor Clone() const
std::vector< ROOT::DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
std::uint64_t fPageListLength
Uncompressed size of the page list.
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
bool operator==(const RClusterGroupDescriptor &other) const
RClusterGroupDescriptor CloneSummary() const
Creates a clone without the cluster IDs.
Metadata stored for every column of an RNTuple.
ROOT::DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
bool operator==(const RColumnDescriptor &other) const
ROOT::DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
ROOT::DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
ROOT::ENTupleColumnType fType
The on-disk column type.
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
RColumnDescriptor Clone() const
Get a copy of the descriptor.
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Field-specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor Clone() const
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
A field translates read and write calls from/to underlying columns to/from tree values.
@ kTraitInvalidField
This field is an instance of RInvalidField and can be safely static_cast to it.
@ kTraitTypeChecksum
The TClass checksum is set and valid.
Metadata stored for every field of an RNTuple.
std::unique_ptr< ROOT::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
ROOT::DescriptorId_t fFieldId
RFieldDescriptor Clone() const
Get a copy of the descriptor.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
ROOT::DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
bool operator==(const RFieldDescriptor &other) const
std::string fFieldDescription
Free text set by the user.
ROOT::DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ROOT::ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::vector< ROOT::DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::string fTypeName
The C++ type that was used when writing the field.
std::vector< ROOT::DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
Used in RFieldBase::Check() to record field creation failures.
Definition RField.hxx:72
@ kGeneric
Generic unrecoverable error.
@ kUnknownStructure
The field could not be created because its descriptor had an unknown structural role.
Used to loop over all the clusters of an RNTuple (in unspecified order)
Used to loop over all the cluster groups of an RNTuple (in unspecified order)
Used to loop over a field's associated columns.
std::vector< ROOT::DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
Used to loop over all the extra type info record of an RNTuple (in unspecified order)
Used to loop over a field's child fields.
std::vector< ROOT::DescriptorId_t > GetTopLevelFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header,...
The on-storage metadata of an RNTuple.
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::set< unsigned int > fFeatureFlags
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
ROOT::DescriptorId_t fFieldZeroId
Set by the descriptor builder.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
RColumnDescriptorIterable GetColumnIterable() const
bool operator==(const RNTupleDescriptor &other) const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint16_t fVersionMinor
Set by the descriptor builder when deserialized.
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
std::vector< std::uint64_t > GetFeatureFlags() const
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
std::unique_ptr< ROOT::RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored metadata.
std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const
Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type nam...
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
Potentially a subset of all the available clusters.
ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
std::string fName
The RNTuple name needs to be unique in a given storage location (file)
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
RResult< void > DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId)
std::uint16_t fVersionMajor
Set by the descriptor builder when deserialized.
std::vector< ROOT::DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::unordered_map< ROOT::DescriptorId_t, RFieldDescriptor > fFieldDescriptors
ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const
RResult< void > AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
std::uint16_t fVersionPatch
Set by the descriptor builder when deserialized.
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
std::uint16_t fVersionEpoch
Set by the descriptor builder when deserialized.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
RNTupleDescriptor Clone() const
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
RClusterDescriptorIterable GetClusterIterable() const
RNTupleDescriptor CloneSchema() const
Creates a descriptor containing only the schema information about this RNTuple, i....
std::uint64_t fGeneration
The generation of the descriptor.
ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const
std::unique_ptr< RHeaderExtension > fHeaderExtension
Generic information about the physical location of data.
static std::unique_ptr< RNTupleModel > Create()
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static constexpr std::uint16_t kVersionPatch
Definition RNTuple.hxx:79
static constexpr std::uint16_t kVersionMajor
Definition RNTuple.hxx:77
static constexpr std::uint16_t kVersionEpoch
Definition RNTuple.hxx:76
static constexpr std::uint16_t kVersionMinor
Definition RNTuple.hxx:78
const_iterator begin() const
const_iterator end() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
static std::unique_ptr< RVectorField > CreateUntyped(std::string_view fieldName, std::unique_ptr< RFieldBase > itemField)
struct void * fTypeName
Definition cppyy.h:9
const Int_t n
Definition legend1.C:16
Double_t ex[n]
Definition legend1.C:17
RResult< void > EnsureValidNameForRNTuple(std::string_view name, std::string_view where)
Check whether a given string is a valid name according to the RNTuple specification.
ROOT::RResult< std::unique_ptr< ROOT::RFieldBase > > CallFieldBaseCreate(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert std::prefix.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
Additional information about a page in an in-memory RPageRange.
Information about a single page in the context of a cluster's page range.
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345