Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-10-04
6
7/*************************************************************************
8 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#include <ROOT/RError.hxx>
16#include <ROOT/RFieldBase.hxx>
18#include <ROOT/RNTupleModel.hxx>
19#include <ROOT/RNTupleUtil.hxx>
20#include <ROOT/RPage.hxx>
21#include <string_view>
22
23#include <RZip.h>
24#include <TError.h>
26
27#include <algorithm>
28#include <cstdint>
29#include <deque>
30#include <functional>
31#include <iostream>
32#include <set>
33#include <utility>
34
36
38{
39 return fFieldId == other.fFieldId && fFieldVersion == other.fFieldVersion && fTypeVersion == other.fTypeVersion &&
40 fFieldName == other.fFieldName && fFieldDescription == other.fFieldDescription &&
41 fTypeName == other.fTypeName && fTypeAlias == other.fTypeAlias && fNRepetitions == other.fNRepetitions &&
42 fStructure == other.fStructure && fParentId == other.fParentId &&
43 fProjectionSourceId == other.fProjectionSourceId && fLinkIds == other.fLinkIds &&
44 fLogicalColumnIds == other.fLogicalColumnIds && other.fTypeChecksum == other.fTypeChecksum;
45}
46
48{
49 RFieldDescriptor clone;
50 clone.fFieldId = fFieldId;
51 clone.fFieldVersion = fFieldVersion;
52 clone.fTypeVersion = fTypeVersion;
53 clone.fFieldName = fFieldName;
54 clone.fFieldDescription = fFieldDescription;
55 clone.fTypeName = fTypeName;
56 clone.fTypeAlias = fTypeAlias;
57 clone.fNRepetitions = fNRepetitions;
58 clone.fStructure = fStructure;
59 clone.fParentId = fParentId;
60 clone.fProjectionSourceId = fProjectionSourceId;
61 clone.fLinkIds = fLinkIds;
62 clone.fColumnCardinality = fColumnCardinality;
63 clone.fLogicalColumnIds = fLogicalColumnIds;
64 clone.fTypeChecksum = fTypeChecksum;
65 return clone;
66}
67
68std::unique_ptr<ROOT::RFieldBase>
70{
71 if (GetStructure() == ROOT::ENTupleStructure::kStreamer) {
72 auto streamerField = std::make_unique<ROOT::RStreamerField>(GetFieldName(), GetTypeName());
73 streamerField->SetOnDiskId(fFieldId);
74 return streamerField;
75 }
76
77 // The structure may be unknown if the descriptor comes from a deserialized field with an unknown structural role.
78 // For forward compatibility, we allow this case and return an InvalidField.
79 if (GetStructure() == ROOT::ENTupleStructure::kUnknown) {
80 if (options.GetReturnInvalidOnError()) {
81 auto invalidField = std::make_unique<ROOT::RInvalidField>(GetFieldName(), GetTypeName(), "",
83 invalidField->SetOnDiskId(fFieldId);
84 return invalidField;
85 } else {
86 throw RException(R__FAIL("unexpected on-disk field structure value for field \"" + GetFieldName() + "\""));
87 }
88 }
89
90 if (GetTypeName().empty()) {
91 switch (GetStructure()) {
93 std::vector<std::unique_ptr<ROOT::RFieldBase>> memberFields;
94 memberFields.reserve(fLinkIds.size());
95 for (auto id : fLinkIds) {
96 const auto &memberDesc = ntplDesc.GetFieldDescriptor(id);
97 auto field = memberDesc.CreateField(ntplDesc, options);
99 return field;
100 memberFields.emplace_back(std::move(field));
101 }
102 auto recordField = std::make_unique<ROOT::RRecordField>(GetFieldName(), std::move(memberFields));
103 recordField->SetOnDiskId(fFieldId);
104 return recordField;
105 }
107 if (fLinkIds.size() != 1) {
108 throw RException(R__FAIL("unsupported untyped collection for field \"" + GetFieldName() + "\""));
109 }
110 auto itemField = ntplDesc.GetFieldDescriptor(fLinkIds[0]).CreateField(ntplDesc, options);
112 return itemField;
113 auto collectionField = ROOT::RVectorField::CreateUntyped(GetFieldName(), std::move(itemField));
114 collectionField->SetOnDiskId(fFieldId);
115 return collectionField;
116 }
117 default: throw RException(R__FAIL("unsupported untyped field structure for field \"" + GetFieldName() + "\""));
118 }
119 }
120
121 try {
122 const auto &fieldName = GetFieldName();
123 const auto &typeName = GetTypeAlias().empty() ? GetTypeName() : GetTypeAlias();
124 // NOTE: Unwrap() here may throw an exception, hence the try block.
125 // If options.fReturnInvalidOnError is false we just rethrow it, otherwise we return an InvalidField wrapping the
126 // error.
127 auto field = ROOT::Internal::CallFieldBaseCreate(fieldName, typeName, options, &ntplDesc, fFieldId).Unwrap();
128 field->SetOnDiskId(fFieldId);
129
130 for (auto &subfield : *field) {
131 const auto subfieldId = ntplDesc.FindFieldId(subfield.GetFieldName(), subfield.GetParent()->GetOnDiskId());
132 subfield.SetOnDiskId(subfieldId);
134 auto &invalidField = static_cast<ROOT::RInvalidField &>(subfield);
135 // A subfield being invalid "infects" its entire ancestry.
136 return invalidField.Clone(fieldName);
137 }
138 }
139
140 return field;
141 } catch (RException &ex) {
142 if (options.GetReturnInvalidOnError())
143 return std::make_unique<ROOT::RInvalidField>(GetFieldName(), GetTypeName(), ex.GetError().GetReport(),
145 else
146 throw ex;
147 }
148}
149
151{
153 return false;
154
155 // Skip untyped structs
156 if (fTypeName.empty())
157 return false;
158
159 if (fStructure == ROOT::ENTupleStructure::kRecord) {
160 if (fTypeName.compare(0, 10, "std::pair<") == 0)
161 return false;
162 if (fTypeName.compare(0, 11, "std::tuple<") == 0)
163 return false;
164 }
165
166 return true;
167}
168
169////////////////////////////////////////////////////////////////////////////////
170
172{
173 return fLogicalColumnId == other.fLogicalColumnId && fPhysicalColumnId == other.fPhysicalColumnId &&
174 fBitsOnStorage == other.fBitsOnStorage && fType == other.fType && fFieldId == other.fFieldId &&
175 fIndex == other.fIndex && fRepresentationIndex == other.fRepresentationIndex &&
176 fValueRange == other.fValueRange;
177}
178
180{
181 RColumnDescriptor clone;
182 clone.fLogicalColumnId = fLogicalColumnId;
183 clone.fPhysicalColumnId = fPhysicalColumnId;
184 clone.fBitsOnStorage = fBitsOnStorage;
185 clone.fType = fType;
186 clone.fFieldId = fFieldId;
187 clone.fIndex = fIndex;
188 clone.fFirstElementIndex = fFirstElementIndex;
189 clone.fRepresentationIndex = fRepresentationIndex;
190 clone.fValueRange = fValueRange;
191 return clone;
192}
193
194////////////////////////////////////////////////////////////////////////////////
195
198{
199 const auto N = fCumulativeNElements.size();
200 R__ASSERT(N > 0);
201 R__ASSERT(N == fPageInfos.size());
202
203 std::size_t left = 0;
204 std::size_t right = N - 1;
205 std::size_t midpoint = N;
206 while (left <= right) {
207 midpoint = (left + right) / 2;
208 if (fCumulativeNElements[midpoint] <= idxInCluster) {
209 left = midpoint + 1;
210 continue;
211 }
212
213 if ((midpoint == 0) || (fCumulativeNElements[midpoint - 1] <= idxInCluster))
214 break;
215
216 right = midpoint - 1;
217 }
219
220 auto pageInfo = fPageInfos[midpoint];
221 decltype(idxInCluster) firstInPage = (midpoint == 0) ? 0 : fCumulativeNElements[midpoint - 1];
223 R__ASSERT((firstInPage + pageInfo.GetNElements()) > idxInCluster);
225}
226
227std::size_t
230 std::size_t pageSize)
231{
232 R__ASSERT(fPhysicalColumnId == columnRange.GetPhysicalColumnId());
233 R__ASSERT(!columnRange.IsSuppressed());
234
235 const auto nElements =
236 std::accumulate(fPageInfos.begin(), fPageInfos.end(), 0U,
237 [](std::size_t n, const auto &pageInfo) { return n + pageInfo.GetNElements(); });
238 const auto nElementsRequired = static_cast<std::uint64_t>(columnRange.GetNElements());
239
241 return 0U;
242 R__ASSERT((nElementsRequired > nElements) && "invalid attempt to shrink RPageRange");
243
244 std::vector<RPageInfo> pageInfos;
245 // Synthesize new `RPageInfo`s as needed
246 const std::uint64_t nElementsPerPage = pageSize / element.GetSize();
250 pageInfo.SetNElements(std::min(nElementsPerPage, nRemainingElements));
253 locator.SetNBytesOnStorage(element.GetPackedSize(pageInfo.GetNElements()));
254 pageInfo.SetLocator(locator);
255 pageInfos.emplace_back(pageInfo);
256 nRemainingElements -= pageInfo.GetNElements();
257 }
258
259 pageInfos.insert(pageInfos.end(), std::make_move_iterator(fPageInfos.begin()),
260 std::make_move_iterator(fPageInfos.end()));
261 std::swap(fPageInfos, pageInfos);
263}
264
266{
267 return fClusterId == other.fClusterId && fFirstEntryIndex == other.fFirstEntryIndex &&
268 fNEntries == other.fNEntries && fColumnRanges == other.fColumnRanges && fPageRanges == other.fPageRanges;
269}
270
272{
273 std::uint64_t nbytes = 0;
274 for (const auto &pr : fPageRanges) {
275 for (const auto &pi : pr.second.GetPageInfos()) {
276 nbytes += pi.GetLocator().GetNBytesOnStorage();
277 }
278 }
279 return nbytes;
280}
281
283{
284 RClusterDescriptor clone;
285 clone.fClusterId = fClusterId;
286 clone.fFirstEntryIndex = fFirstEntryIndex;
287 clone.fNEntries = fNEntries;
288 clone.fColumnRanges = fColumnRanges;
289 for (const auto &d : fPageRanges)
290 clone.fPageRanges.emplace(d.first, d.second.Clone());
291 return clone;
292}
293
294////////////////////////////////////////////////////////////////////////////////
295
297{
298 return fContentId == other.fContentId && fTypeName == other.fTypeName && fTypeVersion == other.fTypeVersion;
299}
300
302{
304 clone.fContentId = fContentId;
305 clone.fTypeVersion = fTypeVersion;
306 clone.fTypeName = fTypeName;
307 clone.fContent = fContent;
308 return clone;
309}
310
311////////////////////////////////////////////////////////////////////////////////
312
314{
315 // clang-format off
316 return fName == other.fName &&
317 fDescription == other.fDescription &&
318 fNEntries == other.fNEntries &&
319 fGeneration == other.fGeneration &&
320 fFieldZeroId == other.fFieldZeroId &&
321 fFieldDescriptors == other.fFieldDescriptors &&
322 fColumnDescriptors == other.fColumnDescriptors &&
323 fClusterGroupDescriptors == other.fClusterGroupDescriptors &&
324 fClusterDescriptors == other.fClusterDescriptors;
325 // clang-format on
326}
327
329{
331 for (const auto &cd : fClusterDescriptors) {
332 if (!cd.second.ContainsColumn(physicalColumnId))
333 continue;
334 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
335 result = std::max(result, columnRange.GetFirstElementIndex() + columnRange.GetNElements());
336 }
337 return result;
338}
339
342{
343 std::string leafName(fieldName);
344 auto posDot = leafName.find_last_of('.');
345 if (posDot != std::string::npos) {
346 auto parentName = leafName.substr(0, posDot);
347 leafName = leafName.substr(posDot + 1);
348 parentId = FindFieldId(parentName, parentId);
349 }
350 auto itrFieldDesc = fFieldDescriptors.find(parentId);
351 if (itrFieldDesc == fFieldDescriptors.end())
353 for (const auto linkId : itrFieldDesc->second.GetLinkIds()) {
354 if (fFieldDescriptors.at(linkId).GetFieldName() == leafName)
355 return linkId;
356 }
358}
359
361{
363 return "";
364
365 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
366 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
367 if (prefix.empty())
368 return fieldDescriptor.GetFieldName();
369 return prefix + "." + fieldDescriptor.GetFieldName();
370}
371
373{
374 return FindFieldId(fieldName, GetFieldZeroId());
375}
376
378 std::uint32_t columnIndex,
379 std::uint16_t representationIndex) const
380{
381 auto itr = fFieldDescriptors.find(fieldId);
382 if (itr == fFieldDescriptors.cend())
384 if (columnIndex >= itr->second.GetColumnCardinality())
386 const auto idx = representationIndex * itr->second.GetColumnCardinality() + columnIndex;
387 if (itr->second.GetLogicalColumnIds().size() <= idx)
389 return itr->second.GetLogicalColumnIds()[idx];
390}
391
393 std::uint32_t columnIndex,
394 std::uint16_t representationIndex) const
395{
396 auto logicalId = FindLogicalColumnId(fieldId, columnIndex, representationIndex);
399 return GetColumnDescriptor(logicalId).GetPhysicalId();
400}
401
404{
405 if (GetNClusterGroups() == 0)
407
408 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
409
410 std::size_t cgLeft = 0;
411 std::size_t cgRight = GetNClusterGroups() - 1;
412 while (cgLeft <= cgRight) {
413 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
414 const auto &clusterIds = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]).GetClusterIds();
415 R__ASSERT(!clusterIds.empty());
416
417 const auto &clusterDesc = GetClusterDescriptor(clusterIds.front());
418 // this may happen if the RNTuple has an empty schema
419 if (!clusterDesc.ContainsColumn(physicalColumnId))
421
422 const auto firstElementInGroup = clusterDesc.GetColumnRange(physicalColumnId).GetFirstElementIndex();
424 // Look into the lower half of cluster groups
426 cgRight = cgMidpoint - 1;
427 continue;
428 }
429
430 const auto &lastColumnRange = GetClusterDescriptor(clusterIds.back()).GetColumnRange(physicalColumnId);
431 if ((lastColumnRange.GetFirstElementIndex() + lastColumnRange.GetNElements()) <= index) {
432 // Look into the upper half of cluster groups
433 cgLeft = cgMidpoint + 1;
434 continue;
435 }
436
437 // Binary search in the current cluster group; since we already checked the element range boundaries,
438 // the element must be in that cluster group.
439 std::size_t clusterLeft = 0;
440 std::size_t clusterRight = clusterIds.size() - 1;
441 while (clusterLeft <= clusterRight) {
442 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
444 const auto &columnRange = GetClusterDescriptor(clusterId).GetColumnRange(physicalColumnId);
445
446 if (columnRange.Contains(index))
447 return clusterId;
448
449 if (columnRange.GetFirstElementIndex() > index) {
452 continue;
453 }
454
455 if (columnRange.GetFirstElementIndex() + columnRange.GetNElements() <= index) {
457 continue;
458 }
459 }
460 R__ASSERT(false);
461 }
463}
464
466{
467 if (GetNClusterGroups() == 0)
469
470 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
471
472 std::size_t cgLeft = 0;
473 std::size_t cgRight = GetNClusterGroups() - 1;
474 while (cgLeft <= cgRight) {
475 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
476 const auto &cgDesc = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]);
477
478 if (cgDesc.GetMinEntry() > entryIdx) {
480 cgRight = cgMidpoint - 1;
481 continue;
482 }
483
484 if (cgDesc.GetMinEntry() + cgDesc.GetEntrySpan() <= entryIdx) {
485 cgLeft = cgMidpoint + 1;
486 continue;
487 }
488
489 // Binary search in the current cluster group; since we already checked the element range boundaries,
490 // the element must be in that cluster group.
491 const auto &clusterIds = cgDesc.GetClusterIds();
492 R__ASSERT(!clusterIds.empty());
493 std::size_t clusterLeft = 0;
494 std::size_t clusterRight = clusterIds.size() - 1;
495 while (clusterLeft <= clusterRight) {
496 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
497 const auto &clusterDesc = GetClusterDescriptor(clusterIds[clusterMidpoint]);
498
499 if (clusterDesc.GetFirstEntryIndex() > entryIdx) {
502 continue;
503 }
504
505 if (clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries() <= entryIdx) {
507 continue;
508 }
509
511 }
512 R__ASSERT(false);
513 }
515}
516
518{
519 // TODO(jblomer): we may want to shortcut the common case and check if clusterId + 1 contains
520 // firstEntryInNextCluster. This shortcut would currently always trigger. We do not want, however, to depend
521 // on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure that the
522 // binary search code path remains tested.
523 const auto &clusterDesc = GetClusterDescriptor(clusterId);
524 const auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
525 return FindClusterId(firstEntryInNextCluster);
526}
527
529{
530 // TODO(jblomer): we may want to shortcut the common case and check if clusterId - 1 contains
531 // the entry preceding this cluster's first entry. This shortcut would currently always trigger. We do not want, however, to depend
532 // on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure that the
533 // binary search code path remains tested.
534 const auto &clusterDesc = GetClusterDescriptor(clusterId);
535 if (clusterDesc.GetFirstEntryIndex() == 0)
537 return FindClusterId(clusterDesc.GetFirstEntryIndex() - 1);
538}
539
540std::vector<ROOT::DescriptorId_t>
542{
543 auto fieldZeroId = desc.GetFieldZeroId();
544
545 std::vector<ROOT::DescriptorId_t> fields;
546 for (const auto fieldId : fFieldIdsOrder) {
547 if (desc.GetFieldDescriptor(fieldId).GetParentId() == fieldZeroId)
548 fields.emplace_back(fieldId);
549 }
550 return fields;
551}
552
558
560 : fNTuple(ntuple)
561{
562 std::deque<ROOT::DescriptorId_t> fieldIdQueue{ntuple.GetFieldZeroId()};
563
564 while (!fieldIdQueue.empty()) {
565 auto currFieldId = fieldIdQueue.front();
566 fieldIdQueue.pop_front();
567
568 const auto &columns = ntuple.GetFieldDescriptor(currFieldId).GetLogicalColumnIds();
569 fColumns.insert(fColumns.end(), columns.begin(), columns.end());
570
571 for (const auto &field : ntuple.GetFieldIterable(currFieldId)) {
572 auto fieldId = field.GetId();
573 fieldIdQueue.push_back(fieldId);
574 }
575 }
576}
577
578std::vector<std::uint64_t> ROOT::RNTupleDescriptor::GetFeatureFlags() const
579{
580 std::vector<std::uint64_t> result;
581 unsigned int base = 0;
582 std::uint64_t flags = 0;
583 for (auto f : fFeatureFlags) {
584 if ((f > 0) && ((f % 64) == 0))
585 throw RException(R__FAIL("invalid feature flag: " + std::to_string(f)));
586 while (f > base + 64) {
587 result.emplace_back(flags);
588 flags = 0;
589 base += 64;
590 }
591 f -= base;
592 flags |= 1 << f;
593 }
594 result.emplace_back(flags);
595 return result;
596}
597
599 std::vector<RClusterDescriptor> &clusterDescs)
600{
602 if (iter == fClusterGroupDescriptors.end())
603 return R__FAIL("invalid attempt to add details of unknown cluster group");
604 if (iter->second.HasClusterDetails())
605 return R__FAIL("invalid attempt to re-populate cluster group details");
606 if (iter->second.GetNClusters() != clusterDescs.size())
607 return R__FAIL("mismatch of number of clusters");
608
609 std::vector<ROOT::DescriptorId_t> clusterIds;
610 for (unsigned i = 0; i < clusterDescs.size(); ++i) {
611 clusterIds.emplace_back(clusterDescs[i].GetId());
612 auto [_, success] = fClusterDescriptors.emplace(clusterIds.back(), std::move(clusterDescs[i]));
613 if (!success) {
614 return R__FAIL("invalid attempt to re-populate existing cluster");
615 }
616 }
618 return fClusterDescriptors[a].GetFirstEntryIndex() < fClusterDescriptors[b].GetFirstEntryIndex();
619 });
621 cgBuilder.AddSortedClusters(clusterIds);
622 iter->second = cgBuilder.MoveDescriptor().Unwrap();
623 return RResult<void>::Success();
624}
625
627{
629 if (iter == fClusterGroupDescriptors.end())
630 return R__FAIL("invalid attempt to drop cluster details of unknown cluster group");
631 if (!iter->second.HasClusterDetails())
632 return R__FAIL("invalid attempt to drop details of cluster group summary");
633
634 for (auto clusterId : iter->second.GetClusterIds())
636 iter->second = iter->second.CloneSummary();
637 return RResult<void>::Success();
638}
639
640std::unique_ptr<ROOT::RNTupleModel> ROOT::RNTupleDescriptor::CreateModel(const RCreateModelOptions &options) const
641{
642 auto fieldZero = std::make_unique<ROOT::RFieldZero>();
643 fieldZero->SetOnDiskId(GetFieldZeroId());
644 auto model = options.GetCreateBare() ? RNTupleModel::CreateBare(std::move(fieldZero))
645 : RNTupleModel::Create(std::move(fieldZero));
647 createFieldOpts.SetReturnInvalidOnError(options.GetForwardCompatible());
648 createFieldOpts.SetEmulateUnknownTypes(options.GetEmulateUnknownTypes());
649 for (const auto &topDesc : GetTopLevelFields()) {
650 auto field = topDesc.CreateField(*this, createFieldOpts);
652 continue;
653
654 if (options.GetReconstructProjections() && topDesc.IsProjectedField()) {
655 model->AddProjectedField(std::move(field), [this](const std::string &targetName) -> std::string {
656 return GetQualifiedFieldName(GetFieldDescriptor(FindFieldId(targetName)).GetProjectionSourceId());
657 });
658 } else {
659 model->AddField(std::move(field));
660 }
661 }
662 model->Freeze();
663 return model;
664}
665
667{
668 RNTupleDescriptor clone;
669 clone.fName = fName;
674 // OnDiskHeaderSize, OnDiskHeaderXxHash3 not copied because they may come from a merged header + extension header
675 // and therefore do not represent the actual source's header.
676 // OnDiskFooterSize not copied because it contains information beyond the schema, for example the clustering.
677
678 for (const auto &d : fFieldDescriptors)
679 clone.fFieldDescriptors.emplace(d.first, d.second.Clone());
680 for (const auto &d : fColumnDescriptors)
681 clone.fColumnDescriptors.emplace(d.first, d.second.Clone());
682
683 for (const auto &d : fExtraTypeInfoDescriptors)
684 clone.fExtraTypeInfoDescriptors.emplace_back(d.Clone());
686 clone.fHeaderExtension = std::make_unique<RHeaderExtension>(*fHeaderExtension);
687
688 return clone;
689}
690
692{
694
698 clone.fNEntries = fNEntries;
699 clone.fNClusters = fNClusters;
700 clone.fGeneration = fGeneration;
701 for (const auto &d : fClusterGroupDescriptors)
702 clone.fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
704 for (const auto &d : fClusterDescriptors)
705 clone.fClusterDescriptors.emplace(d.first, d.second.Clone());
706 return clone;
707}
708
709////////////////////////////////////////////////////////////////////////////////
710
712{
713 return fClusterGroupId == other.fClusterGroupId && fClusterIds == other.fClusterIds &&
714 fMinEntry == other.fMinEntry && fEntrySpan == other.fEntrySpan && fNClusters == other.fNClusters;
715}
716
718{
720 clone.fClusterGroupId = fClusterGroupId;
721 clone.fPageListLocator = fPageListLocator;
722 clone.fPageListLength = fPageListLength;
723 clone.fMinEntry = fMinEntry;
724 clone.fEntrySpan = fEntrySpan;
725 clone.fNClusters = fNClusters;
726 return clone;
727}
728
730{
731 RClusterGroupDescriptor clone = CloneSummary();
732 clone.fClusterIds = fClusterIds;
733 return clone;
734}
735
736////////////////////////////////////////////////////////////////////////////////
737
740 std::uint64_t firstElementIndex,
741 std::uint32_t compressionSettings,
743{
744 if (physicalId != pageRange.fPhysicalColumnId)
745 return R__FAIL("column ID mismatch");
746 if (fCluster.fColumnRanges.count(physicalId) > 0)
747 return R__FAIL("column ID conflict");
749 for (const auto &pi : pageRange.fPageInfos) {
750 columnRange.IncrementNElements(pi.GetNElements());
751 }
752 fCluster.fPageRanges[physicalId] = pageRange.Clone();
753 fCluster.fColumnRanges[physicalId] = columnRange;
754 return RResult<void>::Success();
755}
756
759{
760 if (fCluster.fColumnRanges.count(physicalId) > 0)
761 return R__FAIL("column ID conflict");
762
764 columnRange.SetPhysicalColumnId(physicalId);
765 columnRange.SetIsSuppressed(true);
766 fCluster.fColumnRanges[physicalId] = columnRange;
767 return RResult<void>::Success();
768}
769
772{
773 for (auto &[_, columnRange] : fCluster.fColumnRanges) {
774 if (!columnRange.IsSuppressed())
775 continue;
776 R__ASSERT(columnRange.GetFirstElementIndex() == ROOT::kInvalidNTupleIndex);
777
778 const auto &columnDesc = desc.GetColumnDescriptor(columnRange.GetPhysicalColumnId());
779 const auto &fieldDesc = desc.GetFieldDescriptor(columnDesc.GetFieldId());
780 // We expect only few columns and column representations per field, so we do a linear search
781 for (const auto otherColumnLogicalId : fieldDesc.GetLogicalColumnIds()) {
783 if (otherColumnDesc.GetRepresentationIndex() == columnDesc.GetRepresentationIndex())
784 continue;
785 if (otherColumnDesc.GetIndex() != columnDesc.GetIndex())
786 continue;
787
788 // Found corresponding column of a different column representation
789 const auto &otherColumnRange = fCluster.GetColumnRange(otherColumnDesc.GetPhysicalId());
790 if (otherColumnRange.IsSuppressed())
791 continue;
792
793 columnRange.SetFirstElementIndex(otherColumnRange.GetFirstElementIndex());
794 columnRange.SetNElements(otherColumnRange.GetNElements());
795 break;
796 }
797
798 if (columnRange.GetFirstElementIndex() == ROOT::kInvalidNTupleIndex) {
799 return R__FAIL(std::string("cannot find non-suppressed column for column ID ") +
800 std::to_string(columnRange.GetPhysicalColumnId()) +
801 ", cluster ID: " + std::to_string(fCluster.GetId()));
802 }
803 }
804 return RResult<void>::Success();
805}
806
809{
810 /// Carries out a depth-first traversal of a field subtree rooted at `rootFieldId`. For each field, `visitField` is
811 /// called passing the field ID and the number of overall repetitions, taking into account the repetitions of each
812 /// parent field in the hierarchy.
814 const auto &visitField, const auto &enterSubtree) -> void {
816 for (const auto &f : desc.GetFieldIterable(rootFieldId)) {
817 const std::uint64_t nRepetitions = std::max(f.GetNRepetitions(), std::uint64_t{1U}) * nRepetitionsAtThisLevel;
819 }
820 };
821
822 // Extended columns can only be part of the header extension
823 if (!desc.GetHeaderExtension())
824 return *this;
825
826 // Ensure that all columns in the header extension have their associated `R(Column|Page)Range`
827 // Extended columns can be attached both to fields of the regular header and to fields of the extension header
828 for (const auto &topLevelField : desc.GetTopLevelFields()) {
830 topLevelField.GetId(), std::max(topLevelField.GetNRepetitions(), std::uint64_t{1U}),
831 [&](ROOT::DescriptorId_t fieldId, std::uint64_t nRepetitions) {
832 for (const auto &c : desc.GetColumnIterable(fieldId)) {
833 const ROOT::DescriptorId_t physicalId = c.GetPhysicalId();
834 auto &columnRange = fCluster.fColumnRanges[physicalId];
835
836 // Initialize a RColumnRange for `physicalId` if it was not there. Columns that were created during model
837 // extension won't have on-disk metadata for the clusters that were already committed before the model
838 // was extended. Therefore, these need to be synthetically initialized upon reading.
839 if (columnRange.GetPhysicalColumnId() == ROOT::kInvalidDescriptorId) {
840 columnRange.SetPhysicalColumnId(physicalId);
841 columnRange.SetFirstElementIndex(0);
842 columnRange.SetNElements(0);
843 columnRange.SetIsSuppressed(c.IsSuppressedDeferredColumn());
844 }
845 // Fixup the RColumnRange and RPageRange in deferred columns. We know what the first element index and
846 // number of elements should have been if the column was not deferred; fix those and let
847 // `ExtendToFitColumnRange()` synthesize RPageInfos accordingly.
848 // Note that a deferred column (i.e, whose first element index is > 0) already met the criteria of
849 // `ROOT::RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the
850 // field zero excluding subfields of collection and variant fields.
851 if (c.IsDeferredColumn()) {
852 columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions);
853 columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions);
854 if (!columnRange.IsSuppressed()) {
855 auto &pageRange = fCluster.fPageRanges[physicalId];
856 pageRange.fPhysicalColumnId = physicalId;
857 const auto element = ROOT::Internal::RColumnElementBase::Generate<void>(c.GetType());
858 pageRange.ExtendToFitColumnRange(columnRange, *element, ROOT::Internal::RPage::kPageZeroSize);
859 }
860 } else if (!columnRange.IsSuppressed()) {
861 fCluster.fPageRanges[physicalId].fPhysicalColumnId = physicalId;
862 }
863 }
864 },
866 }
867 return *this;
868}
869
871{
872 if (fCluster.fClusterId == ROOT::kInvalidDescriptorId)
873 return R__FAIL("unset cluster ID");
874 if (fCluster.fNEntries == 0)
875 return R__FAIL("empty cluster");
876 for (auto &pr : fCluster.fPageRanges) {
877 if (fCluster.fColumnRanges.count(pr.first) == 0) {
878 return R__FAIL("missing column range");
879 }
880 pr.second.fCumulativeNElements.clear();
881 pr.second.fCumulativeNElements.reserve(pr.second.fPageInfos.size());
883 for (const auto &pi : pr.second.fPageInfos) {
884 sum += pi.GetNElements();
885 pr.second.fCumulativeNElements.emplace_back(sum);
886 }
887 }
889 std::swap(result, fCluster);
890 return result;
891}
892
893////////////////////////////////////////////////////////////////////////////////
894
897{
899 builder.ClusterGroupId(clusterGroupDesc.GetId())
900 .PageListLocator(clusterGroupDesc.GetPageListLocator())
901 .PageListLength(clusterGroupDesc.GetPageListLength())
902 .MinEntry(clusterGroupDesc.GetMinEntry())
903 .EntrySpan(clusterGroupDesc.GetEntrySpan())
904 .NClusters(clusterGroupDesc.GetNClusters());
905 return builder;
906}
907
909{
910 if (fClusterGroup.fClusterGroupId == ROOT::kInvalidDescriptorId)
911 return R__FAIL("unset cluster group ID");
913 std::swap(result, fClusterGroup);
914 return result;
915}
916
917////////////////////////////////////////////////////////////////////////////////
918
920{
921 if (fExtraTypeInfo.fContentId == EExtraTypeInfoIds::kInvalid)
922 throw RException(R__FAIL("invalid extra type info content id"));
924 std::swap(result, fExtraTypeInfo);
925 return result;
926}
927
928////////////////////////////////////////////////////////////////////////////////
929
931{
932 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
933 return R__FAIL("field with id '" + std::to_string(fieldId) + "' doesn't exist");
934 return RResult<void>::Success();
935}
936
938{
939 // Reuse field name validity check
940 auto validName = ROOT::Internal::EnsureValidNameForRNTuple(fDescriptor.GetName(), "Field");
941 if (!validName) {
943 }
944
945 for (const auto &[fieldId, fieldDesc] : fDescriptor.fFieldDescriptors) {
946 // parent not properly set?
947 if (fieldId != fDescriptor.GetFieldZeroId() && fieldDesc.GetParentId() == ROOT::kInvalidDescriptorId) {
948 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has an invalid parent id");
949 }
950
951 // Same number of columns in every column representation?
952 const auto columnCardinality = fieldDesc.GetColumnCardinality();
953 if (columnCardinality == 0)
954 continue;
955
956 // In AddColumn, we already checked that all but the last representation are complete.
957 // Check that the last column representation is complete, i.e. has all columns.
958 const auto &logicalColumnIds = fieldDesc.GetLogicalColumnIds();
959 const auto nColumns = logicalColumnIds.size();
960 // If we have only a single column representation, the following condition is true by construction
961 if ((nColumns + 1) == columnCardinality)
962 continue;
963
964 const auto &lastColumn = fDescriptor.GetColumnDescriptor(logicalColumnIds.back());
965 if (lastColumn.GetIndex() + 1 != columnCardinality)
966 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has incomplete column representations");
967 }
968
969 return RResult<void>::Success();
970}
971
973{
974 EnsureValidDescriptor().ThrowOnError();
975 fDescriptor.fSortedClusterGroupIds.reserve(fDescriptor.fClusterGroupDescriptors.size());
976 for (const auto &[id, _] : fDescriptor.fClusterGroupDescriptors)
977 fDescriptor.fSortedClusterGroupIds.emplace_back(id);
978 std::sort(fDescriptor.fSortedClusterGroupIds.begin(), fDescriptor.fSortedClusterGroupIds.end(),
980 return fDescriptor.fClusterGroupDescriptors[a].GetMinEntry() <
981 fDescriptor.fClusterGroupDescriptors[b].GetMinEntry();
982 });
984 std::swap(result, fDescriptor);
985 return result;
986}
987
989 const std::string_view description)
990{
// Stores the name and the free-text description in the descriptor under construction.
// Both string views are copied into owning std::string objects, so the caller's buffers
// need not outlive this call.
991 fDescriptor.fName = std::string(name);
992 fDescriptor.fDescription = std::string(description);
993}
994
996{
// Adds `flag` to the descriptor's fFeatureFlags set.
// Every flag that is a multiple of 64 (including 0) is rejected with an RException
// carrying the message "invalid feature flag: <flag>".
// NOTE(review): why multiples of 64 are reserved is not visible in this file; confirm the
// check against the on-disk feature-flag encoding.
997 if (flag % 64 == 0)
998 throw RException(R__FAIL("invalid feature flag: " + std::to_string(flag)));
999 fDescriptor.fFeatureFlags.insert(flag);
1000}
1001
1003{
// Validates the column descriptor held by this builder (fColumn) and returns a clone of it.
// Returns an error result instead of the clone when
//   - the logical column id, the physical column id or the field id is kInvalidDescriptorId, or
//   - the column type is known and the bits-on-storage value lies outside the range
//     reported by RColumnElementBase::GetValidBitRange() for that type.
1004 if (fColumn.GetLogicalId() == ROOT::kInvalidDescriptorId)
1005 return R__FAIL("invalid logical column id");
1006 if (fColumn.GetPhysicalId() == ROOT::kInvalidDescriptorId)
1007 return R__FAIL("invalid physical column id");
1008 if (fColumn.GetFieldId() == ROOT::kInvalidDescriptorId)
1009 return R__FAIL("invalid field id, dangling column");
1010
1011 // NOTE: if the column type is unknown we don't want to fail, as we might be reading an RNTuple
1012 // created with a future version of ROOT. In this case we just skip the valid bit range check,
1013 // as we have no idea what the valid range is.
1014 // In general, reading the metadata of an unknown column is fine, it becomes an error only when
1015 // we try to read the actual data contained in it.
1016 if (fColumn.GetType() != ENTupleColumnType::kUnknown) {
// The bounds are inclusive: a width equal to minBits or maxBits is accepted.
1017 const auto [minBits, maxBits] = ROOT::Internal::RColumnElementBase::GetValidBitRange(fColumn.GetType());
1018 if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits)
1019 return R__FAIL("invalid column bit width");
1020 }
1021
// The builder keeps its own fColumn; the caller receives an independent copy.
1022 return fColumn.Clone();
1023}
1024
1032
1035{
// Creates a field descriptor builder pre-filled from a live field: field version, type version,
// field name, description, type name, type alias, structure and number of repetitions are
// copied from `field`. The visible lines do not set a field id or a parent id.
// NOTE(review): the signature line and listing lines 1036 / 1045 are absent from this rendering;
// 1036 presumably declares `fieldDesc` and 1045 presumably guards the TypeChecksum() call
// below (e.g. on the field carrying a valid type checksum) — confirm against the real source.
1037 fieldDesc.FieldVersion(field.GetFieldVersion())
1038 .TypeVersion(field.GetTypeVersion())
1039 .FieldName(field.GetFieldName())
1040 .FieldDescription(field.GetDescription())
1041 .TypeName(field.GetTypeName())
1042 .TypeAlias(field.GetTypeAlias())
1043 .Structure(field.GetStructure())
1044 .NRepetitions(field.GetNRepetitions());
1046 fieldDesc.TypeChecksum(field.GetTypeChecksum());
1047 return fieldDesc;
1048}
1049
1051{
// Validates the field descriptor held by this builder (fField) and returns a clone of it.
// Returns an error result when the field id is kInvalidDescriptorId or the structure is
// ENTupleStructure::kInvalid. The name checks apply only to fields that have a valid parent id.
// NOTE(review): listing line 1062 (the body of the `!validName` branch, presumably forwarding
// the error) is absent from this rendering of the file.
1052 if (fField.GetId() == ROOT::kInvalidDescriptorId) {
1053 return R__FAIL("invalid field id");
1054 }
1055 if (fField.GetStructure() == ROOT::ENTupleStructure::kInvalid) {
1056 return R__FAIL("invalid field structure");
1057 }
1058 // FieldZero is usually named "" and would be a false positive here
1059 if (fField.GetParentId() != ROOT::kInvalidDescriptorId) {
1060 auto validName = ROOT::Internal::EnsureValidNameForRNTuple(fField.GetFieldName(), "Field");
1061 if (!validName) {
1063 }
// An empty name is rejected explicitly in addition to the generic name validation above.
1064 if (fField.GetFieldName().empty()) {
1065 return R__FAIL("name cannot be empty string \"\"");
1066 }
1067 }
1068 return fField.Clone();
1069}
1070
1072{
// Registers a clone of `fieldDesc` in the descriptor, keyed by the field's id.
// If a header extension has been started, the field is also marked as an extended field.
// NOTE(review): fFieldDescriptors is an unordered_map, so emplace() leaves an already
// existing entry with the same id untouched; no clash is reported here and the steps
// below still run — confirm that callers guarantee unique ids.
1073 fDescriptor.fFieldDescriptors.emplace(fieldDesc.GetId(), fieldDesc.Clone());
1074 if (fDescriptor.fHeaderExtension)
1075 fDescriptor.fHeaderExtension->MarkExtendedField(fieldDesc);
// A field with an empty name and no valid parent is recorded as the field zero.
1076 if (fieldDesc.GetFieldName().empty() && fieldDesc.GetParentId() == ROOT::kInvalidDescriptorId) {
1077 fDescriptor.fFieldZeroId = fieldDesc.GetId();
1078 }
1079}
1080
1083{
// Makes the field `linkId` a child of the field `fieldId`.
// Returns an error result when
//   - either field is unknown to the descriptor (EnsureFieldExists),
//   - `linkId` is the field zero,
//   - `linkId` already has a parent, or
//   - `fieldId == linkId`.
// On success the child's fParentId is set and `linkId` is appended to the parent's fLinkIds.
// NOTE(review): the signature line and listing lines 1084 / 1086 / 1095 are absent from this
// rendering of the file (presumably the declaration of `fieldExists`, the error return for
// a missing parent field, and the condition testing `parentId`); they are left untouched.
1085 if (!(fieldExists = EnsureFieldExists(fieldId)))
1087 if (!(fieldExists = EnsureFieldExists(linkId)))
1088 return R__FAIL("child field with id '" + std::to_string(linkId) + "' doesn't exist in NTuple");
1089
1090 if (linkId == fDescriptor.GetFieldZeroId()) {
1091 return R__FAIL("cannot make FieldZero a child field");
1092 }
1093 // fail if field already has another valid parent
1094 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
// The message now closes the quote around the parent id: ('<id>') instead of ('<id>).
1096 return R__FAIL("field '" + std::to_string(linkId) + "' already has a parent ('" + std::to_string(parentId) + "')");
1097 }
1098 if (fieldId == linkId) {
1099 return R__FAIL("cannot make field '" + std::to_string(fieldId) + "' a child of itself");
1100 }
// Link in both directions: child -> parent and parent -> children.
1101 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
1102 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
1103 return RResult<void>::Success();
1104}
1105
1108{
// Declares the field `targetId` to be a projection of the field `sourceId`.
// Returns an error result when
//   - either field is unknown to the descriptor (EnsureFieldExists),
//   - `targetId` is the field zero,
//   - `sourceId == targetId`,
//   - the source field is itself a projected field, or
//   - the target already has a different projection source.
// Re-adding the same source for an already projected target is accepted.
// NOTE(review): the signature line and listing lines 1109 / 1111 are absent from this rendering
// of the file (presumably the declaration of `fieldExists` and the error return for a
// missing source field); they are left untouched.
1110 if (!(fieldExists = EnsureFieldExists(sourceId)))
1112 if (!(fieldExists = EnsureFieldExists(targetId)))
1113 return R__FAIL("projected field with id '" + std::to_string(targetId) + "' doesn't exist in NTuple");
1114
1115 if (targetId == fDescriptor.GetFieldZeroId()) {
1116 return R__FAIL("cannot make FieldZero a projected field");
1117 }
1118 if (sourceId == targetId) {
1119 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of itself");
1120 }
1121 if (fDescriptor.fFieldDescriptors.at(sourceId).IsProjectedField()) {
1122 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of an already projected field");
1123 }
1124 // fail if target field already has another valid projection source
1125 auto &targetDesc = fDescriptor.fFieldDescriptors.at(targetId);
1126 if (targetDesc.IsProjectedField() && targetDesc.GetProjectionSourceId() != sourceId) {
// The message now closes the quote around the source id: ('<id>') instead of ('<id>).
1127 return R__FAIL("field '" + std::to_string(targetId) + "' has already a projection source ('" +
1128 std::to_string(targetDesc.GetProjectionSourceId()) + "')");
1129 }
1130 fDescriptor.fFieldDescriptors.at(targetId).fProjectionSourceId = sourceId;
1131 return RResult<void>::Success();
1132}
1133
1135{
// Attaches a column descriptor to its field and stores it in the descriptor.
// Columns of a field are addressed by (representation index, column index). The checks below
// enforce that they are added densely and that every representation has the same number of
// columns as the first one (the field's fColumnCardinality).
// Returns an error result when
//   - the owning field does not exist,
//   - an alias column's type differs from the type of the column it references,
//   - a column with the same field / column index / representation index already exists,
//   - the preceding column index of the same representation is missing,
//   - the preceding representation is missing or incomplete, or
//   - the column index exceeds the cardinality fixed by the first representation.
// NOTE(review): the signature line and listing lines 1142 / 1164 are absent from this rendering
// of the file (presumably the error return for a missing field and the right-hand side of
// the comparison started on listing line 1163); they are left untouched.
1136 const auto fieldId = columnDesc.GetFieldId();
1137 const auto columnIndex = columnDesc.GetIndex();
1138 const auto representationIndex = columnDesc.GetRepresentationIndex();
1139
1140 auto fieldExists = EnsureFieldExists(fieldId);
1141 if (!fieldExists) {
1143 }
1144 auto &fieldDesc = fDescriptor.fFieldDescriptors.find(fieldId)->second;
1145
1146 if (columnDesc.IsAliasColumn()) {
1147 if (columnDesc.GetType() != fDescriptor.GetColumnDescriptor(columnDesc.GetPhysicalId()).GetType())
1148 return R__FAIL("alias column type mismatch");
1149 }
1150 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex, representationIndex) != ROOT::kInvalidDescriptorId) {
1151 return R__FAIL("column index clash");
1152 }
// Column indexes must be contiguous within a representation.
1153 if (columnIndex > 0) {
1154 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex - 1, representationIndex) == ROOT::kInvalidDescriptorId)
1155 return R__FAIL("out of bounds column index");
1156 }
1157 if (representationIndex > 0) {
// Representation indexes must be contiguous as well: the previous one needs at least column 0.
1158 if (fDescriptor.FindLogicalColumnId(fieldId, 0, representationIndex - 1) == ROOT::kInvalidDescriptorId) {
1159 return R__FAIL("out of bounds representation index");
1160 }
1161 if (columnIndex == 0) {
// Starting a new representation: the previous one must already contain its last column.
1162 assert(fieldDesc.fColumnCardinality > 0);
1163 if (fDescriptor.FindLogicalColumnId(fieldId, fieldDesc.fColumnCardinality - 1, representationIndex - 1) ==
1165 return R__FAIL("incomplete column representations");
1166 }
1167 } else {
1168 if (columnIndex >= fieldDesc.fColumnCardinality)
1169 return R__FAIL("irregular column representations");
1170 }
1171 } else {
1172 // This will set the column cardinality to the number of columns of the first representation
1173 fieldDesc.fColumnCardinality = columnIndex + 1;
1174 }
1175
1176 const auto logicalId = columnDesc.GetLogicalId();
1177 fieldDesc.fLogicalColumnIds.emplace_back(logicalId);
1178
1179 if (!columnDesc.IsAliasColumn())
1180 fDescriptor.fNPhysicalColumns++;
// Record the column in the header extension BEFORE moving it into the map, so that
// MarkExtendedColumn() never reads a moved-from descriptor.
1181 if (fDescriptor.fHeaderExtension)
1182 fDescriptor.fHeaderExtension->MarkExtendedColumn(columnDesc);
// columnDesc must not be used after this line.
1183 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(columnDesc));
1184
1185 return RResult<void>::Success();
1186}
1187
1189{
// Adds a cluster group to the descriptor and updates the aggregate counters.
// Returns an error result if a cluster group with the same id is already registered.
1190 const auto id = clusterGroup.GetId();
1191 if (fDescriptor.fClusterGroupDescriptors.count(id) > 0)
1192 return R__FAIL("cluster group id clash");
// The entry count becomes the largest (min entry + entry span) seen over all cluster groups.
1193 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, clusterGroup.GetMinEntry() + clusterGroup.GetEntrySpan());
1194 fDescriptor.fNClusters += clusterGroup.GetNClusters();
// All reads of clusterGroup happen above; it is moved into the map last.
1195 fDescriptor.fClusterGroupDescriptors.emplace(id, std::move(clusterGroup));
1196 return RResult<void>::Success();
1197}
1198
1200{
// Empties the descriptor under construction: name, description, field / column / cluster /
// cluster-group descriptors and the header extension.
// NOTE(review): other members that the builder updates elsewhere in this file
// (fNEntries, fNClusters, fNPhysicalColumns, fFeatureFlags, fFieldZeroId,
// fSortedClusterGroupIds, fExtraTypeInfoDescriptors) are not touched here — confirm that
// keeping their previous values after a reset is intended.
1201 fDescriptor.fName = "";
1202 fDescriptor.fDescription = "";
1203 fDescriptor.fFieldDescriptors.clear();
1204 fDescriptor.fColumnDescriptors.clear();
1205 fDescriptor.fClusterDescriptors.clear();
1206 fDescriptor.fClusterGroupDescriptors.clear();
1207 fDescriptor.fHeaderExtension.reset();
1208}
1209
1214
1216{
// Creates the header extension object if none exists yet; calling this again is a no-op.
// Once it exists, AddField() and AddColumn() in this file additionally mark what they add
// as extended.
1217 if (!fDescriptor.fHeaderExtension)
1218 fDescriptor.fHeaderExtension = std::make_unique<RNTupleDescriptor::RHeaderExtension>();
1219}
1220
1222{
// Adds `offset` to the logical id of every alias column, both in the column map and in the
// owning field's list of logical column ids.
// The loop treats the logical ids in [GetNPhysicalColumns(), GetNLogicalColumns()) as the
// alias columns; this is checked per column by the R__ASSERT on IsAliasColumn().
1223 if (fDescriptor.GetNLogicalColumns() == 0)
1224 return;
1225 R__ASSERT(fDescriptor.GetNPhysicalColumns() > 0);
1226
// `id` is unsigned; the loop terminates because the lower bound is > 0 (asserted above), so
// `--id` drops below the bound before it could wrap around.
// Presumably the ids are walked from high to low so that a shifted id does not land on an
// alias column that has not been moved yet; the count() assertion below guards this.
1227 for (ROOT::DescriptorId_t id = fDescriptor.GetNLogicalColumns() - 1; id >= fDescriptor.GetNPhysicalColumns(); --id) {
// Work on a copy: the map entry is erased and re-inserted under the new key.
1228 auto c = fDescriptor.fColumnDescriptors[id].Clone();
1229 R__ASSERT(c.IsAliasColumn());
1230 R__ASSERT(id == c.GetLogicalId());
1231 fDescriptor.fColumnDescriptors.erase(id);
// Patch the single matching entry in the owning field's column list.
1232 for (auto &link : fDescriptor.fFieldDescriptors[c.fFieldId].fLogicalColumnIds) {
1233 if (link == c.fLogicalColumnId) {
1234 link += offset;
1235 break;
1236 }
1237 }
1238 c.fLogicalColumnId += offset;
1239 R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0);
1240 fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c));
1241 }
1242}
1243
1245{
// Moves a cluster descriptor into the descriptor's cluster map, keyed by its cluster id.
// Returns an error result if a cluster with the same id is already present.
// The visible code does not update any entry or cluster counters here.
1246 auto clusterId = clusterDesc.GetId();
1247 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
1248 return R__FAIL("cluster id clash");
1249 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
1250 return RResult<void>::Success();
1251}
1252
1255{
// Appends an extra type info record to the descriptor.
// Returns an error result if a record comparing equal (operator==) to the new one is
// already stored. The lookup is a linear scan over the stored records.
1256 // Make sure we have no duplicates
1257 if (std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1258 extraTypeInfoDesc) != fDescriptor.fExtraTypeInfoDescriptors.end()) {
1259 return R__FAIL("extra type info duplicates");
1260 }
1261 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1262 return RResult<void>::Success();
1263}
1264
1266{
// Stores an extra type info record, overwriting the matching stored record if std::find
// locates one and appending the record otherwise.
// NOTE(review): listing line 1268 (the remaining arguments of the std::find call, presumably
// the value to search for) is absent from this rendering of the file.
1267 auto it = std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1269 if (it != fDescriptor.fExtraTypeInfoDescriptors.end())
1270 *it = std::move(extraTypeInfoDesc);
1271 else
1272 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1273}
1274
1276{
// Builds and returns `streamerInfoMap` in two passes: a walk over the field tree starting
// at the field zero, then a pass over the extra type info records.
// Throws RException if, for a custom-class field, TClass::GetClass() returns null for the
// field's type name or GetStreamerInfo() returns null for the field's type version.
// NOTE(review): the signature line and listing lines 1277 / 1281 / 1292 / 1297 / 1311 are absent
// from this rendering of the file. Presumably they declare `streamerInfoMap`, open the
// lambda assigned to `fnWalkFieldTree`, store `streamerInfo` in the map, recurse into
// `subFieldDesc`, and merge the deserialized streamer-info records into the map.
1278 const auto &desc = GetDescriptor();
1279
// Declared as std::function before its definition so the walker can refer to itself.
1280 std::function<void(const RFieldDescriptor &)> fnWalkFieldTree;
1282 if (fieldDesc.IsCustomClass()) {
1283 // Add streamer info for this class to streamerInfoMap
1284 auto cl = TClass::GetClass(fieldDesc.GetTypeName().c_str());
1285 if (!cl) {
1286 throw RException(R__FAIL(std::string("cannot get TClass for ") + fieldDesc.GetTypeName()));
1287 }
// The streamer info is requested for the type version recorded in the field descriptor.
1288 auto streamerInfo = cl->GetStreamerInfo(fieldDesc.GetTypeVersion());
1289 if (!streamerInfo) {
1290 throw RException(R__FAIL(std::string("cannot get streamerInfo for ") + fieldDesc.GetTypeName()));
1291 }
1293 }
1294
1295 // Recursively traverse sub fields
1296 for (const auto &subFieldDesc : desc.GetFieldIterable(fieldDesc)) {
1298 }
1299 };
1300
1301 fnWalkFieldTree(desc.GetFieldZero());
1302
1303 // Add the streamer info records from streamer fields: because of runtime polymorphism we may need to add additional
1304 // types not covered by the type names stored in the field headers
1305 for (const auto &extraTypeInfo : desc.GetExtraTypeInfoIterable()) {
// Only records whose content id is kStreamerInfo are considered.
1306 if (extraTypeInfo.GetContentId() != EExtraTypeInfoIds::kStreamerInfo)
1307 continue;
1308 // Ideally, we would avoid deserializing the streamer info records of the streamer fields that we just serialized.
1309 // However, this happens only once at the end of writing and only when streamer fields are used, so the
1310 // preference here is for code simplicity.
1312 }
1313
1314 return streamerInfoMap;
1315}
1316
1321
1327
1334
1337{
// Convenience overload: looks up the field descriptor for `fieldId` and forwards to the
// overload that takes a field descriptor.
1338 return GetFieldIterable(GetFieldDescriptor(fieldId));
1339}
1347
1349{
// Returns the iterable over the direct children of the field zero.
1350 return GetFieldIterable(GetFieldZeroId());
1351}
1352
1354 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const
1355{
// Same as the overload without arguments, but forwards `comparator` (a predicate over two
// field ids) to GetFieldIterable() for the children of the field zero.
1356 return GetFieldIterable(GetFieldZeroId(), comparator);
1357}
1358
1363
1369
1375
1380
1385
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:303
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
char name[80]
Definition TGX11.cxx:110
void cd(Int_t id=-1)
#define _(A, B)
Definition cfortran.h:108
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
A column element encapsulates the translation between basic C++ types and their column representation...
static std::pair< std::uint16_t, std::uint16_t > GetValidBitRange(ROOT::ENTupleColumnType type)
Most types have a fixed on-disk bit width.
RResult< RExtraTypeInfoDescriptor > MoveDescriptor()
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live RNTuple field.
void SetNTuple(const std::string_view name, const std::string_view description)
void SetSchemaFromExisting(const RNTupleDescriptor &descriptor)
Copies the "schema" part of descriptor into the builder's descriptor.
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
RResult< void > AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId)
void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
void ShiftAliasColumns(std::uint32_t offset)
Shift column IDs of alias columns by offset
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId)
void Reset()
Clears the clusters, fields, and columns stored so far and returns to a pristine RNTupleDescriptor.
void AddField(const RFieldDescriptor &fieldDesc)
ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const
Get the streamer info records for custom classes. Currently requires the corresponding dictionaries t...
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > EnsureFieldExists(ROOT::DescriptorId_t fieldId) const
A helper class for serializing and deserialization of the RNTuple binary format.
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
static RResult< StreamerInfoMap_t > DeserializeStreamerInfos(const std::string &extraTypeInfoContent)
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange.
Metadata for RNTuple clusters.
ROOT::NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges.
std::unordered_map< ROOT::DescriptorId_t, RColumnRange > fColumnRanges
ROOT::DescriptorId_t fClusterId
RClusterDescriptor Clone() const
bool operator==(const RClusterDescriptor &other) const
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
std::unordered_map< ROOT::DescriptorId_t, RPageRange > fPageRanges
std::uint64_t GetNBytesOnStorage() const
Clusters are bundled in cluster groups.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor Clone() const
std::vector< ROOT::DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
std::uint64_t fPageListLength
Uncompressed size of the page list.
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
bool operator==(const RClusterGroupDescriptor &other) const
RClusterGroupDescriptor CloneSummary() const
Creates a clone without the cluster IDs.
Metadata stored for every column of an RNTuple.
ROOT::DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
bool operator==(const RColumnDescriptor &other) const
ROOT::DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
ROOT::DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
ROOT::ENTupleColumnType fType
The on-disk column type.
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
RColumnDescriptor Clone() const
Get a copy of the descriptor.
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Field-specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor Clone() const
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
A field translates read and write calls from/to underlying columns to/from tree values.
@ kTraitInvalidField
This field is an instance of RInvalidField and can be safely static_cast to it.
@ kTraitTypeChecksum
The TClass checksum is set and valid.
Metadata stored for every field of an RNTuple.
std::unique_ptr< ROOT::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
ROOT::DescriptorId_t fFieldId
RFieldDescriptor Clone() const
Get a copy of the descriptor.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
ROOT::DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
bool operator==(const RFieldDescriptor &other) const
std::string fFieldDescription
Free text set by the user.
ROOT::DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ROOT::ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::vector< ROOT::DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::string fTypeName
The C++ type that was used when writing the field.
std::vector< ROOT::DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
Used in RFieldBase::Check() to record field creation failures.
Definition RField.hxx:72
@ kGeneric
Generic unrecoverable error.
@ kUnknownStructure
The field could not be created because its descriptor had an unknown structural role.
Used to loop over all the clusters of an RNTuple (in unspecified order)
Used to loop over all the cluster groups of an RNTuple (in unspecified order)
Used to loop over a field's associated columns.
std::vector< ROOT::DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
Used to loop over all the extra type info record of an RNTuple (in unspecified order)
Used to loop over a field's child fields.
std::vector< ROOT::DescriptorId_t > GetTopLevelFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header,...
The on-storage metadata of an RNTuple.
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::set< unsigned int > fFeatureFlags
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
ROOT::DescriptorId_t fFieldZeroId
Set by the descriptor builder.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
RColumnDescriptorIterable GetColumnIterable() const
bool operator==(const RNTupleDescriptor &other) const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
std::vector< std::uint64_t > GetFeatureFlags() const
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
std::unique_ptr< ROOT::RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored metadata.
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
Potentially a subset of all the available clusters.
ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
std::string fName
The RNTuple name needs to be unique in a given storage location (file)
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
RResult< void > DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId)
std::vector< ROOT::DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::unordered_map< ROOT::DescriptorId_t, RFieldDescriptor > fFieldDescriptors
ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const
RResult< void > AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
RNTupleDescriptor Clone() const
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
RClusterDescriptorIterable GetClusterIterable() const
RNTupleDescriptor CloneSchema() const
Creates a descriptor containing only the schema information about this RNTuple, i....
std::uint64_t fGeneration
The generation of the descriptor.
ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const
std::unique_ptr< RHeaderExtension > fHeaderExtension
Generic information about the physical location of data.
static std::unique_ptr< RNTupleModel > Create()
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
const_iterator begin() const
const_iterator end() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
static std::unique_ptr< RVectorField > CreateUntyped(std::string_view fieldName, std::unique_ptr< RFieldBase > itemField)
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3073
struct void * fTypeName
Definition cppyy.h:9
const Int_t n
Definition legend1.C:16
Double_t ex[n]
Definition legend1.C:17
RResult< void > EnsureValidNameForRNTuple(std::string_view name, std::string_view where)
Check whether a given string is a valid name according to the RNTuple specification.
ROOT::RResult< std::unique_ptr< ROOT::RFieldBase > > CallFieldBaseCreate(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
Additional information about a page in an in-memory RPageRange.
Information about a single page in the context of a cluster's page range.
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345