Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-10-04
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#include <ROOT/RError.hxx>
18#include <ROOT/RFieldBase.hxx>
20#include <ROOT/RNTupleModel.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPage.hxx>
23#include <string_view>
24
25#include <RZip.h>
26#include <TError.h>
28
29#include <algorithm>
30#include <cstdint>
31#include <deque>
32#include <functional>
33#include <iostream>
34#include <set>
35#include <utility>
36
38{
 // Member-wise equality of two field descriptors. Every member listed below must match for the
 // descriptors to compare equal.
 // NOTE(review): fColumnCardinality is copied by Clone() but is not part of this comparison -- confirm
 // that this is intentional.
39 return fFieldId == other.fFieldId && fFieldVersion == other.fFieldVersion && fTypeVersion == other.fTypeVersion &&
40 fFieldName == other.fFieldName && fFieldDescription == other.fFieldDescription &&
41 fTypeName == other.fTypeName && fTypeAlias == other.fTypeAlias && fNRepetitions == other.fNRepetitions &&
42 fStructure == other.fStructure && fParentId == other.fParentId &&
43 fProjectionSourceId == other.fProjectionSourceId && fLinkIds == other.fLinkIds &&
 // Compare our checksum against the other descriptor's one (comparing `other` with itself is always true).
44 fLogicalColumnIds == other.fLogicalColumnIds && fTypeChecksum == other.fTypeChecksum;
45}
46
48{
49 RFieldDescriptor clone;
50 clone.fFieldId = fFieldId;
51 clone.fFieldVersion = fFieldVersion;
52 clone.fTypeVersion = fTypeVersion;
53 clone.fFieldName = fFieldName;
54 clone.fFieldDescription = fFieldDescription;
55 clone.fTypeName = fTypeName;
56 clone.fTypeAlias = fTypeAlias;
57 clone.fNRepetitions = fNRepetitions;
58 clone.fStructure = fStructure;
59 clone.fParentId = fParentId;
60 clone.fProjectionSourceId = fProjectionSourceId;
61 clone.fLinkIds = fLinkIds;
62 clone.fColumnCardinality = fColumnCardinality;
63 clone.fLogicalColumnIds = fLogicalColumnIds;
64 clone.fTypeChecksum = fTypeChecksum;
65 return clone;
66}
67
68std::unique_ptr<ROOT::Experimental::RFieldBase>
70 const ROOT::RCreateFieldOptions &options) const
71{
 // Builds an in-memory field object from this on-disk field descriptor.
 //  - Streamer structure: returns an RStreamerField.
 //  - Unknown structure: returns an RInvalidField if options.GetReturnInvalidOnError() is set, throws otherwise.
 //  - Empty type name (untyped field): builds the field from the linked subfield descriptors.
 //  - Otherwise: delegates to Internal::CallFieldBaseCreate() using the type alias if set, else the type name.
 // Every field that is returned successfully has its on-disk ID set to fFieldId.
72 if (GetStructure() == ROOT::ENTupleStructure::kStreamer) {
73 auto streamerField = std::make_unique<RStreamerField>(GetFieldName(), GetTypeName());
74 streamerField->SetOnDiskId(fFieldId);
75 return streamerField;
76 }
77
78 // The structure may be unknown if the descriptor comes from a deserialized field with an unknown structural role.
79 // For forward compatibility, we allow this case and return an InvalidField.
80 if (GetStructure() == ROOT::ENTupleStructure::kUnknown) {
81 if (options.GetReturnInvalidOnError()) {
82 auto invalidField = std::make_unique<RInvalidField>(GetFieldName(), GetTypeName(), "",
84 invalidField->SetOnDiskId(fFieldId);
85 return invalidField;
86 } else {
87 throw RException(R__FAIL("unexpected on-disk field structure value for field \"" + GetFieldName() + "\""));
88 }
89 }
90
91 if (GetTypeName().empty()) {
 // Untyped field: reconstruct it from its member/item descriptors. An invalid member or item field is
 // returned as-is instead of the enclosing field.
92 switch (GetStructure()) {
94 std::vector<std::unique_ptr<RFieldBase>> memberFields;
95 memberFields.reserve(fLinkIds.size());
96 for (auto id : fLinkIds) {
97 const auto &memberDesc = ntplDesc.GetFieldDescriptor(id);
98 auto field = memberDesc.CreateField(ntplDesc, options);
99 if (field->GetTraits() & RFieldBase::kTraitInvalidField)
100 return field;
101 memberFields.emplace_back(std::move(field));
102 }
103 auto recordField = std::make_unique<RRecordField>(GetFieldName(), std::move(memberFields));
104 recordField->SetOnDiskId(fFieldId);
105 return recordField;
106 }
108 if (fLinkIds.size() != 1) {
109 throw RException(R__FAIL("unsupported untyped collection for field \"" + GetFieldName() + "\""));
110 }
111 auto itemField = ntplDesc.GetFieldDescriptor(fLinkIds[0]).CreateField(ntplDesc, options);
112 if (itemField->GetTraits() & RFieldBase::kTraitInvalidField)
113 return itemField;
114 auto collectionField = RVectorField::CreateUntyped(GetFieldName(), std::move(itemField));
115 collectionField->SetOnDiskId(fFieldId);
116 return collectionField;
117 }
118 default: throw RException(R__FAIL("unsupported untyped field structure for field \"" + GetFieldName() + "\""));
119 }
120 }
121
122 try {
123 const auto &fieldName = GetFieldName();
124 const auto &typeName = GetTypeAlias().empty() ? GetTypeName() : GetTypeAlias();
125 // NOTE: Unwrap() here may throw an exception, hence the try block.
126 // If options.fReturnInvalidOnError is false we just rethrow it, otherwise we return an InvalidField wrapping the
127 // error.
128 auto field = Internal::CallFieldBaseCreate(fieldName, typeName, options, &ntplDesc, fFieldId).Unwrap();
129 field->SetOnDiskId(fFieldId);
130
 // Attach the on-disk IDs to all subfields; the parent's ID has already been set when a subfield is visited.
131 for (auto &subfield : *field) {
132 const auto subfieldId = ntplDesc.FindFieldId(subfield.GetFieldName(), subfield.GetParent()->GetOnDiskId());
133 subfield.SetOnDiskId(subfieldId);
134 if (subfield.GetTraits() & RFieldBase::kTraitInvalidField) {
135 auto &invalidField = static_cast<RInvalidField &>(subfield);
136 // A subfield being invalid "infects" its entire ancestry.
137 return invalidField.Clone(fieldName);
138 }
139 }
140
141 return field;
142 } catch (RException &ex) {
143 if (options.GetReturnInvalidOnError())
144 return std::make_unique<RInvalidField>(GetFieldName(), GetTypeName(), ex.GetError().GetReport(),
146 else
 // Re-raise the exception currently being handled; `throw ex;` would throw a copy of static type RException.
147 throw;
148 }
149}
150
152{
154 return false;
155
156 // Skip untyped structs
157 if (fTypeName.empty())
158 return false;
159
160 if (fStructure == ROOT::ENTupleStructure::kRecord) {
161 if (fTypeName.compare(0, 10, "std::pair<") == 0)
162 return false;
163 if (fTypeName.compare(0, 11, "std::tuple<") == 0)
164 return false;
165 }
166
167 return true;
168}
169
170////////////////////////////////////////////////////////////////////////////////
171
173{
174 return fLogicalColumnId == other.fLogicalColumnId && fPhysicalColumnId == other.fPhysicalColumnId &&
175 fBitsOnStorage == other.fBitsOnStorage && fType == other.fType && fFieldId == other.fFieldId &&
176 fIndex == other.fIndex && fRepresentationIndex == other.fRepresentationIndex &&
177 fValueRange == other.fValueRange;
178}
179
181{
182 RColumnDescriptor clone;
183 clone.fLogicalColumnId = fLogicalColumnId;
184 clone.fPhysicalColumnId = fPhysicalColumnId;
185 clone.fBitsOnStorage = fBitsOnStorage;
186 clone.fType = fType;
187 clone.fFieldId = fFieldId;
188 clone.fIndex = fIndex;
189 clone.fFirstElementIndex = fFirstElementIndex;
190 clone.fRepresentationIndex = fRepresentationIndex;
191 clone.fValueRange = fValueRange;
192 return clone;
193}
194
195////////////////////////////////////////////////////////////////////////////////
196
199{
200 const auto N = fCumulativeNElements.size();
201 R__ASSERT(N > 0);
202 R__ASSERT(N == fPageInfos.size());
203
204 std::size_t left = 0;
205 std::size_t right = N - 1;
206 std::size_t midpoint = N;
207 while (left <= right) {
208 midpoint = (left + right) / 2;
209 if (fCumulativeNElements[midpoint] <= idxInCluster) {
210 left = midpoint + 1;
211 continue;
212 }
213
214 if ((midpoint == 0) || (fCumulativeNElements[midpoint - 1] <= idxInCluster))
215 break;
216
217 right = midpoint - 1;
218 }
220
221 auto pageInfo = fPageInfos[midpoint];
222 decltype(idxInCluster) firstInPage = (midpoint == 0) ? 0 : fCumulativeNElements[midpoint - 1];
224 R__ASSERT((firstInPage + pageInfo.GetNElements()) > idxInCluster);
226}
227
228std::size_t
231 std::size_t pageSize)
232{
 // Extends this page range so that it covers the number of elements of `columnRange`: page infos are
 // synthesized for the missing elements and placed in front of the existing ones. The page range must belong
 // to the same physical column as `columnRange`, the column range must not be suppressed, and the range may
 // only grow.
233 R__ASSERT(fPhysicalColumnId == columnRange.GetPhysicalColumnId());
234 R__ASSERT(!columnRange.IsSuppressed());
235
 // Sum up in 64 bits: std::accumulate carries the running sum in the type of its initial value, so seeding
 // it with `0U` would truncate the element count to `unsigned int`.
236 const auto nElements =
237 std::accumulate(fPageInfos.begin(), fPageInfos.end(), std::uint64_t{0},
238 [](std::uint64_t n, const auto &pageInfo) { return n + pageInfo.GetNElements(); });
239 const auto nElementsRequired = static_cast<std::uint64_t>(columnRange.GetNElements());
240
242 return 0U;
243 R__ASSERT((nElementsRequired > nElements) && "invalid attempt to shrink RPageRange");
244
245 std::vector<RPageInfo> pageInfos;
246 // Synthesize new `RPageInfo`s as needed
247 const std::uint64_t nElementsPerPage = pageSize / element.GetSize();
251 pageInfo.SetNElements(std::min(nElementsPerPage, nRemainingElements));
254 locator.SetNBytesOnStorage(element.GetPackedSize(pageInfo.GetNElements()));
255 pageInfo.SetLocator(locator);
256 pageInfos.emplace_back(pageInfo);
257 nRemainingElements -= pageInfo.GetNElements();
258 }
259
 // The synthesized pages come first; the pre-existing page infos are moved behind them.
260 pageInfos.insert(pageInfos.end(), std::make_move_iterator(fPageInfos.begin()),
261 std::make_move_iterator(fPageInfos.end()));
262 std::swap(fPageInfos, pageInfos);
264}
265
267{
268 return fClusterId == other.fClusterId && fFirstEntryIndex == other.fFirstEntryIndex &&
269 fNEntries == other.fNEntries && fColumnRanges == other.fColumnRanges && fPageRanges == other.fPageRanges;
270}
271
273{
274 std::uint64_t nbytes = 0;
275 for (const auto &pr : fPageRanges) {
276 for (const auto &pi : pr.second.fPageInfos) {
277 nbytes += pi.GetLocator().GetNBytesOnStorage();
278 }
279 }
280 return nbytes;
281}
282
284{
285 RClusterDescriptor clone;
286 clone.fClusterId = fClusterId;
287 clone.fFirstEntryIndex = fFirstEntryIndex;
288 clone.fNEntries = fNEntries;
289 clone.fColumnRanges = fColumnRanges;
290 for (const auto &d : fPageRanges)
291 clone.fPageRanges.emplace(d.first, d.second.Clone());
292 return clone;
293}
294
295////////////////////////////////////////////////////////////////////////////////
296
298{
299 return fContentId == other.fContentId && fTypeName == other.fTypeName && fTypeVersion == other.fTypeVersion;
300}
301
303{
305 clone.fContentId = fContentId;
306 clone.fTypeVersion = fTypeVersion;
307 clone.fTypeName = fTypeName;
308 clone.fContent = fContent;
309 return clone;
310}
311
312////////////////////////////////////////////////////////////////////////////////
313
315{
316 // clang-format off
317 return fName == other.fName &&
318 fDescription == other.fDescription &&
319 fNEntries == other.fNEntries &&
320 fGeneration == other.fGeneration &&
321 fFieldZeroId == other.fFieldZeroId &&
322 fFieldDescriptors == other.fFieldDescriptors &&
323 fColumnDescriptors == other.fColumnDescriptors &&
324 fClusterGroupDescriptors == other.fClusterGroupDescriptors &&
325 fClusterDescriptors == other.fClusterDescriptors;
326 // clang-format on
327}
328
330{
332 for (const auto &cd : fClusterDescriptors) {
333 if (!cd.second.ContainsColumn(physicalColumnId))
334 continue;
335 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
336 result = std::max(result, columnRange.GetFirstElementIndex() + columnRange.GetNElements());
337 }
338 return result;
339}
340
343{
344 std::string leafName(fieldName);
345 auto posDot = leafName.find_last_of('.');
346 if (posDot != std::string::npos) {
347 auto parentName = leafName.substr(0, posDot);
348 leafName = leafName.substr(posDot + 1);
349 parentId = FindFieldId(parentName, parentId);
350 }
351 auto itrFieldDesc = fFieldDescriptors.find(parentId);
352 if (itrFieldDesc == fFieldDescriptors.end())
354 for (const auto linkId : itrFieldDesc->second.GetLinkIds()) {
355 if (fFieldDescriptors.at(linkId).GetFieldName() == leafName)
356 return linkId;
357 }
359}
360
362{
364 return "";
365
366 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
367 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
368 if (prefix.empty())
369 return fieldDescriptor.GetFieldName();
370 return prefix + "." + fieldDescriptor.GetFieldName();
371}
372
374{
375 return FindFieldId(fieldName, GetFieldZeroId());
376}
377
379 std::uint32_t columnIndex,
380 std::uint16_t representationIndex) const
381{
382 auto itr = fFieldDescriptors.find(fieldId);
383 if (itr == fFieldDescriptors.cend())
385 if (columnIndex >= itr->second.GetColumnCardinality())
387 const auto idx = representationIndex * itr->second.GetColumnCardinality() + columnIndex;
388 if (itr->second.GetLogicalColumnIds().size() <= idx)
390 return itr->second.GetLogicalColumnIds()[idx];
391}
392
395 std::uint16_t representationIndex) const
396{
397 auto logicalId = FindLogicalColumnId(fieldId, columnIndex, representationIndex);
400 return GetColumnDescriptor(logicalId).GetPhysicalId();
401}
402
405{
406 if (GetNClusterGroups() == 0)
408
409 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
410
411 std::size_t cgLeft = 0;
412 std::size_t cgRight = GetNClusterGroups() - 1;
413 while (cgLeft <= cgRight) {
414 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
415 const auto &clusterIds = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]).GetClusterIds();
416 R__ASSERT(!clusterIds.empty());
417
418 const auto &clusterDesc = GetClusterDescriptor(clusterIds.front());
419 // this may happen if the RNTuple has an empty schema
420 if (!clusterDesc.ContainsColumn(physicalColumnId))
422
423 const auto firstElementInGroup = clusterDesc.GetColumnRange(physicalColumnId).GetFirstElementIndex();
425 // Look into the lower half of cluster groups
427 cgRight = cgMidpoint - 1;
428 continue;
429 }
430
431 const auto &lastColumnRange = GetClusterDescriptor(clusterIds.back()).GetColumnRange(physicalColumnId);
432 if ((lastColumnRange.GetFirstElementIndex() + lastColumnRange.GetNElements()) <= index) {
433 // Look into the upper half of cluster groups
434 cgLeft = cgMidpoint + 1;
435 continue;
436 }
437
438 // Binary search in the current cluster group; since we already checked the element range boundaries,
439 // the element must be in that cluster group.
440 std::size_t clusterLeft = 0;
441 std::size_t clusterRight = clusterIds.size() - 1;
442 while (clusterLeft <= clusterRight) {
443 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
445 const auto &columnRange = GetClusterDescriptor(clusterId).GetColumnRange(physicalColumnId);
446
447 if (columnRange.Contains(index))
448 return clusterId;
449
450 if (columnRange.GetFirstElementIndex() > index) {
453 continue;
454 }
455
456 if (columnRange.GetFirstElementIndex() + columnRange.GetNElements() <= index) {
458 continue;
459 }
460 }
461 R__ASSERT(false);
462 }
464}
465
467{
468 if (GetNClusterGroups() == 0)
470
471 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
472
473 std::size_t cgLeft = 0;
474 std::size_t cgRight = GetNClusterGroups() - 1;
475 while (cgLeft <= cgRight) {
476 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
477 const auto &cgDesc = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]);
478
479 if (cgDesc.GetMinEntry() > entryIdx) {
481 cgRight = cgMidpoint - 1;
482 continue;
483 }
484
485 if (cgDesc.GetMinEntry() + cgDesc.GetEntrySpan() <= entryIdx) {
486 cgLeft = cgMidpoint + 1;
487 continue;
488 }
489
490 // Binary search in the current cluster group; since we already checked the entry range boundaries,
491 // the entry must be in that cluster group.
492 const auto &clusterIds = cgDesc.GetClusterIds();
493 R__ASSERT(!clusterIds.empty());
494 std::size_t clusterLeft = 0;
495 std::size_t clusterRight = clusterIds.size() - 1;
496 while (clusterLeft <= clusterRight) {
497 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
498 const auto &clusterDesc = GetClusterDescriptor(clusterIds[clusterMidpoint]);
499
500 if (clusterDesc.GetFirstEntryIndex() > entryIdx) {
503 continue;
504 }
505
506 if (clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries() <= entryIdx) {
508 continue;
509 }
510
512 }
513 R__ASSERT(false);
514 }
516}
517
519{
520 // TODO(jblomer): we may want to shortcut the common case and check if clusterId + 1 contains
521 // firstEntryInNextCluster. This shortcut would currently always trigger. We do not want, however, to depend
522 // on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure that the
523 // binary search code path remains tested.
524 const auto &clusterDesc = GetClusterDescriptor(clusterId);
525 const auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
526 return FindClusterId(firstEntryInNextCluster);
527}
528
530{
531 // TODO(jblomer): we may want to shortcut the common case and check if clusterId - 1 contains
532 // the last entry before this cluster. This shortcut would currently always trigger. We do not want, however,
533 // to depend on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure
534 // that the binary search code path remains tested.
535 const auto &clusterDesc = GetClusterDescriptor(clusterId);
 // A cluster that starts at entry 0 has no preceding entry to look up.
 // NOTE(review): the statement guarded by this check (listing line 537) is not visible in this extract.
536 if (clusterDesc.GetFirstEntryIndex() == 0)
 // Otherwise look up the cluster that holds the entry right before this cluster's first entry.
538 return FindClusterId(clusterDesc.GetFirstEntryIndex() - 1);
539}
540
541std::vector<ROOT::DescriptorId_t>
543{
544 auto fieldZeroId = desc.GetFieldZeroId();
545
546 std::vector<ROOT::DescriptorId_t> fields;
547 for (const auto fieldId : fFieldIdsOrder) {
548 if (desc.GetFieldDescriptor(fieldId).GetParentId() == fieldZeroId)
549 fields.emplace_back(fieldId);
550 }
551 return fields;
552}
553
559
562 : fNTuple(ntuple)
563{
564 std::deque<ROOT::DescriptorId_t> fieldIdQueue{ntuple.GetFieldZeroId()};
565
566 while (!fieldIdQueue.empty()) {
567 auto currFieldId = fieldIdQueue.front();
568 fieldIdQueue.pop_front();
569
570 const auto &columns = ntuple.GetFieldDescriptor(currFieldId).GetLogicalColumnIds();
571 fColumns.insert(fColumns.end(), columns.begin(), columns.end());
572
573 for (const auto &field : ntuple.GetFieldIterable(currFieldId)) {
574 auto fieldId = field.GetId();
575 fieldIdQueue.push_back(fieldId);
576 }
577 }
578}
579
581{
 // Packs the feature flag numbers in fFeatureFlags into a list of 64-bit words: a flag `f` sets bit
 // `f - base` in the word that starts at flag number `base` (0, 64, 128, ...). At least one word is returned.
 // Throws an RException for a non-zero flag that is a multiple of 64.
 // NOTE(review): the word-filling loop relies on fFeatureFlags iterating in ascending order -- presumably it
 // is a std::set (AddFeature() fills it with insert()); confirm against the class declaration.
582 std::vector<std::uint64_t> result;
583 unsigned int base = 0;
584 std::uint64_t flags = 0;
585 for (auto f : fFeatureFlags) {
586 if ((f > 0) && ((f % 64) == 0))
587 throw RException(R__FAIL("invalid feature flag: " + std::to_string(f)));
 // Flush completed words until `f` falls into the current one.
588 while (f > base + 64) {
589 result.emplace_back(flags);
590 flags = 0;
591 base += 64;
592 }
593 f -= base;
 // Shift a 64-bit one: `1 << f` would shift a 32-bit int, which overflows for bit positions >= 31.
594 flags |= std::uint64_t{1} << f;
595 }
596 result.emplace_back(flags);
597 return result;
598}
599
602 std::vector<RClusterDescriptor> &clusterDescs)
603{
605 if (iter == fClusterGroupDescriptors.end())
606 return R__FAIL("invalid attempt to add details of unknown cluster group");
607 if (iter->second.HasClusterDetails())
608 return R__FAIL("invalid attempt to re-populate cluster group details");
609 if (iter->second.GetNClusters() != clusterDescs.size())
610 return R__FAIL("mismatch of number of clusters");
611
612 std::vector<ROOT::DescriptorId_t> clusterIds;
613 for (unsigned i = 0; i < clusterDescs.size(); ++i) {
614 clusterIds.emplace_back(clusterDescs[i].GetId());
615 auto [_, success] = fClusterDescriptors.emplace(clusterIds.back(), std::move(clusterDescs[i]));
616 if (!success) {
617 return R__FAIL("invalid attempt to re-populate existing cluster");
618 }
619 }
621 return fClusterDescriptors[a].GetFirstEntryIndex() < fClusterDescriptors[b].GetFirstEntryIndex();
622 });
624 cgBuilder.AddSortedClusters(clusterIds);
625 iter->second = cgBuilder.MoveDescriptor().Unwrap();
626 return RResult<void>::Success();
627}
628
630{
632 if (iter == fClusterGroupDescriptors.end())
633 return R__FAIL("invalid attempt to drop cluster details of unknown cluster group");
634 if (!iter->second.HasClusterDetails())
635 return R__FAIL("invalid attempt to drop details of cluster group summary");
636
637 for (auto clusterId : iter->second.GetClusterIds())
639 iter->second = iter->second.CloneSummary();
640 return RResult<void>::Success();
641}
642
643std::unique_ptr<ROOT::Experimental::RNTupleModel>
645{
646 auto fieldZero = std::make_unique<RFieldZero>();
647 fieldZero->SetOnDiskId(GetFieldZeroId());
648 auto model = options.GetCreateBare() ? RNTupleModel::CreateBare(std::move(fieldZero))
649 : RNTupleModel::Create(std::move(fieldZero));
651 createFieldOpts.SetReturnInvalidOnError(options.GetForwardCompatible());
652 createFieldOpts.SetEmulateUnknownTypes(options.GetEmulateUnknownTypes());
653 for (const auto &topDesc : GetTopLevelFields()) {
654 auto field = topDesc.CreateField(*this, createFieldOpts);
655 if (field->GetTraits() & RFieldBase::kTraitInvalidField)
656 continue;
657
658 if (options.GetReconstructProjections() && topDesc.IsProjectedField()) {
659 model->AddProjectedField(std::move(field), [this](const std::string &targetName) -> std::string {
660 return GetQualifiedFieldName(GetFieldDescriptor(FindFieldId(targetName)).GetProjectionSourceId());
661 });
662 } else {
663 model->AddField(std::move(field));
664 }
665 }
666 model->Freeze();
667 return model;
668}
669
671{
672 RNTupleDescriptor clone;
673 clone.fName = fName;
678 // OnDiskHeaderSize, OnDiskHeaderXxHash3 not copied because they may come from a merged header + extension header
679 // and therefore do not represent the actual source's header.
680 // OnDiskFooterSize not copied because it contains information beyond the schema, for example the clustering.
681
682 for (const auto &d : fFieldDescriptors)
683 clone.fFieldDescriptors.emplace(d.first, d.second.Clone());
684 for (const auto &d : fColumnDescriptors)
685 clone.fColumnDescriptors.emplace(d.first, d.second.Clone());
686
687 for (const auto &d : fExtraTypeInfoDescriptors)
688 clone.fExtraTypeInfoDescriptors.emplace_back(d.Clone());
690 clone.fHeaderExtension = std::make_unique<RHeaderExtension>(*fHeaderExtension);
691
692 return clone;
693}
694
696{
698
702 clone.fNEntries = fNEntries;
703 clone.fNClusters = fNClusters;
704 clone.fGeneration = fGeneration;
705 for (const auto &d : fClusterGroupDescriptors)
706 clone.fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
708 for (const auto &d : fClusterDescriptors)
709 clone.fClusterDescriptors.emplace(d.first, d.second.Clone());
710 return clone;
711}
712
713////////////////////////////////////////////////////////////////////////////////
714
716{
717 return fClusterGroupId == other.fClusterGroupId && fClusterIds == other.fClusterIds &&
718 fMinEntry == other.fMinEntry && fEntrySpan == other.fEntrySpan && fNClusters == other.fNClusters;
719}
720
722{
724 clone.fClusterGroupId = fClusterGroupId;
725 clone.fPageListLocator = fPageListLocator;
726 clone.fPageListLength = fPageListLength;
727 clone.fMinEntry = fMinEntry;
728 clone.fEntrySpan = fEntrySpan;
729 clone.fNClusters = fNClusters;
730 return clone;
731}
732
734{
735 RClusterGroupDescriptor clone = CloneSummary();
736 clone.fClusterIds = fClusterIds;
737 return clone;
738}
739
740////////////////////////////////////////////////////////////////////////////////
741
745{
746 if (physicalId != pageRange.fPhysicalColumnId)
747 return R__FAIL("column ID mismatch");
748 if (fCluster.fColumnRanges.count(physicalId) > 0)
749 return R__FAIL("column ID conflict");
751 for (const auto &pi : pageRange.fPageInfos) {
752 columnRange.IncrementNElements(pi.GetNElements());
753 }
754 fCluster.fPageRanges[physicalId] = pageRange.Clone();
755 fCluster.fColumnRanges[physicalId] = columnRange;
756 return RResult<void>::Success();
757}
758
761{
762 if (fCluster.fColumnRanges.count(physicalId) > 0)
763 return R__FAIL("column ID conflict");
764
766 columnRange.SetPhysicalColumnId(physicalId);
767 columnRange.SetIsSuppressed(true);
768 fCluster.fColumnRanges[physicalId] = columnRange;
769 return RResult<void>::Success();
770}
771
774{
775 for (auto &[_, columnRange] : fCluster.fColumnRanges) {
776 if (!columnRange.IsSuppressed())
777 continue;
778 R__ASSERT(columnRange.GetFirstElementIndex() == ROOT::kInvalidNTupleIndex);
779
780 const auto &columnDesc = desc.GetColumnDescriptor(columnRange.GetPhysicalColumnId());
781 const auto &fieldDesc = desc.GetFieldDescriptor(columnDesc.GetFieldId());
782 // We expect only few columns and column representations per field, so we do a linear search
783 for (const auto otherColumnLogicalId : fieldDesc.GetLogicalColumnIds()) {
785 if (otherColumnDesc.GetRepresentationIndex() == columnDesc.GetRepresentationIndex())
786 continue;
787 if (otherColumnDesc.GetIndex() != columnDesc.GetIndex())
788 continue;
789
790 // Found corresponding column of a different column representation
791 const auto &otherColumnRange = fCluster.GetColumnRange(otherColumnDesc.GetPhysicalId());
792 if (otherColumnRange.IsSuppressed())
793 continue;
794
795 columnRange.SetFirstElementIndex(otherColumnRange.GetFirstElementIndex());
796 columnRange.SetNElements(otherColumnRange.GetNElements());
797 break;
798 }
799
800 if (columnRange.GetFirstElementIndex() == ROOT::kInvalidNTupleIndex) {
801 return R__FAIL(std::string("cannot find non-suppressed column for column ID ") +
802 std::to_string(columnRange.GetPhysicalColumnId()) +
803 ", cluster ID: " + std::to_string(fCluster.GetId()));
804 }
805 }
806 return RResult<void>::Success();
807}
808
811{
812 /// Carries out a depth-first traversal of a field subtree rooted at `rootFieldId`. For each field, `visitField` is
813 /// called passing the field ID and the number of overall repetitions, taking into account the repetitions of each
814 /// parent field in the hierarchy.
816 const auto &visitField, const auto &enterSubtree) -> void {
818 for (const auto &f : desc.GetFieldIterable(rootFieldId)) {
819 const std::uint64_t nRepetitions = std::max(f.GetNRepetitions(), std::uint64_t{1U}) * nRepetitionsAtThisLevel;
821 }
822 };
823
824 // Extended columns can only be part of the header extension
825 if (!desc.GetHeaderExtension())
826 return *this;
827
828 // Ensure that all columns in the header extension have their associated `R(Column|Page)Range`
829 // Extended columns can be attached both to fields of the regular header and to fields of the extension header
830 for (const auto &topLevelField : desc.GetTopLevelFields()) {
832 topLevelField.GetId(), std::max(topLevelField.GetNRepetitions(), std::uint64_t{1U}),
833 [&](ROOT::DescriptorId_t fieldId, std::uint64_t nRepetitions) {
834 for (const auto &c : desc.GetColumnIterable(fieldId)) {
835 const ROOT::DescriptorId_t physicalId = c.GetPhysicalId();
836 auto &columnRange = fCluster.fColumnRanges[physicalId];
837
838 // Initialize a RColumnRange for `physicalId` if it was not there. Columns that were created during model
839 // extension won't have on-disk metadata for the clusters that were already committed before the model
840 // was extended. Therefore, these need to be synthetically initialized upon reading.
841 if (columnRange.GetPhysicalColumnId() == ROOT::kInvalidDescriptorId) {
842 columnRange.SetPhysicalColumnId(physicalId);
843 columnRange.SetFirstElementIndex(0);
844 columnRange.SetNElements(0);
845 columnRange.SetIsSuppressed(c.IsSuppressedDeferredColumn());
846 }
847 // Fixup the RColumnRange and RPageRange in deferred columns. We know what the first element index and
848 // number of elements should have been if the column was not deferred; fix those and let
849 // `ExtendToFitColumnRange()` synthesize RPageInfos accordingly.
850 // Note that a deferred column (i.e, whose first element index is > 0) already met the criteria of
851 // `RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the field zero
852 // excluding subfields of collection and variant fields.
853 if (c.IsDeferredColumn()) {
854 columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions);
855 columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions);
856 if (!columnRange.IsSuppressed()) {
857 auto &pageRange = fCluster.fPageRanges[physicalId];
858 pageRange.fPhysicalColumnId = physicalId;
859 const auto element = Internal::RColumnElementBase::Generate<void>(c.GetType());
860 pageRange.ExtendToFitColumnRange(columnRange, *element, Internal::RPage::kPageZeroSize);
861 }
862 } else if (!columnRange.IsSuppressed()) {
863 fCluster.fPageRanges[physicalId].fPhysicalColumnId = physicalId;
864 }
865 }
866 },
868 }
869 return *this;
870}
871
874{
875 if (fCluster.fClusterId == ROOT::kInvalidDescriptorId)
876 return R__FAIL("unset cluster ID");
877 if (fCluster.fNEntries == 0)
878 return R__FAIL("empty cluster");
879 for (auto &pr : fCluster.fPageRanges) {
880 if (fCluster.fColumnRanges.count(pr.first) == 0) {
881 return R__FAIL("missing column range");
882 }
883 pr.second.fCumulativeNElements.clear();
884 pr.second.fCumulativeNElements.reserve(pr.second.fPageInfos.size());
886 for (const auto &pi : pr.second.fPageInfos) {
887 sum += pi.GetNElements();
888 pr.second.fCumulativeNElements.emplace_back(sum);
889 }
890 }
892 std::swap(result, fCluster);
893 return result;
894}
895
896////////////////////////////////////////////////////////////////////////////////
897
901{
903 builder.ClusterGroupId(clusterGroupDesc.GetId())
904 .PageListLocator(clusterGroupDesc.GetPageListLocator())
905 .PageListLength(clusterGroupDesc.GetPageListLength())
906 .MinEntry(clusterGroupDesc.GetMinEntry())
907 .EntrySpan(clusterGroupDesc.GetEntrySpan())
908 .NClusters(clusterGroupDesc.GetNClusters());
909 return builder;
910}
911
914{
915 if (fClusterGroup.fClusterGroupId == ROOT::kInvalidDescriptorId)
916 return R__FAIL("unset cluster group ID");
918 std::swap(result, fClusterGroup);
919 return result;
920}
921
922////////////////////////////////////////////////////////////////////////////////
923
926{
927 if (fExtraTypeInfo.fContentId == EExtraTypeInfoIds::kInvalid)
928 throw RException(R__FAIL("invalid extra type info content id"));
930 std::swap(result, fExtraTypeInfo);
931 return result;
932}
933
934////////////////////////////////////////////////////////////////////////////////
935
938{
939 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
940 return R__FAIL("field with id '" + std::to_string(fieldId) + "' doesn't exist");
941 return RResult<void>::Success();
942}
943
945{
946 // Reuse field name validity check
947 auto validName = ROOT::Experimental::Internal::EnsureValidNameForRNTuple(fDescriptor.GetName(), "Field");
948 if (!validName) {
950 }
951
952 for (const auto &[fieldId, fieldDesc] : fDescriptor.fFieldDescriptors) {
953 // parent not properly set?
954 if (fieldId != fDescriptor.GetFieldZeroId() && fieldDesc.GetParentId() == ROOT::kInvalidDescriptorId) {
955 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has an invalid parent id");
956 }
957
958 // Same number of columns in every column representation?
959 const auto columnCardinality = fieldDesc.GetColumnCardinality();
960 if (columnCardinality == 0)
961 continue;
962
963 // In AddColumn, we already checked that all but the last representation are complete.
964 // Check that the last column representation is complete, i.e. has all columns.
965 const auto &logicalColumnIds = fieldDesc.GetLogicalColumnIds();
966 const auto nColumns = logicalColumnIds.size();
967 // If we have only a single column representation, the following condition is true by construction
968 if ((nColumns + 1) == columnCardinality)
969 continue;
970
971 const auto &lastColumn = fDescriptor.GetColumnDescriptor(logicalColumnIds.back());
972 if (lastColumn.GetIndex() + 1 != columnCardinality)
973 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has incomplete column representations");
974 }
975
976 return RResult<void>::Success();
977}
978
980{
   // Finalizes the descriptor under construction and hands it to the caller:
   // validates it (throwing on failure), builds the list of cluster group ids sorted by
   // minimum entry, then swaps the builder's descriptor into `result` and returns it.
   // NOTE(review): the signature (listing line 979) is missing from this extract; presumably
   // RNTupleDescriptorBuilder::MoveDescriptor() -- confirm.
981 EnsureValidDescriptor().ThrowOnError();
982 fDescriptor.fSortedClusterGroupIds.reserve(fDescriptor.fClusterGroupDescriptors.size());
983 for (const auto &[id, _] : fDescriptor.fClusterGroupDescriptors)
984 fDescriptor.fSortedClusterGroupIds.emplace_back(id);
   // Order the ids by the GetMinEntry() of the cluster group they refer to.
   // NOTE(review): listing line 986 (the comparator's lambda header taking a and b) is absent from this extract.
985 std::sort(fDescriptor.fSortedClusterGroupIds.begin(), fDescriptor.fSortedClusterGroupIds.end(),
987 return fDescriptor.fClusterGroupDescriptors[a].GetMinEntry() <
988 fDescriptor.fClusterGroupDescriptors[b].GetMinEntry();
989 });
   // NOTE(review): listing line 990 (the declaration of `result`) is absent from this extract.
   // After the swap the builder holds whatever `result` was initialized with.
991 std::swap(result, fDescriptor);
992 return result;
993}
994
996 const std::string_view description)
997{
   // Stores owning copies of the given name and description in the descriptor under construction.
   // No validation happens here; the name is checked later when the descriptor is validated.
998 fDescriptor.fName = std::string(name);
999 fDescriptor.fDescription = std::string(description);
1000}
1001
1003{
   // Registers a feature flag in the descriptor under construction.
   // Throws RException for any flag that is a multiple of 64 (this includes 0).
   // NOTE(review): presumably those positions are reserved in the serialized flag words -- confirm
   // against the RNTuple format specification.
1004 if (flag % 64 == 0)
1005 throw RException(R__FAIL("invalid feature flag: " + std::to_string(flag)));
1006 fDescriptor.fFeatureFlags.insert(flag);
1007}
1008
1011{
   // Validates the column being built and returns a copy of it on success.
   // Fails if the logical id, physical id, or owning field id is unset, or if the bit width is
   // outside the valid range for a known column type.
1012 if (fColumn.GetLogicalId() == ROOT::kInvalidDescriptorId)
1013 return R__FAIL("invalid logical column id");
1014 if (fColumn.GetPhysicalId() == ROOT::kInvalidDescriptorId)
1015 return R__FAIL("invalid physical column id");
1016 if (fColumn.GetFieldId() == ROOT::kInvalidDescriptorId)
1017 return R__FAIL("invalid field id, dangling column");
1018
1019 // NOTE: if the column type is unknown we don't want to fail, as we might be reading an RNTuple
1020 // created with a future version of ROOT. In this case we just skip the valid bit range check,
1021 // as we have no idea what the valid range is.
1022 // In general, reading the metadata of an unknown column is fine, it becomes an error only when
1023 // we try to read the actual data contained in it.
1024 if (fColumn.GetType() != ENTupleColumnType::kUnknown) {
   // Both bounds are inclusive: only values strictly below minBits or strictly above maxBits are rejected.
1025 const auto [minBits, maxBits] = RColumnElementBase::GetValidBitRange(fColumn.GetType());
1026 if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits)
1027 return R__FAIL("invalid column bit width");
1028 }
1029
1030 return fColumn.Clone();
1031}
1032
1040
1043{
   // Builds a field descriptor builder pre-populated from a live field: versions, name, description,
   // type name and alias, structure, and repetition count are copied from `field`.
   // No field id or parent id is set here.
   // NOTE(review): listing line 1044 (the declaration of `fieldDesc`) is absent from this extract.
1045 fieldDesc.FieldVersion(field.GetFieldVersion())
1046 .TypeVersion(field.GetTypeVersion())
1047 .FieldName(field.GetFieldName())
1048 .FieldDescription(field.GetDescription())
1049 .TypeName(field.GetTypeName())
1050 .TypeAlias(field.GetTypeAlias())
1051 .Structure(field.GetStructure())
1052 .NRepetitions(field.GetNRepetitions());
   // The type checksum is copied only if the field's traits declare it as set.
1053 if (field.GetTraits() & RFieldBase::kTraitTypeChecksum)
1054 fieldDesc.TypeChecksum(field.GetTypeChecksum());
1055 return fieldDesc;
1056}
1057
1060{
   // Validates the field being built and returns a copy of it on success.
   // Fails if the field id is unset or the structure is kInvalid. The name checks are applied
   // only to fields that have a parent id set.
1061 if (fField.GetId() == ROOT::kInvalidDescriptorId) {
1062 return R__FAIL("invalid field id");
1063 }
1064 if (fField.GetStructure() == ROOT::ENTupleStructure::kInvalid) {
1065 return R__FAIL("invalid field structure");
1066 }
1067 // FieldZero is usually named "" and would be a false positive here
1068 if (fField.GetParentId() != ROOT::kInvalidDescriptorId) {
1069 auto validName = ROOT::Experimental::Internal::EnsureValidNameForRNTuple(fField.GetFieldName(), "Field");
1070 if (!validName) {
   // NOTE(review): listing line 1071 is absent from this extract; presumably the error held in
   // validName is forwarded to the caller here -- confirm.
1072 }
1073 if (fField.GetFieldName().empty()) {
1074 return R__FAIL("name cannot be empty string \"\"");
1075 }
1076 }
1077 return fField.Clone();
1078}
1079
1081{
   // Stores a copy of fieldDesc in the descriptor under construction, keyed by its id.
   // emplace() does not overwrite: if the id is already present, the existing entry is kept.
   // NOTE(review): the signature (listing line 1080) is missing from this extract; presumably
   // RNTupleDescriptorBuilder::AddField -- confirm.
1082 fDescriptor.fFieldDescriptors.emplace(fieldDesc.GetId(), fieldDesc.Clone());
   // Once a header extension exists, every field added is recorded in it.
1083 if (fDescriptor.fHeaderExtension)
1084 fDescriptor.fHeaderExtension->MarkExtendedField(fieldDesc);
   // A field with an empty name and no parent is taken to be the zero (root) field.
1085 if (fieldDesc.GetFieldName().empty() && fieldDesc.GetParentId() == ROOT::kInvalidDescriptorId) {
1086 fDescriptor.fFieldZeroId = fieldDesc.GetId();
1087 }
1088}
1089
1092{
   // Makes the field `linkId` a child of the field `fieldId`.
   // Fails if either field is unknown, if linkId is the zero field, if linkId already has a parent,
   // or if both ids are equal. On success the child's parent id is set and linkId is appended to
   // the parent's list of links.
   // NOTE(review): listing lines 1093 (declaration of `fieldExists`), 1095 (failure return for
   // fieldId) and 1104 (condition on parentId) are absent from this extract.
1094 if (!(fieldExists = EnsureFieldExists(fieldId)))
1096 if (!(fieldExists = EnsureFieldExists(linkId)))
1097 return R__FAIL("child field with id '" + std::to_string(linkId) + "' doesn't exist in NTuple");
1098
1099 if (linkId == fDescriptor.GetFieldZeroId()) {
1100 return R__FAIL("cannot make FieldZero a child field");
1101 }
1102 // fail if field already has another valid parent
1103 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
1105 return R__FAIL("field '" + std::to_string(linkId) + "' already has a parent ('" + std::to_string(parentId) + ")");
1106 }
1107 if (fieldId == linkId) {
1108 return R__FAIL("cannot make field '" + std::to_string(fieldId) + "' a child of itself");
1109 }
   // Record the relationship in both directions: child -> parent and parent -> child.
1110 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
1111 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
1112 return RResult<void>::Success();
1113}
1114
1118{
   // Marks the field `targetId` as a projection of the field `sourceId`.
   // Fails if either field is unknown, if targetId is the zero field, if both ids are equal,
   // if the source is itself a projected field, or if the target already projects a different source.
   // NOTE(review): listing lines 1119 (declaration of `fieldExists`) and 1121 (failure return for
   // sourceId) are absent from this extract.
1120 if (!(fieldExists = EnsureFieldExists(sourceId)))
1122 if (!(fieldExists = EnsureFieldExists(targetId)))
1123 return R__FAIL("projected field with id '" + std::to_string(targetId) + "' doesn't exist in NTuple");
1124
1125 if (targetId == fDescriptor.GetFieldZeroId()) {
1126 return R__FAIL("cannot make FieldZero a projected field");
1127 }
1128 if (sourceId == targetId) {
1129 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of itself");
1130 }
   // Chained projections (a projection of a projection) are rejected.
1131 if (fDescriptor.fFieldDescriptors.at(sourceId).IsProjectedField()) {
1132 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of an already projected field");
1133 }
1134 // fail if target field already has another valid projection source
   // Re-adding the same source for an already projected target is accepted.
1135 auto &targetDesc = fDescriptor.fFieldDescriptors.at(targetId);
1136 if (targetDesc.IsProjectedField() && targetDesc.GetProjectionSourceId() != sourceId) {
1137 return R__FAIL("field '" + std::to_string(targetId) + "' has already a projection source ('" +
1138 std::to_string(targetDesc.GetProjectionSourceId()) + ")");
1139 }
1140 fDescriptor.fFieldDescriptors.at(targetId).fProjectionSourceId = sourceId;
1141 return RResult<void>::Success();
1142}
1143
1145{
   // Attaches a column descriptor to its field and stores it in the descriptor under construction.
   // Columns must be added in order: within a representation by increasing column index, and
   // representation r only after representation r-1 is complete. The first representation
   // defines the field's column cardinality; later representations must not exceed it.
   // Returns an error (and stores nothing) if any of the checks below fails.
1146 const auto fieldId = columnDesc.GetFieldId();
1147 const auto columnIndex = columnDesc.GetIndex();
1148 const auto representationIndex = columnDesc.GetRepresentationIndex();
1149
1150 auto fieldExists = EnsureFieldExists(fieldId);
1151 if (!fieldExists) {
   // NOTE(review): listing line 1152 is absent from this extract; presumably the error held in
   // fieldExists is forwarded to the caller here -- confirm.
1153 }
1154 auto &fieldDesc = fDescriptor.fFieldDescriptors.find(fieldId)->second;
1155
   // An alias column must have the same type as the column its physical id refers to.
1156 if (columnDesc.IsAliasColumn()) {
1157 if (columnDesc.GetType() != fDescriptor.GetColumnDescriptor(columnDesc.GetPhysicalId()).GetType())
1158 return R__FAIL("alias column type mismatch");
1159 }
   // The (field, column index, representation index) triple must not be taken yet.
1160 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex, representationIndex) != ROOT::kInvalidDescriptorId) {
1161 return R__FAIL("column index clash");
1162 }
   // The preceding column of the same representation must already exist (no gaps).
1163 if (columnIndex > 0) {
1164 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex - 1, representationIndex) == ROOT::kInvalidDescriptorId)
1165 return R__FAIL("out of bounds column index");
1166 }
1167 if (representationIndex > 0) {
   // The previous representation must have been started ...
1168 if (fDescriptor.FindLogicalColumnId(fieldId, 0, representationIndex - 1) == ROOT::kInvalidDescriptorId) {
1169 return R__FAIL("out of bounds representation index");
1170 }
1171 if (columnIndex == 0) {
   // ... and, when a new representation begins, the previous one must contain its final column.
   // NOTE(review): listing line 1174 (the right-hand side of the comparison) is absent from this extract.
1172 assert(fieldDesc.fColumnCardinality > 0);
1173 if (fDescriptor.FindLogicalColumnId(fieldId, fieldDesc.fColumnCardinality - 1, representationIndex - 1) ==
1175 return R__FAIL("incomplete column representations");
1176 }
1177 } else {
1178 if (columnIndex >= fieldDesc.fColumnCardinality)
1179 return R__FAIL("irregular column representations");
1180 }
1181 } else {
1182 // This will set the column cardinality to the number of columns of the first representation
1183 fieldDesc.fColumnCardinality = columnIndex + 1;
1184 }
1185
1186 const auto logicalId = columnDesc.GetLogicalId();
1187 fieldDesc.fLogicalColumnIds.emplace_back(logicalId);
1188
   // Only non-alias columns count towards the number of physical columns.
1189 if (!columnDesc.IsAliasColumn())
1190 fDescriptor.fNPhysicalColumns++;
1191 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(columnDesc));
   // NOTE(review): columnDesc is read below after being passed through std::move above
   // (use-after-move). Whether the moved-from object still holds its ids depends on
   // RColumnDescriptor's move constructor, which is not visible here. Consider marking the
   // column before the emplace, or passing the stored element instead.
1192 if (fDescriptor.fHeaderExtension)
1193 fDescriptor.fHeaderExtension->MarkExtendedColumn(columnDesc);
1194
1195 return RResult<void>::Success();
1196}
1197
1200{
   // Adds a cluster group to the descriptor under construction.
   // Fails with "cluster group id clash" if a group with the same id is already stored.
   // On success the entry and cluster totals are updated and the group is moved into the map.
1201 const auto id = clusterGroup.GetId();
1202 if (fDescriptor.fClusterGroupDescriptors.count(id) > 0)
1203 return R__FAIL("cluster group id clash");
   // The entry count is the furthest entry reached by any group, not a sum over the groups.
1204 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, clusterGroup.GetMinEntry() + clusterGroup.GetEntrySpan());
1205 fDescriptor.fNClusters += clusterGroup.GetNClusters();
   // All reads of clusterGroup happen above; it is moved from only as the last step.
1206 fDescriptor.fClusterGroupDescriptors.emplace(id, std::move(clusterGroup));
1207 return RResult<void>::Success();
1208}
1209
1211{
   // Empties the name and description, clears the field, column, cluster, and cluster group maps,
   // and drops the header extension.
   // NOTE(review): fNEntries, fNClusters and fNPhysicalColumns (updated by AddClusterGroup and
   // AddColumn in this file), as well as fExtraTypeInfoDescriptors, fFeatureFlags, fFieldZeroId and
   // fSortedClusterGroupIds, are not reset here -- confirm whether that is intentional.
1212 fDescriptor.fName = "";
1213 fDescriptor.fDescription = "";
1214 fDescriptor.fFieldDescriptors.clear();
1215 fDescriptor.fColumnDescriptors.clear();
1216 fDescriptor.fClusterDescriptors.clear();
1217 fDescriptor.fClusterGroupDescriptors.clear();
1218 fDescriptor.fHeaderExtension.reset();
1219}
1220
1225
1227{
   // Creates the header extension object if there is none yet; calling this again is a no-op.
   // From then on AddField and AddColumn record what they add in the extension.
1228 if (!fDescriptor.fHeaderExtension)
1229 fDescriptor.fHeaderExtension = std::make_unique<RNTupleDescriptor::RHeaderExtension>();
1230}
1231
1233{
   // Increases the logical id of every column with id in [NPhysicalColumns, NLogicalColumns) by
   // `offset`, re-keying the column map and patching the owning field's list of column ids.
   // Each such column is asserted to be an alias column whose map key equals its logical id.
1234 if (fDescriptor.GetNLogicalColumns() == 0)
1235 return;
   // With at least one physical column the lower loop bound is >= 1, so the unsigned `id`
   // cannot wrap around when decremented.
1236 R__ASSERT(fDescriptor.GetNPhysicalColumns() > 0);
1237
   // Walk from the highest id downwards.
   // NOTE(review): presumably so that a shifted id never lands on a not-yet-shifted one -- confirm.
1238 for (ROOT::DescriptorId_t id = fDescriptor.GetNLogicalColumns() - 1; id >= fDescriptor.GetNPhysicalColumns(); --id) {
   // Work on a copy; the stored entry is erased and re-inserted under the new key.
1239 auto c = fDescriptor.fColumnDescriptors[id].Clone();
1240 R__ASSERT(c.IsAliasColumn());
1241 R__ASSERT(id == c.GetLogicalId());
1242 fDescriptor.fColumnDescriptors.erase(id);
   // Update the single matching entry in the owning field's column id list.
1243 for (auto &link : fDescriptor.fFieldDescriptors[c.fFieldId].fLogicalColumnIds) {
1244 if (link == c.fLogicalColumnId) {
1245 link += offset;
1246 break;
1247 }
1248 }
1249 c.fLogicalColumnId += offset;
   // The target id must be free, otherwise the emplace below would silently keep the old entry.
1250 R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0);
1251 fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c));
1252 }
1253}
1254
1256{
   // Moves a cluster descriptor into the descriptor under construction, keyed by its id.
   // Fails with "cluster id clash" if a cluster with the same id is already stored.
   // The id is read before the move, so the moved-from object is not used afterwards.
1257 auto clusterId = clusterDesc.GetId();
1258 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
1259 return R__FAIL("cluster id clash");
1260 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
1261 return RResult<void>::Success();
1262}
1263
1266{
   // Appends an extra type info record unless an equal one (per operator==) is already stored,
   // in which case an error is returned and nothing is changed.
   // The duplicate check is a linear scan over the stored records.
1267 // Make sure we have no duplicates
1268 if (std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1269 extraTypeInfoDesc) != fDescriptor.fExtraTypeInfoDescriptors.end()) {
1270 return R__FAIL("extra type info duplicates");
1271 }
1272 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1273 return RResult<void>::Success();
1274}
1275
1278{
   // Insert-or-replace counterpart of AddExtraTypeInfo: if a matching record is found it is
   // overwritten with the new one, otherwise the new record is appended. This function cannot fail.
   // NOTE(review): listing line 1280 (the value searched for, closing the std::find call) is absent
   // from this extract.
1279 auto it = std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1281 if (it != fDescriptor.fExtraTypeInfoDescriptors.end())
1282 *it = std::move(extraTypeInfoDesc);
1283 else
1284 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1285}
1286
1289{
   // Collects streamer info records for the descriptor returned by GetDescriptor() and returns
   // them as a map. Two sources are visited: custom-class fields found by walking the field tree
   // from the zero field, and extra type info records whose content id is kStreamerInfo.
   // Throws RException if a TClass or its streamer info cannot be obtained for a custom-class field.
   // NOTE(review): listing lines 1290 (declaration of `streamerInfoMap`), 1294 (the lambda header
   // assigned to fnWalkFieldTree), 1305, 1310 and 1324 are absent from this extract; the statements
   // that store into the map and recurse are therefore not visible here.
1291 const auto &desc = GetDescriptor();
1292
   // Declared as std::function first so that the walker can refer to itself.
1293 std::function<void(const RFieldDescriptor &)> fnWalkFieldTree;
1295 if (fieldDesc.IsCustomClass()) {
1296 // Add streamer info for this class to streamerInfoMap
1297 auto cl = TClass::GetClass(fieldDesc.GetTypeName().c_str());
1298 if (!cl) {
1299 throw RException(R__FAIL(std::string("cannot get TClass for ") + fieldDesc.GetTypeName()));
1300 }
   // The streamer info is requested for the type version recorded in the field descriptor.
1301 auto streamerInfo = cl->GetStreamerInfo(fieldDesc.GetTypeVersion());
1302 if (!streamerInfo) {
1303 throw RException(R__FAIL(std::string("cannot get streamerInfo for ") + fieldDesc.GetTypeName()));
1304 }
1306 }
1307
1308 // Recursively traverse sub fields
1309 for (const auto &subFieldDesc : desc.GetFieldIterable(fieldDesc)) {
1311 }
1312 };
1313
1314 fnWalkFieldTree(desc.GetFieldZero());
1315
1316 // Add the streamer info records from streamer fields: because of runtime polymorphism we may need to add additional
1317 // types not covered by the type names stored in the field headers
1318 for (const auto &extraTypeInfo : desc.GetExtraTypeInfoIterable()) {
   // Records with any other content id are ignored.
1319 if (extraTypeInfo.GetContentId() != EExtraTypeInfoIds::kStreamerInfo)
1320 continue;
1321 // Ideally, we would avoid deserializing the streamer info records of the streamer fields that we just serialized.
1322 // However, this happens only once at the end of writing and only when streamer fields are used, so the
1323 // preference here is for code simplicity.
1325 }
1326
1327 return streamerInfoMap;
1328}
1329
1335
1341
1348
1351{
   // Convenience overload: looks up the descriptor for fieldId and delegates to the
   // overload that takes a field descriptor.
1352 return GetFieldIterable(GetFieldDescriptor(fieldId));
1353}
1354
1361
1364{
   // The top-level fields are the fields iterated for the zero field's id.
1365 return GetFieldIterable(GetFieldZeroId());
1366}
1367
1370 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const
1371{
   // Same as the overload without arguments, but forwards the caller's comparator
   // to GetFieldIterable.
1372 return GetFieldIterable(GetFieldZeroId(), comparator);
1373}
1374
1380
1386
1392
1398
1404
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:303
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
char name[80]
Definition TGX11.cxx:110
#define _(A, B)
Definition cfortran.h:108
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RResult< void > MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RFieldDescriptor.
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const
Get the streamer info records for custom classes. Currently requires the corresponding dictionaries t...
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
void ShiftAliasColumns(std::uint32_t offset)
If the descriptor is constructed in pieces consisting of physical and alias columns (regular and proj...
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
void SetSchemaFromExisting(const RNTupleDescriptor &descriptor)
Copies the "schema" part of descriptor into the builder's descriptor.
RResult< void > AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId)
void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId)
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
RResult< void > EnsureFieldExists(ROOT::DescriptorId_t fieldId) const
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
void Reset()
Clears the clusters, fields, and columns stored so far and returns to a pristine ntuple descriptor.
static RResult< StreamerInfoMap_t > DeserializeStreamerInfos(const std::string &extraTypeInfoContent)
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
The window of element indexes of a particular column in a particular cluster.
Records the partition of data into pages for a particular column in a particular cluster.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
Meta-data for a set of ntuple clusters.
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
std::unordered_map< ROOT::DescriptorId_t, RPageRange > fPageRanges
bool operator==(const RClusterDescriptor &other) const
std::unordered_map< ROOT::DescriptorId_t, RColumnRange > fColumnRanges
ROOT::NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
std::vector< ROOT::DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
std::uint64_t fPageListLength
Uncompressed size of the page list.
RClusterGroupDescriptor CloneSummary() const
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool operator==(const RClusterGroupDescriptor &other) const
Meta-data stored for every column of an ntuple.
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
ROOT::DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
ROOT::ENTupleColumnType fType
The on-disk column type.
ROOT::DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
ROOT::DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Field-specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
A field translates read and write calls from/to underlying columns to/from tree values.
@ kTraitTypeChecksum
The TClass checksum is set and valid.
@ kTraitInvalidField
This field is an instance of RInvalidField and can be safely static_cast to it.
Meta-data stored for every field of an ntuple.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
ROOT::DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
ROOT::DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
std::vector< ROOT::DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::vector< ROOT::DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ROOT::ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
Used in RFieldBase::Check() to record field creation failures.
Definition RField.hxx:76
@ kGeneric
Generic unrecoverable error.
@ kUnknownStructure
The field could not be created because its descriptor had an unknown structural role.
Used to loop over all the clusters of an ntuple (in unspecified order)
Used to loop over all the cluster groups of an ntuple (in unspecified order)
std::vector< ROOT::DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
Used to loop over all the extra type info records of an ntuple (in unspecified order)
std::vector< ROOT::DescriptorId_t > GetTopLevelFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header,...
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
RResult< void > DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId)
RResult< void > AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
std::unique_ptr< RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored meta-data.
std::vector< ROOT::DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable() const
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::unordered_map< ROOT::DescriptorId_t, RFieldDescriptor > fFieldDescriptors
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
bool operator==(const RNTupleDescriptor &other) const
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
ROOT::DescriptorId_t fFieldZeroId
Set by the descriptor builder.
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::unique_ptr< RHeaderExtension > fHeaderExtension
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
RNTupleDescriptor CloneSchema() const
Creates a descriptor containing only the schema information about this RNTuple, i....
RClusterDescriptorIterable GetClusterIterable() const
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::vector< std::uint64_t > GetFeatureFlags() const
static std::unique_ptr< RNTupleModel > Create()
static std::unique_ptr< RNTupleModel > CreateBare()
A bare model has no default entry.
static std::unique_ptr< RVectorField > CreateUntyped(std::string_view fieldName, std::unique_ptr< RFieldBase > itemField)
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Generic information about the physical location of data.
const_iterator begin() const
const_iterator end() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3069
struct void * fTypeName
Definition cppyy.h:9
const Int_t n
Definition legend1.C:16
Double_t ex[n]
Definition legend1.C:17
RResult< void > EnsureValidNameForRNTuple(std::string_view name, std::string_view where)
Check whether a given string is a valid name according to the RNTuple specification.
ROOT::RResult< std::unique_ptr< ROOT::Experimental::RFieldBase > > CallFieldBaseCreate(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
We do not need to store the element size / uncompressed page size because we know to which column the...
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345