Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-10-04
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#include <ROOT/RError.hxx>
18#include <ROOT/RFieldBase.hxx>
20#include <ROOT/RNTupleModel.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPage.hxx>
23#include <string_view>
24
25#include <RZip.h>
26#include <TError.h>
28
29#include <algorithm>
30#include <cstdint>
31#include <deque>
32#include <functional>
33#include <iostream>
34#include <set>
35#include <utility>
36
38{
39 return fFieldId == other.fFieldId && fFieldVersion == other.fFieldVersion && fTypeVersion == other.fTypeVersion &&
41 fTypeName == other.fTypeName && fTypeAlias == other.fTypeAlias && fNRepetitions == other.fNRepetitions &&
42 fStructure == other.fStructure && fParentId == other.fParentId &&
45}
46
48{
49 RFieldDescriptor clone;
50 clone.fFieldId = fFieldId;
51 clone.fFieldVersion = fFieldVersion;
52 clone.fTypeVersion = fTypeVersion;
53 clone.fFieldName = fFieldName;
54 clone.fFieldDescription = fFieldDescription;
55 clone.fTypeName = fTypeName;
56 clone.fTypeAlias = fTypeAlias;
57 clone.fNRepetitions = fNRepetitions;
58 clone.fStructure = fStructure;
59 clone.fParentId = fParentId;
60 clone.fProjectionSourceId = fProjectionSourceId;
61 clone.fLinkIds = fLinkIds;
62 clone.fColumnCardinality = fColumnCardinality;
63 clone.fLogicalColumnIds = fLogicalColumnIds;
64 clone.fTypeChecksum = fTypeChecksum;
65 return clone;
66}
67
68std::unique_ptr<ROOT::Experimental::RFieldBase>
70{
71 if (GetStructure() == ENTupleStructure::kUnsplit) {
72 auto unsplitField = std::make_unique<RUnsplitField>(GetFieldName(), GetTypeName());
73 unsplitField->SetOnDiskId(fFieldId);
74 return unsplitField;
75 }
76
77 if (GetTypeName().empty()) {
78 // For untyped records or collections, we have no class available to collect all the sub fields.
79 // Therefore, we create an untyped record field as an artificial binder for the record itself, and in the case of
80 // collections, its items.
81 std::vector<std::unique_ptr<RFieldBase>> memberFields;
82 for (auto id : fLinkIds) {
83 const auto &memberDesc = ntplDesc.GetFieldDescriptor(id);
84 memberFields.emplace_back(memberDesc.CreateField(ntplDesc));
85 }
86 if (GetStructure() == ENTupleStructure::kRecord) {
87 auto recordField = std::make_unique<RRecordField>(GetFieldName(), memberFields);
88 recordField->SetOnDiskId(fFieldId);
89 return recordField;
90 } else if (GetStructure() == ENTupleStructure::kCollection) {
91 auto recordField = std::make_unique<RRecordField>("_0", memberFields);
92 auto collectionField = std::make_unique<RVectorField>(GetFieldName(), std::move(recordField));
93 collectionField->SetOnDiskId(fFieldId);
94 return collectionField;
95 } else {
96 throw RException(R__FAIL("unknown field type for field \"" + GetFieldName() + "\""));
97 }
98 }
99
100 auto field = RFieldBase::Create(GetFieldName(), GetTypeAlias().empty() ? GetTypeName() : GetTypeAlias()).Unwrap();
101 field->SetOnDiskId(fFieldId);
102 for (auto &f : *field)
103 f.SetOnDiskId(ntplDesc.FindFieldId(f.GetFieldName(), f.GetParent()->GetOnDiskId()));
104 return field;
105}
106
108{
109 if (fStructure != ENTupleStructure::kRecord && fStructure != ENTupleStructure::kUnsplit)
110 return false;
111
112 // Skip untyped structs
113 if (fTypeName.empty())
114 return false;
115
116 if (fStructure == ENTupleStructure::kRecord) {
117 if (fTypeName.compare(0, 10, "std::pair<") == 0)
118 return false;
119 if (fTypeName.compare(0, 11, "std::tuple<") == 0)
120 return false;
121 }
122
123 return true;
124}
125
126////////////////////////////////////////////////////////////////////////////////
127
129{
130 return fLogicalColumnId == other.fLogicalColumnId && fPhysicalColumnId == other.fPhysicalColumnId &&
131 fBitsOnStorage == other.fBitsOnStorage && fType == other.fType && fFieldId == other.fFieldId &&
132 fIndex == other.fIndex && fRepresentationIndex == other.fRepresentationIndex;
133}
134
136{
137 RColumnDescriptor clone;
138 clone.fLogicalColumnId = fLogicalColumnId;
139 clone.fPhysicalColumnId = fPhysicalColumnId;
140 clone.fBitsOnStorage = fBitsOnStorage;
141 clone.fType = fType;
142 clone.fFieldId = fFieldId;
143 clone.fIndex = fIndex;
144 clone.fFirstElementIndex = fFirstElementIndex;
145 clone.fRepresentationIndex = fRepresentationIndex;
146 return clone;
147}
148
149////////////////////////////////////////////////////////////////////////////////
150
153{
154 // TODO(jblomer): binary search
155 RPageInfo pageInfo;
156 decltype(idxInCluster) firstInPage = 0;
157 NTupleSize_t pageNo = 0;
158 for (const auto &pi : fPageInfos) {
159 if (firstInPage + pi.fNElements > idxInCluster) {
160 pageInfo = pi;
161 break;
162 }
163 firstInPage += pi.fNElements;
164 ++pageNo;
165 }
166 R__ASSERT(firstInPage <= idxInCluster);
167 R__ASSERT((firstInPage + pageInfo.fNElements) > idxInCluster);
168 return RPageInfoExtended{pageInfo, firstInPage, pageNo};
169}
170
171std::size_t
173 const Internal::RColumnElementBase &element,
174 std::size_t pageSize)
175{
176 R__ASSERT(fPhysicalColumnId == columnRange.fPhysicalColumnId);
177 R__ASSERT(!columnRange.fIsSuppressed);
178
179 const auto nElements = std::accumulate(fPageInfos.begin(), fPageInfos.end(), 0U,
180 [](std::size_t n, const auto &PI) { return n + PI.fNElements; });
181 const auto nElementsRequired = static_cast<std::uint64_t>(columnRange.fNElements);
182
183 if (nElementsRequired == nElements)
184 return 0U;
185 R__ASSERT((nElementsRequired > nElements) && "invalid attempt to shrink RPageRange");
186
187 std::vector<RPageInfo> pageInfos;
188 // Synthesize new `RPageInfo`s as needed
189 const std::uint64_t nElementsPerPage = pageSize / element.GetSize();
190 R__ASSERT(nElementsPerPage > 0);
191 for (auto nRemainingElements = nElementsRequired - nElements; nRemainingElements > 0;) {
193 PI.fNElements = std::min(nElementsPerPage, nRemainingElements);
194 PI.fLocator.fType = RNTupleLocator::kTypePageZero;
195 PI.fLocator.fBytesOnStorage = element.GetPackedSize(PI.fNElements);
196 pageInfos.emplace_back(PI);
197 nRemainingElements -= PI.fNElements;
198 }
199
200 pageInfos.insert(pageInfos.end(), std::make_move_iterator(fPageInfos.begin()),
201 std::make_move_iterator(fPageInfos.end()));
202 std::swap(fPageInfos, pageInfos);
203 return nElementsRequired - nElements;
204}
205
207{
208 return fClusterId == other.fClusterId && fFirstEntryIndex == other.fFirstEntryIndex &&
209 fNEntries == other.fNEntries && fColumnRanges == other.fColumnRanges && fPageRanges == other.fPageRanges;
210}
211
213{
214 std::uint64_t nbytes = 0;
215 for (const auto &pr : fPageRanges) {
216 for (const auto &pi : pr.second.fPageInfos) {
217 nbytes += pi.fLocator.fBytesOnStorage;
218 }
219 }
220 return nbytes;
221}
222
224{
225 RClusterDescriptor clone;
226 clone.fClusterId = fClusterId;
227 clone.fFirstEntryIndex = fFirstEntryIndex;
228 clone.fNEntries = fNEntries;
229 clone.fColumnRanges = fColumnRanges;
230 for (const auto &d : fPageRanges)
231 clone.fPageRanges.emplace(d.first, d.second.Clone());
232 return clone;
233}
234
235////////////////////////////////////////////////////////////////////////////////
236
238{
239 return fContentId == other.fContentId && fTypeName == other.fTypeName &&
240 fTypeVersionFrom == other.fTypeVersionFrom && fTypeVersionTo == other.fTypeVersionTo;
241}
242
244{
246 clone.fContentId = fContentId;
247 clone.fTypeVersionFrom = fTypeVersionFrom;
248 clone.fTypeVersionTo = fTypeVersionTo;
249 clone.fTypeName = fTypeName;
250 clone.fContent = fContent;
251 return clone;
252}
253
254////////////////////////////////////////////////////////////////////////////////
255
257{
258 // clang-format off
259 return fName == other.fName &&
260 fDescription == other.fDescription &&
261 fNEntries == other.fNEntries &&
262 fGeneration == other.fGeneration &&
263 fFieldZeroId == other.fFieldZeroId &&
264 fFieldDescriptors == other.fFieldDescriptors &&
265 fColumnDescriptors == other.fColumnDescriptors &&
266 fClusterGroupDescriptors == other.fClusterGroupDescriptors &&
267 fClusterDescriptors == other.fClusterDescriptors;
268 // clang-format on
269}
270
273{
275 for (const auto &cd : fClusterDescriptors) {
276 if (!cd.second.ContainsColumn(physicalColumnId))
277 continue;
278 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
279 result = std::max(result, columnRange.fFirstElementIndex + columnRange.fNElements);
280 }
281 return result;
282}
283
286{
287 std::string leafName(fieldName);
288 auto posDot = leafName.find_last_of('.');
289 if (posDot != std::string::npos) {
290 auto parentName = leafName.substr(0, posDot);
291 leafName = leafName.substr(posDot + 1);
292 parentId = FindFieldId(parentName, parentId);
293 }
294 auto itrFieldDesc = fFieldDescriptors.find(parentId);
295 if (itrFieldDesc == fFieldDescriptors.end())
297 for (const auto linkId : itrFieldDesc->second.GetLinkIds()) {
298 if (fFieldDescriptors.at(linkId).GetFieldName() == leafName)
299 return linkId;
300 }
302}
303
305{
306 if (fieldId == kInvalidDescriptorId)
307 return "";
308
309 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
310 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
311 if (prefix.empty())
312 return fieldDescriptor.GetFieldName();
313 return prefix + "." + fieldDescriptor.GetFieldName();
314}
315
317{
318 return FindFieldId(fieldName, GetFieldZeroId());
319}
320
323 std::uint16_t representationIndex) const
324{
325 auto itr = fFieldDescriptors.find(fieldId);
326 if (itr == fFieldDescriptors.cend())
328 if (columnIndex >= itr->second.GetColumnCardinality())
330 const auto idx = representationIndex * itr->second.GetColumnCardinality() + columnIndex;
331 if (itr->second.GetLogicalColumnIds().size() <= idx)
333 return itr->second.GetLogicalColumnIds()[idx];
334}
335
338 std::uint16_t representationIndex) const
339{
340 auto logicalId = FindLogicalColumnId(fieldId, columnIndex, representationIndex);
341 if (logicalId == kInvalidDescriptorId)
343 return GetColumnDescriptor(logicalId).GetPhysicalId();
344}
345
348{
349 // TODO(jblomer): binary search?
350 for (const auto &cd : fClusterDescriptors) {
351 if (!cd.second.ContainsColumn(physicalColumnId))
352 continue;
353 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
354 if (columnRange.Contains(index))
355 return cd.second.GetId();
356 }
358}
359
360// TODO(jblomer): fix for cases of sharded clusters
363{
364 const auto &clusterDesc = GetClusterDescriptor(clusterId);
365 auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
366 // TODO(jblomer): binary search?
367 for (const auto &cd : fClusterDescriptors) {
368 if (cd.second.GetFirstEntryIndex() == firstEntryInNextCluster)
369 return cd.second.GetId();
370 }
372}
373
374// TODO(jblomer): fix for cases of sharded clusters
377{
378 const auto &clusterDesc = GetClusterDescriptor(clusterId);
379 // TODO(jblomer): binary search?
380 for (const auto &cd : fClusterDescriptors) {
381 if (cd.second.GetFirstEntryIndex() + cd.second.GetNEntries() == clusterDesc.GetFirstEntryIndex())
382 return cd.second.GetId();
383 }
385}
386
387std::vector<ROOT::Experimental::DescriptorId_t>
389{
390 auto fieldZeroId = desc.GetFieldZeroId();
391
392 std::vector<DescriptorId_t> fields;
393 for (const DescriptorId_t fieldId : fFieldIdsOrder) {
394 if (desc.GetFieldDescriptor(fieldId).GetParentId() == fieldZeroId)
395 fields.emplace_back(fieldId);
396 }
397 return fields;
398}
399
401 const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
402 : fNTuple(ntuple), fColumns(field.GetLogicalColumnIds())
403{
404}
405
407 const RNTupleDescriptor &ntuple)
408 : fNTuple(ntuple)
409{
410 std::deque<DescriptorId_t> fieldIdQueue{ntuple.GetFieldZeroId()};
411
412 while (!fieldIdQueue.empty()) {
413 auto currFieldId = fieldIdQueue.front();
414 fieldIdQueue.pop_front();
415
416 const auto &columns = ntuple.GetFieldDescriptor(currFieldId).GetLogicalColumnIds();
417 fColumns.insert(fColumns.end(), columns.begin(), columns.end());
418
419 for (const auto &field : ntuple.GetFieldIterable(currFieldId)) {
420 auto fieldId = field.GetId();
421 fieldIdQueue.push_back(fieldId);
422 }
423 }
424}
425
427{
428 std::vector<std::uint64_t> result;
429 unsigned int base = 0;
430 std::uint64_t flags = 0;
431 for (auto f : fFeatureFlags) {
432 if ((f > 0) && ((f % 64) == 0))
433 throw RException(R__FAIL("invalid feature flag: " + std::to_string(f)));
434 while (f > base + 64) {
435 result.emplace_back(flags);
436 flags = 0;
437 base += 64;
438 }
439 f -= base;
440 flags |= 1 << f;
441 }
442 result.emplace_back(flags);
443 return result;
444}
445
448 std::vector<RClusterDescriptor> &clusterDescs)
449{
450 auto iter = fClusterGroupDescriptors.find(clusterGroupId);
451 if (iter == fClusterGroupDescriptors.end())
452 return R__FAIL("invalid attempt to add details of unknown cluster group");
453 if (iter->second.HasClusterDetails())
454 return R__FAIL("invalid attempt to re-populate cluster group details");
455 if (iter->second.GetNClusters() != clusterDescs.size())
456 return R__FAIL("mismatch of number of clusters");
457
458 std::vector<DescriptorId_t> clusterIds;
459 for (unsigned i = 0; i < clusterDescs.size(); ++i) {
460 clusterIds.emplace_back(clusterDescs[i].GetId());
461 auto [_, success] = fClusterDescriptors.emplace(clusterIds.back(), std::move(clusterDescs[i]));
462 if (!success) {
463 return R__FAIL("invalid attempt to re-populate existing cluster");
464 }
465 }
466 auto cgBuilder = Internal::RClusterGroupDescriptorBuilder::FromSummary(iter->second);
467 cgBuilder.AddClusters(clusterIds);
468 iter->second = cgBuilder.MoveDescriptor().Unwrap();
469 return RResult<void>::Success();
470}
471
474{
475 auto iter = fClusterGroupDescriptors.find(clusterGroupId);
476 if (iter == fClusterGroupDescriptors.end())
477 return R__FAIL("invalid attempt to drop cluster details of unknown cluster group");
478 if (!iter->second.HasClusterDetails())
479 return R__FAIL("invalid attempt to drop details of cluster group summary");
480
481 for (auto clusterId : iter->second.GetClusterIds())
482 fClusterDescriptors.erase(clusterId);
483 iter->second = iter->second.CloneSummary();
484 return RResult<void>::Success();
485}
486
487std::unique_ptr<ROOT::Experimental::RNTupleModel>
489{
490 auto fieldZero = std::make_unique<RFieldZero>();
491 fieldZero->SetOnDiskId(GetFieldZeroId());
492 auto model = RNTupleModel::Create(std::move(fieldZero));
493 for (const auto &topDesc : GetTopLevelFields()) {
494 auto field = topDesc.CreateField(*this);
495 if (options.fReconstructProjections && topDesc.IsProjectedField()) {
496 model->AddProjectedField(std::move(field), [this](const std::string &targetName) -> std::string {
497 return GetQualifiedFieldName(GetFieldDescriptor(FindFieldId(targetName)).GetProjectionSourceId());
498 });
499 } else {
500 model->AddField(std::move(field));
501 }
502 }
503 model->Freeze();
504 return model;
505}
506
507std::unique_ptr<ROOT::Experimental::RNTupleDescriptor> ROOT::Experimental::RNTupleDescriptor::Clone() const
508{
509 auto clone = std::make_unique<RNTupleDescriptor>();
510 clone->fName = fName;
511 clone->fDescription = fDescription;
512 clone->fOnDiskHeaderXxHash3 = fOnDiskHeaderXxHash3;
513 clone->fOnDiskHeaderSize = fOnDiskHeaderSize;
514 clone->fOnDiskFooterSize = fOnDiskFooterSize;
515 clone->fNEntries = fNEntries;
516 clone->fNClusters = fNClusters;
517 clone->fNPhysicalColumns = fNPhysicalColumns;
518 clone->fFieldZeroId = fFieldZeroId;
519 clone->fGeneration = fGeneration;
520 for (const auto &d : fFieldDescriptors)
521 clone->fFieldDescriptors.emplace(d.first, d.second.Clone());
522 for (const auto &d : fColumnDescriptors)
523 clone->fColumnDescriptors.emplace(d.first, d.second.Clone());
524 for (const auto &d : fClusterGroupDescriptors)
525 clone->fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
526 for (const auto &d : fClusterDescriptors)
527 clone->fClusterDescriptors.emplace(d.first, d.second.Clone());
528 for (const auto &d : fExtraTypeInfoDescriptors)
529 clone->fExtraTypeInfoDescriptors.emplace_back(d.Clone());
531 clone->fHeaderExtension = std::make_unique<RHeaderExtension>(*fHeaderExtension);
532 return clone;
533}
534
535////////////////////////////////////////////////////////////////////////////////
536
538{
539 return fColumnGroupId == other.fColumnGroupId && fPhysicalColumnIds == other.fPhysicalColumnIds;
540}
541
542////////////////////////////////////////////////////////////////////////////////
543
545{
546 return fClusterGroupId == other.fClusterGroupId && fClusterIds == other.fClusterIds &&
547 fMinEntry == other.fMinEntry && fEntrySpan == other.fEntrySpan && fNClusters == other.fNClusters;
548}
549
551{
553 clone.fClusterGroupId = fClusterGroupId;
554 clone.fPageListLocator = fPageListLocator;
555 clone.fPageListLength = fPageListLength;
556 clone.fMinEntry = fMinEntry;
557 clone.fEntrySpan = fEntrySpan;
558 clone.fNClusters = fNClusters;
559 return clone;
560}
561
563{
564 RClusterGroupDescriptor clone = CloneSummary();
565 clone.fClusterIds = fClusterIds;
566 return clone;
567}
568
569////////////////////////////////////////////////////////////////////////////////
570
572 DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings,
573 const RClusterDescriptor::RPageRange &pageRange)
574{
575 if (physicalId != pageRange.fPhysicalColumnId)
576 return R__FAIL("column ID mismatch");
577 if (fCluster.fColumnRanges.count(physicalId) > 0)
578 return R__FAIL("column ID conflict");
579 RClusterDescriptor::RColumnRange columnRange{physicalId, firstElementIndex, ClusterSize_t{0}};
580 columnRange.fCompressionSettings = compressionSettings;
581 for (const auto &pi : pageRange.fPageInfos) {
582 columnRange.fNElements += pi.fNElements;
583 }
584 fCluster.fPageRanges[physicalId] = pageRange.Clone();
585 fCluster.fColumnRanges[physicalId] = columnRange;
586 return RResult<void>::Success();
587}
588
591{
592 if (fCluster.fColumnRanges.count(physicalId) > 0)
593 return R__FAIL("column ID conflict");
594
596 columnRange.fPhysicalColumnId = physicalId;
598 columnRange.fIsSuppressed = true;
599 fCluster.fColumnRanges[physicalId] = columnRange;
600 return RResult<void>::Success();
601}
602
605{
606 for (auto &[_, columnRange] : fCluster.fColumnRanges) {
607 if (!columnRange.fIsSuppressed)
608 continue;
609 R__ASSERT(columnRange.fFirstElementIndex == kInvalidNTupleIndex);
610
611 const auto &columnDesc = desc.GetColumnDescriptor(columnRange.fPhysicalColumnId);
612 const auto &fieldDesc = desc.GetFieldDescriptor(columnDesc.GetFieldId());
613 // We expect only few columns and column representations per field, so we do a linear search
614 for (const auto otherColumnLogicalId : fieldDesc.GetLogicalColumnIds()) {
615 const auto &otherColumnDesc = desc.GetColumnDescriptor(otherColumnLogicalId);
616 if (otherColumnDesc.GetRepresentationIndex() == columnDesc.GetRepresentationIndex())
617 continue;
618 if (otherColumnDesc.GetIndex() != columnDesc.GetIndex())
619 continue;
620
621 // Found corresponding column of a different column representation
622 const auto &otherColumnRange = fCluster.GetColumnRange(otherColumnDesc.GetPhysicalId());
623 if (otherColumnRange.fIsSuppressed)
624 continue;
625
626 columnRange.fFirstElementIndex = otherColumnRange.fFirstElementIndex;
627 columnRange.fNElements = otherColumnRange.fNElements;
628 break;
629 }
630
631 if (columnRange.fFirstElementIndex == kInvalidNTupleIndex) {
632 return R__FAIL(std::string("cannot find non-suppressed column for column ID ") +
633 std::to_string(columnRange.fPhysicalColumnId) +
634 ", cluster ID: " + std::to_string(fCluster.GetId()));
635 }
636 }
637 return RResult<void>::Success();
638}
639
642{
// Makes sure that every column reachable from the top-level fields of `desc` has a column range in the
// cluster under construction. Column ranges that did not exist yet are initialized; for deferred columns the
// range is recomputed from the cluster's entry range and, unless the column is suppressed, the page range is
// padded through ExtendToFitColumnRange(). Nothing is done if `desc` has no header extension.
// Returns *this.
643 /// Carries out a depth-first traversal of a field subtree rooted at `rootFieldId`. For each field, `visitField` is
644 /// called passing the field ID and the number of overall repetitions, taking into account the repetitions of each
645 /// parent field in the hierarchy.
// The lambda receives itself as `enterSubtree` in order to recurse into the sub fields.
646 auto fnTraverseSubtree = [&](DescriptorId_t rootFieldId, std::uint64_t nRepetitionsAtThisLevel,
647 const auto &visitField, const auto &enterSubtree) -> void {
648 visitField(rootFieldId, nRepetitionsAtThisLevel);
649 for (const auto &f : desc.GetFieldIterable(rootFieldId)) {
// A repetition count of 0 is treated as 1 so that the product does not collapse to zero
650 const std::uint64_t nRepetitions = std::max(f.GetNRepetitions(), std::uint64_t{1U}) * nRepetitionsAtThisLevel;
651 enterSubtree(f.GetId(), nRepetitions, visitField, enterSubtree);
652 }
653 };
654
655 // Extended columns can only be part of the header extension
656 if (!desc.GetHeaderExtension())
657 return *this;
658
659 // Ensure that all columns in the header extension have their associated `R(Column|Page)Range`
660 // Extended columns can be attached both to fields of the regular header and to fields of the extension header
661 for (const auto &topLevelField : desc.GetTopLevelFields()) {
662 fnTraverseSubtree(
663 topLevelField.GetId(), std::max(topLevelField.GetNRepetitions(), std::uint64_t{1U}),
664 [&](DescriptorId_t fieldId, std::uint64_t nRepetitions) {
665 for (const auto &c : desc.GetColumnIterable(fieldId)) {
666 const DescriptorId_t physicalId = c.GetPhysicalId();
// operator[] creates an entry if there is none yet; the check below detects that case through the
// physical column id still being invalid
667 auto &columnRange = fCluster.fColumnRanges[physicalId];
668
669 // Initialize a RColumnRange for `physicalId` if it was not there. Columns that were created during model
670 // extension won't have on-disk metadata for the clusters that were already committed before the model
671 // was extended. Therefore, these need to be synthetically initialized upon reading.
672 if (columnRange.fPhysicalColumnId == kInvalidDescriptorId) {
673 columnRange.fPhysicalColumnId = physicalId;
674 columnRange.fFirstElementIndex = 0;
675 columnRange.fNElements = 0;
676 columnRange.fIsSuppressed = c.IsSuppressedDeferredColumn();
677 }
678 // Fixup the RColumnRange and RPageRange in deferred columns. We know what the first element index and
679 // number of elements should have been if the column was not deferred; fix those and let
680 // `ExtendToFitColumnRange()` synthesize RPageInfos accordingly.
681 // Note that a deferred column (i.e, whose first element index is > 0) already met the criteria of
682 // `RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the field zero
683 // excluding subfields of collection and variant fields.
684 if (c.IsDeferredColumn()) {
685 columnRange.fFirstElementIndex = fCluster.GetFirstEntryIndex() * nRepetitions;
686 columnRange.fNElements = fCluster.GetNEntries() * nRepetitions;
687 if (!columnRange.fIsSuppressed) {
688 auto &pageRange = fCluster.fPageRanges[physicalId];
689 pageRange.fPhysicalColumnId = physicalId;
690 const auto element = Internal::RColumnElementBase::Generate<void>(c.GetType());
691 pageRange.ExtendToFitColumnRange(columnRange, *element, Internal::RPage::kPageZeroSize);
692 }
693 } else if (!columnRange.fIsSuppressed) {
// Non-deferred, non-suppressed column: only make sure a page range entry with the right id exists
694 fCluster.fPageRanges[physicalId].fPhysicalColumnId = physicalId;
695 }
696 }
697 },
698 fnTraverseSubtree);
699 }
700 return *this;
701}
702
705{
706 if (fCluster.fClusterId == kInvalidDescriptorId)
707 return R__FAIL("unset cluster ID");
708 if (fCluster.fNEntries == 0)
709 return R__FAIL("empty cluster");
710 for (const auto &pr : fCluster.fPageRanges) {
711 if (fCluster.fColumnRanges.count(pr.first) == 0) {
712 return R__FAIL("missing column range");
713 }
714 }
716 std::swap(result, fCluster);
717 return result;
718}
719
720////////////////////////////////////////////////////////////////////////////////
721
724 const RClusterGroupDescriptor &clusterGroupDesc)
725{
727 builder.ClusterGroupId(clusterGroupDesc.GetId())
728 .PageListLocator(clusterGroupDesc.GetPageListLocator())
729 .PageListLength(clusterGroupDesc.GetPageListLength())
730 .MinEntry(clusterGroupDesc.GetMinEntry())
731 .EntrySpan(clusterGroupDesc.GetEntrySpan())
732 .NClusters(clusterGroupDesc.GetNClusters());
733 return builder;
734}
735
738{
739 if (fClusterGroup.fClusterGroupId == kInvalidDescriptorId)
740 return R__FAIL("unset cluster group ID");
742 std::swap(result, fClusterGroup);
743 return result;
744}
745
746////////////////////////////////////////////////////////////////////////////////
747
750{
751 if (fColumnGroup.fColumnGroupId == kInvalidDescriptorId)
752 return R__FAIL("unset column group ID");
754 std::swap(result, fColumnGroup);
755 return result;
756}
757
758////////////////////////////////////////////////////////////////////////////////
759
762{
763 if (fExtraTypeInfo.fContentId == EExtraTypeInfoIds::kInvalid)
764 throw RException(R__FAIL("invalid extra type info content id"));
766 std::swap(result, fExtraTypeInfo);
767 return result;
768}
769
770////////////////////////////////////////////////////////////////////////////////
771
774{
775 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
776 return R__FAIL("field with id '" + std::to_string(fieldId) + "' doesn't exist");
777 return RResult<void>::Success();
778}
779
781{
782 // Reuse field name validity check
783 auto validName = RFieldBase::EnsureValidFieldName(fDescriptor.GetName());
784 if (!validName) {
785 return R__FORWARD_ERROR(validName);
786 }
787
788 for (const auto &[fieldId, fieldDesc] : fDescriptor.fFieldDescriptors) {
789 // parent not properly set?
790 if (fieldId != fDescriptor.GetFieldZeroId() && fieldDesc.GetParentId() == kInvalidDescriptorId) {
791 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has an invalid parent id");
792 }
793
794 // Same number of columns in every column representation?
795 const auto columnCardinality = fieldDesc.GetColumnCardinality();
796 if (columnCardinality == 0)
797 continue;
798
799 // In AddColumn, we already checked that all but the last representation are complete.
800 // Check that the last column representation is complete, i.e. has all columns.
801 const auto &logicalColumnIds = fieldDesc.GetLogicalColumnIds();
802 const auto nColumns = logicalColumnIds.size();
803 // If we have only a single column representation, the following condition is true by construction
804 if ((nColumns + 1) == columnCardinality)
805 continue;
806
807 const auto &lastColumn = fDescriptor.GetColumnDescriptor(logicalColumnIds.back());
808 if (lastColumn.GetIndex() + 1 != columnCardinality)
809 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has incomplete column representations");
810 }
811
812 return RResult<void>::Success();
813}
814
816{
817 EnsureValidDescriptor().ThrowOnError();
819 std::swap(result, fDescriptor);
820 return result;
821}
822
824 const std::string_view description)
825{
826 fDescriptor.fName = std::string(name);
827 fDescriptor.fDescription = std::string(description);
828}
829
831{
832 if (flag % 64 == 0)
833 throw RException(R__FAIL("invalid feature flag: " + std::to_string(flag)));
834 fDescriptor.fFeatureFlags.insert(flag);
835}
836
839{
840 if (fColumn.GetLogicalId() == kInvalidDescriptorId)
841 return R__FAIL("invalid logical column id");
842 if (fColumn.GetPhysicalId() == kInvalidDescriptorId)
843 return R__FAIL("invalid physical column id");
844 if (fColumn.GetType() == EColumnType::kUnknown)
845 return R__FAIL("invalid column model");
846 if (fColumn.GetFieldId() == kInvalidDescriptorId)
847 return R__FAIL("invalid field id, dangling column");
848
849 const auto [minBits, maxBits] = RColumnElementBase::GetValidBitRange(fColumn.GetType());
850 if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits)
851 return R__FAIL("invalid column bit width");
852
853 return fColumn.Clone();
854}
855
857 : fField(fieldDesc.Clone())
858{
860 fField.fLinkIds = {};
862}
863
866{
867 RFieldDescriptorBuilder fieldDesc;
868 fieldDesc.FieldVersion(field.GetFieldVersion())
870 .FieldName(field.GetFieldName())
872 .TypeName(field.GetTypeName())
873 .TypeAlias(field.GetTypeAlias())
874 .Structure(field.GetStructure())
877 fieldDesc.TypeChecksum(field.GetTypeChecksum());
878 return fieldDesc;
879}
880
883{
884 if (fField.GetId() == kInvalidDescriptorId) {
885 return R__FAIL("invalid field id");
886 }
887 if (fField.GetStructure() == ENTupleStructure::kInvalid) {
888 return R__FAIL("invalid field structure");
889 }
890 // FieldZero is usually named "" and would be a false positive here
891 if (fField.GetParentId() != kInvalidDescriptorId) {
892 auto validName = RFieldBase::EnsureValidFieldName(fField.GetFieldName());
893 if (!validName) {
894 return R__FORWARD_ERROR(validName);
895 }
896 }
897 return fField.Clone();
898}
899
901{
902 fDescriptor.fFieldDescriptors.emplace(fieldDesc.GetId(), fieldDesc.Clone());
903 if (fDescriptor.fHeaderExtension)
904 fDescriptor.fHeaderExtension->AddExtendedField(fieldDesc);
905 if (fieldDesc.GetFieldName().empty() && fieldDesc.GetParentId() == kInvalidDescriptorId) {
906 fDescriptor.fFieldZeroId = fieldDesc.GetId();
907 }
908}
909
912{
913 auto fieldExists = RResult<void>::Success();
914 if (!(fieldExists = EnsureFieldExists(fieldId)))
915 return R__FORWARD_ERROR(fieldExists);
916 if (!(fieldExists = EnsureFieldExists(linkId)))
917 return R__FAIL("child field with id '" + std::to_string(linkId) + "' doesn't exist in NTuple");
918
919 if (linkId == fDescriptor.GetFieldZeroId()) {
920 return R__FAIL("cannot make FieldZero a child field");
921 }
922 // fail if field already has another valid parent
923 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
924 if ((parentId != kInvalidDescriptorId) && (parentId != fieldId)) {
925 return R__FAIL("field '" + std::to_string(linkId) + "' already has a parent ('" + std::to_string(parentId) + ")");
926 }
927 if (fieldId == linkId) {
928 return R__FAIL("cannot make field '" + std::to_string(fieldId) + "' a child of itself");
929 }
930 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
931 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
932 return RResult<void>::Success();
933}
934
937 DescriptorId_t targetId)
938{
939 auto fieldExists = RResult<void>::Success();
940 if (!(fieldExists = EnsureFieldExists(sourceId)))
941 return R__FORWARD_ERROR(fieldExists);
942 if (!(fieldExists = EnsureFieldExists(targetId)))
943 return R__FAIL("projected field with id '" + std::to_string(targetId) + "' doesn't exist in NTuple");
944
945 if (targetId == fDescriptor.GetFieldZeroId()) {
946 return R__FAIL("cannot make FieldZero a projected field");
947 }
948 if (sourceId == targetId) {
949 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of itself");
950 }
951 if (fDescriptor.fFieldDescriptors.at(sourceId).IsProjectedField()) {
952 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of an already projected field");
953 }
954 // fail if target field already has another valid projection source
955 auto &targetDesc = fDescriptor.fFieldDescriptors.at(targetId);
956 if (targetDesc.IsProjectedField() && targetDesc.GetProjectionSourceId() != sourceId) {
957 return R__FAIL("field '" + std::to_string(targetId) + "' has already a projection source ('" +
958 std::to_string(targetDesc.GetProjectionSourceId()) + ")");
959 }
960 fDescriptor.fFieldDescriptors.at(targetId).fProjectionSourceId = sourceId;
961 return RResult<void>::Success();
962}
963
966{
967 const auto fieldId = columnDesc.GetFieldId();
968 const auto columnIndex = columnDesc.GetIndex();
969 const auto representationIndex = columnDesc.GetRepresentationIndex();
970
971 auto fieldExists = EnsureFieldExists(fieldId);
972 if (!fieldExists) {
973 return R__FORWARD_ERROR(fieldExists);
974 }
975 auto &fieldDesc = fDescriptor.fFieldDescriptors.find(fieldId)->second;
976
977 if (columnDesc.IsAliasColumn()) {
978 if (columnDesc.GetType() != fDescriptor.GetColumnDescriptor(columnDesc.GetPhysicalId()).GetType())
979 return R__FAIL("alias column type mismatch");
980 }
981 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex, representationIndex) != kInvalidDescriptorId) {
982 return R__FAIL("column index clash");
983 }
984 if (columnIndex > 0) {
985 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex - 1, representationIndex) == kInvalidDescriptorId)
986 return R__FAIL("out of bounds column index");
987 }
988 if (representationIndex > 0) {
989 if (fDescriptor.FindLogicalColumnId(fieldId, 0, representationIndex - 1) == kInvalidDescriptorId) {
990 return R__FAIL("out of bounds representation index");
991 }
992 if (columnIndex == 0) {
993 assert(fieldDesc.fColumnCardinality > 0);
994 if (fDescriptor.FindLogicalColumnId(fieldId, fieldDesc.fColumnCardinality - 1, representationIndex - 1) ==
996 return R__FAIL("incomplete column representations");
997 }
998 } else {
999 if (columnIndex >= fieldDesc.fColumnCardinality)
1000 return R__FAIL("irregular column representations");
1001 }
1002 } else {
1003 // This will set the column cardinality to the number of columns of the first representation
1004 fieldDesc.fColumnCardinality = columnIndex + 1;
1005 }
1006
1007 const auto logicalId = columnDesc.GetLogicalId();
1008 fieldDesc.fLogicalColumnIds.emplace_back(logicalId);
1009
1010 if (!columnDesc.IsAliasColumn())
1011 fDescriptor.fNPhysicalColumns++;
1012 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(columnDesc));
1013 if (fDescriptor.fHeaderExtension)
1014 fDescriptor.fHeaderExtension->AddExtendedColumn(columnDesc);
1015
1016 return RResult<void>::Success();
1017}
1018
1021{
1022 const auto id = clusterGroup.GetId();
1023 if (fDescriptor.fClusterGroupDescriptors.count(id) > 0)
1024 return R__FAIL("cluster group id clash");
1025 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, clusterGroup.GetMinEntry() + clusterGroup.GetEntrySpan());
1026 fDescriptor.fNClusters += clusterGroup.GetNClusters();
1027 fDescriptor.fClusterGroupDescriptors.emplace(id, std::move(clusterGroup));
1028 return RResult<void>::Success();
1029}
1030
1032{
1033 fDescriptor.fName = "";
1034 fDescriptor.fDescription = "";
1035 fDescriptor.fFieldDescriptors.clear();
1036 fDescriptor.fColumnDescriptors.clear();
1037 fDescriptor.fClusterDescriptors.clear();
1038 fDescriptor.fClusterGroupDescriptors.clear();
1039 fDescriptor.fHeaderExtension.reset();
1040}
1041
1043{
1044 if (!fDescriptor.fHeaderExtension)
1045 fDescriptor.fHeaderExtension = std::make_unique<RNTupleDescriptor::RHeaderExtension>();
1046}
1047
1050{
1051 auto clusterId = clusterDesc.GetId();
1052 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
1053 return R__FAIL("cluster id clash");
1054 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
1055 return RResult<void>::Success();
1056}
1057
1060{
1061 // Make sure we have no duplicates
1062 if (std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1063 extraTypeInfoDesc) != fDescriptor.fExtraTypeInfoDescriptors.end()) {
1064 return R__FAIL("extra type info duplicates");
1065 }
1066 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1067 return RResult<void>::Success();
1068}
1069
1072{
1074 const auto &desc = GetDescriptor();
1075
1076 std::function<void(const RFieldDescriptor &)> fnWalkFieldTree;
1077 fnWalkFieldTree = [&desc, &streamerInfoMap, &fnWalkFieldTree](const RFieldDescriptor &fieldDesc) {
1078 if (fieldDesc.IsCustomClass()) {
1079 // Add streamer info for this class to streamerInfoMap
1080 auto cl = TClass::GetClass(fieldDesc.GetTypeName().c_str());
1081 if (!cl) {
1082 throw RException(R__FAIL(std::string("cannot get TClass for ") + fieldDesc.GetTypeName()));
1083 }
1084 auto streamerInfo = cl->GetStreamerInfo(fieldDesc.GetTypeVersion());
1085 if (!streamerInfo) {
1086 throw RException(R__FAIL(std::string("cannot get streamerInfo for ") + fieldDesc.GetTypeName()));
1087 }
1088 streamerInfoMap[streamerInfo->GetNumber()] = streamerInfo;
1089 }
1090
1091 // Recursively traverse sub fields
1092 for (const auto &subFieldDesc : desc.GetFieldIterable(fieldDesc)) {
1093 fnWalkFieldTree(subFieldDesc);
1094 }
1095 };
1096
1097 fnWalkFieldTree(desc.GetFieldZero());
1098
1099 // Add the streamer info records from unsplit fields: because of runtime polymorphism we may need to add additional
1100 // types not covered by the type names stored in the field headers
1101 for (const auto &extraTypeInfo : desc.GetExtraTypeInfoIterable()) {
1102 if (extraTypeInfo.GetContentId() != EExtraTypeInfoIds::kStreamerInfo)
1103 continue;
1104 streamerInfoMap.merge(RNTupleSerializer::DeserializeStreamerInfos(extraTypeInfo.GetContent()).Unwrap());
1105 }
1106
1107 return streamerInfoMap;
1108}
1109
1112{
   // Returns an RColumnRangeIterable constructed from this object.
1113 return RColumnRangeIterable(*this);
1114}
1115
1118{
   // Returns an RFieldDescriptorIterable constructed from this object and the given field descriptor.
1119 return RFieldDescriptorIterable(*this, fieldDesc);
1120}
1121
1123 const RFieldDescriptor &fieldDesc, const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const
1124{
   // Same as the two-argument construction, additionally passing the caller's comparator
   // (a predicate over two descriptor ids) to the iterable.
1125 return RFieldDescriptorIterable(*this, fieldDesc, comparator);
1126}
1127
1130{
1131 return GetFieldIterable(GetFieldDescriptor(fieldId));
1132}
1133
1135 DescriptorId_t fieldId, const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const
1136{
1137 return GetFieldIterable(GetFieldDescriptor(fieldId), comparator);
1138}
1139
1142{
1143 return GetFieldIterable(GetFieldZeroId());
1144}
1145
1148 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const
1149{
1150 return GetFieldIterable(GetFieldZeroId(), comparator);
1151}
1152
1155{
   // Returns an RColumnDescriptorIterable constructed from this object alone (no field argument).
1156 return RColumnDescriptorIterable(*this);
1157}
1158
1161{
   // Returns an RColumnDescriptorIterable constructed from this object and the given field descriptor.
1162 return RColumnDescriptorIterable(*this, fieldDesc);
1163}
1164
1167{
1168 return RColumnDescriptorIterable(*this, GetFieldDescriptor(fieldId));
1169}
1170
1173{
   // Returns an RClusterGroupDescriptorIterable constructed from this object.
1174 return RClusterGroupDescriptorIterable(*this);
1175}
1176
1179{
   // Returns an RClusterDescriptorIterable constructed from this object.
1180 return RClusterDescriptorIterable(*this);
1181}
1182
1185{
1187}
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:294
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
#define d(i)
Definition RSha256.hxx:102
#define f(i)
Definition RSha256.hxx:104
#define PI
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
char name[80]
Definition TGX11.cxx:110
#define _(A, B)
Definition cfortran.h:108
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RResult< void > MarkSuppressedColumnRange(DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
A column element encapsulates the translation between basic C++ types and their column representation...
std::size_t GetPackedSize(std::size_t nElements=1U) const
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeChecksum(const std::optional< std::uint32_t > typeChecksum)
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const
Get the streamer info records for custom classes. Currently requires the corresponding dictionaries t...
RResult< void > AddFieldProjection(DescriptorId_t sourceId, DescriptorId_t targetId)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
void Reset()
Clears so-far stored clusters, fields, and columns and return to a pristine ntuple descriptor.
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
static RResult< StreamerInfoMap_t > DeserializeStreamerInfos(const std::string &extraTypeInfoContent)
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
Records the partition of data into pages for a particular column in a particular cluster.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
std::uint64_t fPageListLength
Uncompressed size of the page list.
RClusterGroupDescriptor CloneSummary() const
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool operator==(const RClusterGroupDescriptor &other) const
std::vector< DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
Meta-data stored for every column of an ntuple.
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
EColumnType fType
The on-disk column type.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
std::unordered_set< DescriptorId_t > fPhysicalColumnIds
bool operator==(const RColumnGroupDescriptor &other) const
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Field-specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
std::uint32_t fTypeVersionFrom
Extra type information restricted to a certain version range of the type.
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
A field translates read and write calls from/to underlying columns to/from tree values.
const std::string & GetTypeAlias() const
const std::string & GetDescription() const
Get the field's description.
const std::string & GetFieldName() const
ENTupleStructure GetStructure() const
const std::string & GetTypeName() const
static constexpr int kTraitTypeChecksum
The TClass checksum is set and valid.
virtual std::uint32_t GetTypeVersion() const
Indicates an evolution of the C++ type itself.
virtual std::uint32_t GetTypeChecksum() const
Return the current TClass reported checksum of this class. Only valid if kTraitTypeChecksum is set.
virtual std::uint32_t GetFieldVersion() const
Indicates an evolution of the mapping scheme from C++ type to columns.
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &canonicalType, const std::string &typeAlias, bool fContinueOnError=false)
Factory method to resurrect a field from the stored on-disk type information.
Definition RField.cxx:622
std::size_t GetNRepetitions() const
static RResult< void > EnsureValidFieldName(std::string_view fieldName)
Check whether a given string is a valid field name.
Definition RField.cxx:905
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
const std::string & GetFieldName() const
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
bool IsCustomClass() const
Tells if the field describes a user-defined class with a dictionary.
const std::vector< DescriptorId_t > & GetLogicalColumnIds() const
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::vector< DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
Used to loop over all the clusters of an ntuple (in unspecified order)
Used to loop over all the cluster groups of an ntuple (in unspecified order)
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
Used to loop over all the extra type info records of an ntuple (in unspecified order)
std::vector< DescriptorId_t > GetTopLevelFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header,...
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
std::unique_ptr< RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored meta-data.
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
RColumnDescriptorIterable GetColumnIterable() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
RResult< void > AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
RResult< void > DropClusterGroupDetails(DescriptorId_t clusterGroupId)
std::unique_ptr< RHeaderExtension > fHeaderExtension
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
RClusterDescriptorIterable GetClusterIterable() const
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
DescriptorId_t fFieldZeroId
Set by the descriptor builder.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::vector< std::uint64_t > GetFeatureFlags() const
static std::unique_ptr< RNTupleModel > Create()
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2968
struct void * fTypeName
Definition cppyy.h:9
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr int kUnknownCompressionSettings
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr DescriptorId_t kInvalidDescriptorId
The window of element indexes of a particular column in a particular cluster.
bool fIsSuppressed
Suppressed columns have an empty page range and unknown compression settings.
int fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
The number of column elements in the cluster.
We do not need to store the element size / uncompressed page size because we know to which column the...
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.
bool fReconstructProjections
If set to true, projected fields will be reconstructed as such.