Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-10-04
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#include <ROOT/RError.hxx>
18#include <ROOT/RField.hxx>
20#include <ROOT/RNTupleModel.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPage.hxx>
23#include <string_view>
24
25#include <RZip.h>
26#include <TError.h>
27
28#include <algorithm>
29#include <cstdint>
30#include <deque>
31#include <iostream>
32#include <set>
33#include <utility>
34
35
37{
38 return fFieldId == other.fFieldId && fFieldVersion == other.fFieldVersion && fTypeVersion == other.fTypeVersion &&
40 fTypeName == other.fTypeName && fTypeAlias == other.fTypeAlias && fNRepetitions == other.fNRepetitions &&
41 fStructure == other.fStructure && fParentId == other.fParentId && fLinkIds == other.fLinkIds;
42}
43
46{
48 clone.fFieldId = fFieldId;
49 clone.fFieldVersion = fFieldVersion;
50 clone.fTypeVersion = fTypeVersion;
51 clone.fFieldName = fFieldName;
52 clone.fFieldDescription = fFieldDescription;
53 clone.fTypeName = fTypeName;
54 clone.fTypeAlias = fTypeAlias;
55 clone.fNRepetitions = fNRepetitions;
56 clone.fStructure = fStructure;
57 clone.fParentId = fParentId;
58 clone.fLinkIds = fLinkIds;
59 return clone;
60}
61
62std::unique_ptr<ROOT::Experimental::RFieldBase>
64{
65 if (GetTypeName().empty()) {
66 // For untyped records or collections, we have no class available to collect all the sub fields.
67 // Therefore, we create an untyped record field as an artificial binder for the record itself, and in the case of
68 // collections, its items.
69 std::vector<std::unique_ptr<RFieldBase>> memberFields;
70 for (auto id : fLinkIds) {
71 const auto &memberDesc = ntplDesc.GetFieldDescriptor(id);
72 memberFields.emplace_back(memberDesc.CreateField(ntplDesc));
73 }
74 if (GetStructure() == ENTupleStructure::kRecord) {
75 auto recordField = std::make_unique<RRecordField>(GetFieldName(), memberFields);
76 recordField->SetOnDiskId(fFieldId);
77 return recordField;
78 } else if (GetStructure() == ENTupleStructure::kCollection) {
79 auto recordField = std::make_unique<RRecordField>("_0", memberFields);
80 auto collectionField = std::make_unique<RVectorField>(GetFieldName(), std::move(recordField));
81 collectionField->SetOnDiskId(fFieldId);
82 return collectionField;
83 } else {
84 throw RException(R__FAIL("unknown field type for field \"" + GetFieldName() + "\""));
85 }
86 }
87
88 auto field = RFieldBase::Create(GetFieldName(), GetTypeAlias().empty() ? GetTypeName() : GetTypeAlias()).Unwrap();
89 field->SetOnDiskId(fFieldId);
90 for (auto &f : *field)
91 f.SetOnDiskId(ntplDesc.FindFieldId(f.GetFieldName(), f.GetParent()->GetOnDiskId()));
92 return field;
93}
94
95
96////////////////////////////////////////////////////////////////////////////////
97
98
100{
101 return fLogicalColumnId == other.fLogicalColumnId && fPhysicalColumnId == other.fPhysicalColumnId &&
102 fModel == other.fModel && fFieldId == other.fFieldId && fIndex == other.fIndex;
103}
104
105
108{
110 clone.fLogicalColumnId = fLogicalColumnId;
111 clone.fPhysicalColumnId = fPhysicalColumnId;
112 clone.fModel = fModel;
113 clone.fFieldId = fFieldId;
114 clone.fIndex = fIndex;
115 clone.fFirstElementIndex = fFirstElementIndex;
116 return clone;
117}
118
119
120////////////////////////////////////////////////////////////////////////////////
121
124{
125 // TODO(jblomer): binary search
126 RPageInfo pageInfo;
127 decltype(idxInCluster) firstInPage = 0;
128 NTupleSize_t pageNo = 0;
129 for (const auto &pi : fPageInfos) {
130 if (firstInPage + pi.fNElements > idxInCluster) {
131 pageInfo = pi;
132 break;
133 }
134 firstInPage += pi.fNElements;
135 ++pageNo;
136 }
137 R__ASSERT(firstInPage <= idxInCluster);
138 R__ASSERT((firstInPage + pageInfo.fNElements) > idxInCluster);
139 return RPageInfoExtended{pageInfo, firstInPage, pageNo};
140}
141
142std::size_t
144 const Internal::RColumnElementBase &element,
145 std::size_t pageSize)
146{
147 R__ASSERT(fPhysicalColumnId == columnRange.fPhysicalColumnId);
148
149 const auto nElements = std::accumulate(fPageInfos.begin(), fPageInfos.end(), 0U,
150 [](std::size_t n, const auto &PI) { return n + PI.fNElements; });
151 const auto nElementsRequired = static_cast<std::uint64_t>(columnRange.fNElements);
152
153 if (nElementsRequired == nElements)
154 return 0U;
155 R__ASSERT((nElementsRequired > nElements) && "invalid attempt to shrink RPageRange");
156
157 std::vector<RPageInfo> pageInfos;
158 // Synthesize new `RPageInfo`s as needed
159 const std::uint64_t nElementsPerPage = pageSize / element.GetSize();
160 R__ASSERT(nElementsPerPage > 0);
161 for (auto nRemainingElements = nElementsRequired - nElements; nRemainingElements > 0;) {
163 PI.fNElements = std::min(nElementsPerPage, nRemainingElements);
164 PI.fLocator.fType = RNTupleLocator::kTypePageZero;
165 PI.fLocator.fBytesOnStorage = element.GetPackedSize(PI.fNElements);
166 pageInfos.emplace_back(PI);
167 nRemainingElements -= PI.fNElements;
168 }
169
170 pageInfos.insert(pageInfos.end(), std::make_move_iterator(fPageInfos.begin()),
171 std::make_move_iterator(fPageInfos.end()));
172 std::swap(fPageInfos, pageInfos);
173 return nElementsRequired - nElements;
174}
175
177{
178 return fClusterId == other.fClusterId && fFirstEntryIndex == other.fFirstEntryIndex &&
179 fNEntries == other.fNEntries && fColumnRanges == other.fColumnRanges && fPageRanges == other.fPageRanges;
180}
181
182
183std::unordered_set<ROOT::Experimental::DescriptorId_t> ROOT::Experimental::RClusterDescriptor::GetColumnIds() const
184{
185 std::unordered_set<DescriptorId_t> result;
186 for (const auto &x : fColumnRanges)
187 result.emplace(x.first);
188 return result;
189}
190
192{
193 std::uint64_t nbytes = 0;
194 for (const auto &pr : fPageRanges) {
195 for (const auto &pi : pr.second.fPageInfos) {
196 nbytes += pi.fLocator.fBytesOnStorage;
197 }
198 }
199 return nbytes;
200}
201
203{
205 clone.fClusterId = fClusterId;
206 clone.fFirstEntryIndex = fFirstEntryIndex;
207 clone.fNEntries = fNEntries;
208 clone.fColumnRanges = fColumnRanges;
209 for (const auto &d : fPageRanges)
210 clone.fPageRanges.emplace(d.first, d.second.Clone());
211 return clone;
212}
213
214////////////////////////////////////////////////////////////////////////////////
215
216
218{
219 // clang-format off
220 return fName == other.fName &&
221 fDescription == other.fDescription &&
222 fNEntries == other.fNEntries &&
223 fGeneration == other.fGeneration &&
224 fFieldDescriptors == other.fFieldDescriptors &&
225 fColumnDescriptors == other.fColumnDescriptors &&
226 fClusterGroupDescriptors == other.fClusterGroupDescriptors &&
227 fClusterDescriptors == other.fClusterDescriptors;
228 // clang-format on
229}
230
233{
235 for (const auto &cd : fClusterDescriptors) {
236 if (!cd.second.ContainsColumn(physicalColumnId))
237 continue;
238 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
239 result = std::max(result, columnRange.fFirstElementIndex + columnRange.fNElements);
240 }
241 return result;
242}
243
244
247{
248 std::string leafName(fieldName);
249 auto posDot = leafName.find_last_of('.');
250 if (posDot != std::string::npos) {
251 auto parentName = leafName.substr(0, posDot);
252 leafName = leafName.substr(posDot + 1);
253 parentId = FindFieldId(parentName, parentId);
254 }
255 for (const auto &fd : fFieldDescriptors) {
256 if (fd.second.GetParentId() == parentId && fd.second.GetFieldName() == leafName)
257 return fd.second.GetId();
258 }
260}
261
262
264{
265 if (fieldId == kInvalidDescriptorId)
266 return "";
267
268 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
269 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
270 if (prefix.empty())
271 return fieldDescriptor.GetFieldName();
272 return prefix + "." + fieldDescriptor.GetFieldName();
273}
274
275
278{
279 return FindFieldId("", kInvalidDescriptorId);
280}
281
282
285{
286 return FindFieldId(fieldName, GetFieldZeroId());
287}
288
291{
292 for (const auto &cd : fColumnDescriptors) {
293 if (cd.second.GetFieldId() == fieldId && cd.second.GetIndex() == columnIndex)
294 return cd.second.GetLogicalId();
295 }
297}
298
301{
302 auto logicalId = FindLogicalColumnId(fieldId, columnIndex);
303 if (logicalId == kInvalidDescriptorId)
305 return GetColumnDescriptor(logicalId).GetPhysicalId();
306}
307
310{
311 // TODO(jblomer): binary search?
312 for (const auto &cd : fClusterDescriptors) {
313 if (!cd.second.ContainsColumn(physicalColumnId))
314 continue;
315 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
316 if (columnRange.Contains(index))
317 return cd.second.GetId();
318 }
320}
321
322
323// TODO(jblomer): fix for cases of sharded clusters
326{
327 const auto &clusterDesc = GetClusterDescriptor(clusterId);
328 auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
329 // TODO(jblomer): binary search?
330 for (const auto &cd : fClusterDescriptors) {
331 if (cd.second.GetFirstEntryIndex() == firstEntryInNextCluster)
332 return cd.second.GetId();
333 }
335}
336
337
338// TODO(jblomer): fix for cases of sharded clusters
341{
342 const auto &clusterDesc = GetClusterDescriptor(clusterId);
343 // TODO(jblomer): binary search?
344 for (const auto &cd : fClusterDescriptors) {
345 if (cd.second.GetFirstEntryIndex() + cd.second.GetNEntries() == clusterDesc.GetFirstEntryIndex())
346 return cd.second.GetId();
347 }
349}
350
351std::vector<ROOT::Experimental::DescriptorId_t>
353{
354 auto fieldZeroId = desc.GetFieldZeroId();
355
356 std::vector<DescriptorId_t> fields;
357 for (const DescriptorId_t fieldId : fFields) {
358 if (desc.GetFieldDescriptor(fieldId).GetParentId() == fieldZeroId)
359 fields.emplace_back(fieldId);
360 }
361 return fields;
362}
363
365 for (unsigned int i = 0; true; ++i) {
366 auto logicalId = fNTuple.FindLogicalColumnId(fieldId, i);
367 if (logicalId == kInvalidDescriptorId)
368 break;
369 fColumns.emplace_back(logicalId);
370 }
371}
372
374 const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
375 : fNTuple(ntuple)
376{
377 CollectColumnIds(field.GetId());
378}
379
381 const RNTupleDescriptor &ntuple)
382 : fNTuple(ntuple)
383{
384 std::deque<DescriptorId_t> fieldIdQueue{ntuple.GetFieldZeroId()};
385
386 while (!fieldIdQueue.empty()) {
387 auto currFieldId = fieldIdQueue.front();
388 fieldIdQueue.pop_front();
389
390 CollectColumnIds(currFieldId);
391
392 for (const auto &field : ntuple.GetFieldIterable(currFieldId)) {
393 auto fieldId = field.GetId();
394 fieldIdQueue.push_back(fieldId);
395 }
396 }
397}
398
400{
401 std::vector<std::uint64_t> result;
402 unsigned int base = 0;
403 std::uint64_t flags = 0;
404 for (auto f : fFeatureFlags) {
405 if ((f > 0) && ((f % 64) == 0))
406 throw RException(R__FAIL("invalid feature flag: " + std::to_string(f)));
407 while (f > base + 64) {
408 result.emplace_back(flags);
409 flags = 0;
410 base += 64;
411 }
412 f -= base;
413 flags |= 1 << f;
414 }
415 result.emplace_back(flags);
416 return result;
417}
418
421 std::vector<RClusterDescriptor> &clusterDescs)
422{
423 auto iter = fClusterGroupDescriptors.find(clusterGroupId);
424 if (iter == fClusterGroupDescriptors.end())
425 return R__FAIL("invalid attempt to add details of unknown cluster group");
426 if (iter->second.HasClusterDetails())
427 return R__FAIL("invalid attempt to re-populate cluster group details");
428 if (iter->second.GetNClusters() != clusterDescs.size())
429 return R__FAIL("mismatch of number of clusters");
430
431 std::vector<DescriptorId_t> clusterIds;
432 for (unsigned i = 0; i < clusterDescs.size(); ++i) {
433 clusterIds.emplace_back(clusterDescs[i].GetId());
434 auto [_, success] = fClusterDescriptors.emplace(clusterIds.back(), std::move(clusterDescs[i]));
435 if (!success) {
436 return R__FAIL("invalid attempt to re-populate existing cluster");
437 }
438 }
439 auto cgBuilder = Internal::RClusterGroupDescriptorBuilder::FromSummary(iter->second);
440 cgBuilder.AddClusters(clusterIds);
441 iter->second = cgBuilder.MoveDescriptor().Unwrap();
442 return RResult<void>::Success();
443}
444
447{
448 auto iter = fClusterGroupDescriptors.find(clusterGroupId);
449 if (iter == fClusterGroupDescriptors.end())
450 return R__FAIL("invalid attempt to drop cluster details of unknown cluster group");
451 if (!iter->second.HasClusterDetails())
452 return R__FAIL("invalid attempt to drop details of cluster group summary");
453
454 for (auto clusterId : iter->second.GetClusterIds())
455 fClusterDescriptors.erase(clusterId);
456 iter->second = iter->second.CloneSummary();
457 return RResult<void>::Success();
458}
459
460std::unique_ptr<ROOT::Experimental::RNTupleModel> ROOT::Experimental::RNTupleDescriptor::CreateModel() const
461{
462 auto fieldZero = std::make_unique<RFieldZero>();
463 fieldZero->SetOnDiskId(GetFieldZeroId());
464 auto model = RNTupleModel::Create(std::move(fieldZero));
465 for (const auto &topDesc : GetTopLevelFields())
466 model->AddField(topDesc.CreateField(*this));
467 model->Freeze();
468 return model;
469}
470
471std::unique_ptr<ROOT::Experimental::RNTupleDescriptor> ROOT::Experimental::RNTupleDescriptor::Clone() const
472{
473 auto clone = std::make_unique<RNTupleDescriptor>();
474 clone->fName = fName;
475 clone->fDescription = fDescription;
476 clone->fOnDiskHeaderXxHash3 = fOnDiskHeaderXxHash3;
477 clone->fOnDiskHeaderSize = fOnDiskHeaderSize;
478 clone->fOnDiskFooterSize = fOnDiskFooterSize;
479 clone->fNEntries = fNEntries;
480 clone->fNClusters = fNClusters;
481 clone->fNPhysicalColumns = fNPhysicalColumns;
482 clone->fGeneration = fGeneration;
483 for (const auto &d : fFieldDescriptors)
484 clone->fFieldDescriptors.emplace(d.first, d.second.Clone());
485 for (const auto &d : fColumnDescriptors)
486 clone->fColumnDescriptors.emplace(d.first, d.second.Clone());
487 for (const auto &d : fClusterGroupDescriptors)
488 clone->fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
489 for (const auto &d : fClusterDescriptors)
490 clone->fClusterDescriptors.emplace(d.first, d.second.Clone());
492 clone->fHeaderExtension = std::make_unique<RHeaderExtension>(*fHeaderExtension);
493 return clone;
494}
495
496////////////////////////////////////////////////////////////////////////////////
497
499{
500 return fColumnGroupId == other.fColumnGroupId && fPhysicalColumnIds == other.fPhysicalColumnIds;
501}
502
503////////////////////////////////////////////////////////////////////////////////
504
506{
507 return fClusterGroupId == other.fClusterGroupId && fClusterIds == other.fClusterIds &&
508 fMinEntry == other.fMinEntry && fEntrySpan == other.fEntrySpan && fNClusters == other.fNClusters;
509}
510
512{
514 clone.fClusterGroupId = fClusterGroupId;
515 clone.fPageListLocator = fPageListLocator;
516 clone.fPageListLength = fPageListLength;
517 clone.fMinEntry = fMinEntry;
518 clone.fEntrySpan = fEntrySpan;
519 clone.fNClusters = fNClusters;
520 return clone;
521}
522
524{
525 RClusterGroupDescriptor clone = CloneSummary();
526 clone.fClusterIds = fClusterIds;
527 return clone;
528}
529
530////////////////////////////////////////////////////////////////////////////////
531
533 DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings,
534 const RClusterDescriptor::RPageRange &pageRange)
535{
536 if (physicalId != pageRange.fPhysicalColumnId)
537 return R__FAIL("column ID mismatch");
538 if (fCluster.fPageRanges.count(physicalId) > 0)
539 return R__FAIL("column ID conflict");
540 RClusterDescriptor::RColumnRange columnRange{physicalId, firstElementIndex, ClusterSize_t{0}};
541 columnRange.fCompressionSettings = compressionSettings;
542 for (const auto &pi : pageRange.fPageInfos) {
543 columnRange.fNElements += pi.fNElements;
544 }
545 fCluster.fPageRanges[physicalId] = pageRange.Clone();
546 fCluster.fColumnRanges[physicalId] = columnRange;
547 return RResult<void>::Success();
548}
549
552{
553 /// Carries out a depth-first traversal of a field subtree rooted at `rootFieldId`. For each field, `visitField` is
554 /// called passing the field ID and the number of overall repetitions, taking into account the repetitions of each
555 /// parent field in the hierarchy.
556 auto fnTraverseSubtree = [&](DescriptorId_t rootFieldId, std::uint64_t nRepetitionsAtThisLevel,
557 const auto &visitField, const auto &enterSubtree) -> void {
558 visitField(rootFieldId, nRepetitionsAtThisLevel);
559 for (const auto &f : desc.GetFieldIterable(rootFieldId)) {
560 const std::uint64_t nRepetitions = std::max(f.GetNRepetitions(), std::uint64_t{1U}) * nRepetitionsAtThisLevel;
561 enterSubtree(f.GetId(), nRepetitions, visitField, enterSubtree);
562 }
563 };
564
565 // Deferred columns can only be part of the header extension
566 auto xHeader = desc.GetHeaderExtension();
567 if (!xHeader)
568 return *this;
569
570 // Ensure that all columns in the header extension have their associated `R(Column|Page)Range`
571 for (const auto &topLevelFieldId : xHeader->GetTopLevelFields(desc)) {
572 fnTraverseSubtree(
573 topLevelFieldId, std::max(desc.GetFieldDescriptor(topLevelFieldId).GetNRepetitions(), std::uint64_t{1U}),
574 [&](DescriptorId_t fieldId, std::uint64_t nRepetitions) {
575 for (const auto &c : desc.GetColumnIterable(fieldId)) {
576 const DescriptorId_t physicalId = c.GetPhysicalId();
577 auto &columnRange = fCluster.fColumnRanges[physicalId];
578 auto &pageRange = fCluster.fPageRanges[physicalId];
579 // Initialize a RColumnRange for `physicalId` if it was not there. Columns that were created during model
580 // extension won't have on-disk metadata for the clusters that were already committed before the model
581 // was extended. Therefore, these need to be synthetically initialized upon reading.
582 if (columnRange.fPhysicalColumnId == kInvalidDescriptorId) {
583 columnRange.fPhysicalColumnId = physicalId;
584 columnRange.fFirstElementIndex = 0;
585 columnRange.fNElements = 0;
586
587 pageRange.fPhysicalColumnId = physicalId;
588 }
589 // Fixup the RColumnRange and RPageRange in deferred columns. We know what the first element index and
590 // number of elements should have been if the column was not deferred; fix those and let
591 // `ExtendToFitColumnRange()` synthesize RPageInfos accordingly.
592 // Note that a column whose first element index is != 0 already met the criteria of
593 // `RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the field zero
594 // excluding subfields of collection and variant fields.
595 if (c.IsDeferredColumn()) {
596 columnRange.fFirstElementIndex = fCluster.GetFirstEntryIndex() * nRepetitions;
597 columnRange.fNElements = fCluster.GetNEntries() * nRepetitions;
598 const auto element = Internal::RColumnElementBase::Generate<void>(c.GetModel().GetType());
599 pageRange.ExtendToFitColumnRange(columnRange, *element, Internal::RPage::kPageZeroSize);
600 }
601 }
602 },
603 fnTraverseSubtree);
604 }
605 return *this;
606}
607
610{
611 if (fCluster.fClusterId == kInvalidDescriptorId)
612 return R__FAIL("unset cluster ID");
613 if (fCluster.fNEntries == 0)
614 return R__FAIL("empty cluster");
615 for (const auto &pr : fCluster.fPageRanges) {
616 if (fCluster.fColumnRanges.count(pr.first) == 0) {
617 return R__FAIL("missing column range");
618 }
619 }
621 std::swap(result, fCluster);
622 return result;
623}
624
625////////////////////////////////////////////////////////////////////////////////
626
629 const RClusterGroupDescriptor &clusterGroupDesc)
630{
632 builder.ClusterGroupId(clusterGroupDesc.GetId())
633 .PageListLocator(clusterGroupDesc.GetPageListLocator())
634 .PageListLength(clusterGroupDesc.GetPageListLength())
635 .MinEntry(clusterGroupDesc.GetMinEntry())
636 .EntrySpan(clusterGroupDesc.GetEntrySpan())
637 .NClusters(clusterGroupDesc.GetNClusters());
638 return builder;
639}
640
643{
644 if (fClusterGroup.fClusterGroupId == kInvalidDescriptorId)
645 return R__FAIL("unset cluster group ID");
647 std::swap(result, fClusterGroup);
648 return result;
649}
650
651////////////////////////////////////////////////////////////////////////////////
652
655{
656 if (fColumnGroup.fColumnGroupId == kInvalidDescriptorId)
657 return R__FAIL("unset column group ID");
659 std::swap(result, fColumnGroup);
660 return result;
661}
662
663////////////////////////////////////////////////////////////////////////////////
664
667{
668 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
669 return R__FAIL("field with id '" + std::to_string(fieldId) + "' doesn't exist");
670 return RResult<void>::Success();
671}
672
674{
675 // Reuse field name validity check
676 auto validName = RFieldBase::EnsureValidFieldName(fDescriptor.GetName());
677 if (!validName) {
678 return R__FORWARD_ERROR(validName);
679 }
680 // open-ended list of invariant checks
681 for (const auto& key_val: fDescriptor.fFieldDescriptors) {
682 const auto& id = key_val.first;
683 const auto& desc = key_val.second;
684 // parent not properly set
685 if (id != DescriptorId_t(0) && desc.GetParentId() == kInvalidDescriptorId) {
686 return R__FAIL("field with id '" + std::to_string(id) + "' has an invalid parent id");
687 }
688 }
689 return RResult<void>::Success();
690}
691
693{
695 std::swap(result, fDescriptor);
696 return result;
697}
698
700 const std::string_view description)
701{
702 fDescriptor.fName = std::string(name);
703 fDescriptor.fDescription = std::string(description);
704}
705
707{
708 if (flag % 64 == 0)
709 throw RException(R__FAIL("invalid feature flag: " + std::to_string(flag)));
710 fDescriptor.fFeatureFlags.insert(flag);
711}
712
715{
716 if (fColumn.GetLogicalId() == kInvalidDescriptorId)
717 return R__FAIL("invalid logical column id");
718 if (fColumn.GetPhysicalId() == kInvalidDescriptorId)
719 return R__FAIL("invalid physical column id");
720 if (fColumn.GetModel().GetType() == EColumnType::kUnknown)
721 return R__FAIL("invalid column model");
722 if (fColumn.GetFieldId() == kInvalidDescriptorId)
723 return R__FAIL("invalid field id, dangling column");
724 return fColumn.Clone();
725}
726
728 : fField(fieldDesc.Clone())
729{
731 fField.fLinkIds = {};
732}
733
736{
737 RFieldDescriptorBuilder fieldDesc;
738 fieldDesc.FieldVersion(field.GetFieldVersion())
740 .FieldName(field.GetFieldName())
742 .TypeName(field.GetTypeName())
743 .TypeAlias(field.GetTypeAlias())
744 .Structure(field.GetStructure())
746 return fieldDesc;
747}
748
751{
752 if (fField.GetId() == kInvalidDescriptorId) {
753 return R__FAIL("invalid field id");
754 }
755 if (fField.GetStructure() == ENTupleStructure::kInvalid) {
756 return R__FAIL("invalid field structure");
757 }
758 // FieldZero is usually named "" and would be a false positive here
759 if (fField.GetParentId() != kInvalidDescriptorId) {
760 auto validName = RFieldBase::EnsureValidFieldName(fField.GetFieldName());
761 if (!validName) {
762 return R__FORWARD_ERROR(validName);
763 }
764 }
765 return fField.Clone();
766}
767
769{
770 fDescriptor.fFieldDescriptors.emplace(fieldDesc.GetId(), fieldDesc.Clone());
771 if (fDescriptor.fHeaderExtension)
772 fDescriptor.fHeaderExtension->AddFieldId(fieldDesc.GetId());
773}
774
777{
778 auto fieldExists = RResult<void>::Success();
779 if (!(fieldExists = EnsureFieldExists(fieldId)))
780 return R__FORWARD_ERROR(fieldExists);
781 if (!(fieldExists = EnsureFieldExists(linkId)))
782 return R__FAIL("child field with id '" + std::to_string(linkId) + "' doesn't exist in NTuple");
783
784 if (linkId == fDescriptor.GetFieldZeroId()) {
785 return R__FAIL("cannot make FieldZero a child field");
786 }
787 // fail if field already has another valid parent
788 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
789 if ((parentId != kInvalidDescriptorId) && (parentId != fieldId)) {
790 return R__FAIL("field '" + std::to_string(linkId) + "' already has a parent ('" +
791 std::to_string(parentId) + ")");
792 }
793 if (fieldId == linkId) {
794 return R__FAIL("cannot make field '" + std::to_string(fieldId) + "' a child of itself");
795 }
796 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
797 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
798 return RResult<void>::Success();
799}
800
802 DescriptorId_t physicalId,
803 DescriptorId_t fieldId,
804 const RColumnModel &model, std::uint32_t index,
805 std::uint64_t firstElementIdx)
806{
808 c.fLogicalColumnId = logicalId;
809 c.fPhysicalColumnId = physicalId;
810 c.fFieldId = fieldId;
811 c.fModel = model;
812 c.fIndex = index;
813 c.fFirstElementIndex = firstElementIdx;
814 if (!c.IsAliasColumn())
815 fDescriptor.fNPhysicalColumns++;
816 if (fDescriptor.fHeaderExtension)
817 fDescriptor.fHeaderExtension->AddColumn(/*isAliasColumn=*/c.IsAliasColumn());
818 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(c));
819}
820
823{
824 const auto fieldId = columnDesc.GetFieldId();
825 const auto index = columnDesc.GetIndex();
826
827 auto fieldExists = EnsureFieldExists(fieldId);
828 if (!fieldExists)
829 return R__FORWARD_ERROR(fieldExists);
830 if (fDescriptor.FindLogicalColumnId(fieldId, index) != kInvalidDescriptorId) {
831 return R__FAIL("column index clash");
832 }
833 if (index > 0) {
834 if (fDescriptor.FindLogicalColumnId(fieldId, index - 1) == kInvalidDescriptorId)
835 return R__FAIL("out of bounds column index");
836 }
837 if (columnDesc.IsAliasColumn()) {
838 if (columnDesc.GetModel() != fDescriptor.GetColumnDescriptor(columnDesc.GetPhysicalId()).GetModel())
839 return R__FAIL("alias column type mismatch");
840 }
841
842 auto logicalId = columnDesc.GetLogicalId();
843 if (!columnDesc.IsAliasColumn())
844 fDescriptor.fNPhysicalColumns++;
845 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(columnDesc));
846 if (fDescriptor.fHeaderExtension)
847 fDescriptor.fHeaderExtension->AddColumn(/*isAliasColumn=*/columnDesc.IsAliasColumn());
848
849 return RResult<void>::Success();
850}
851
854{
855 const auto id = clusterGroup.GetId();
856 if (fDescriptor.fClusterGroupDescriptors.count(id) > 0)
857 return R__FAIL("cluster group id clash");
858 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, clusterGroup.GetMinEntry() + clusterGroup.GetEntrySpan());
859 fDescriptor.fNClusters += clusterGroup.GetNClusters();
860 fDescriptor.fClusterGroupDescriptors.emplace(id, std::move(clusterGroup));
861 return RResult<void>::Success();
862}
863
865{
866 fDescriptor.fName = "";
867 fDescriptor.fDescription = "";
868 fDescriptor.fFieldDescriptors.clear();
869 fDescriptor.fColumnDescriptors.clear();
870 fDescriptor.fClusterDescriptors.clear();
871 fDescriptor.fClusterGroupDescriptors.clear();
872 fDescriptor.fHeaderExtension.reset();
873}
874
876{
877 if (!fDescriptor.fHeaderExtension)
878 fDescriptor.fHeaderExtension = std::make_unique<RNTupleDescriptor::RHeaderExtension>();
879}
880
883{
884 auto clusterId = clusterDesc.GetId();
885 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
886 return R__FAIL("cluster id clash");
887 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
888 return RResult<void>::Success();
889}
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:294
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
#define d(i)
Definition RSha256.hxx:102
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
TObject * clone(const char *newname) const override
Definition RooChi2Var.h:9
#define PI
#define R__ASSERT(e)
Definition TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
char name[80]
Definition TGX11.cxx:110
#define _(A, B)
Definition cfortran.h:108
A helper class for piece-wise construction of an RClusterDescriptor.
RClusterDescriptorBuilder & AddDeferredColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for deferred columns missing in this cluster.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
A column element encapsulates the translation between basic C++ types and their column representation...
std::size_t GetPackedSize(std::size_t nElements=1U) const
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
void AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index, std::uint64_t firstElementIdx=0U)
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
Records the partition of data into pages for a particular column in a particular cluster.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
std::unordered_set< DescriptorId_t > GetColumnIds() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
RClusterGroupDescriptor CloneSummary() const
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
bool operator==(const RClusterGroupDescriptor &other) const
std::vector< DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
Meta-data stored for every column of an ntuple.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnModel fModel
Contains the column type and whether it is sorted.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
std::unordered_set< DescriptorId_t > fPhysicalColumnIds
bool operator==(const RColumnGroupDescriptor &other) const
Holds the static meta-data of an RNTuple column.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
A field translates read and write calls from/to underlying columns to/from tree values.
Definition RField.hxx:94
std::string GetFieldName() const
Definition RField.hxx:664
ENTupleStructure GetStructure() const
Definition RField.hxx:669
std::string GetTypeName() const
Definition RField.hxx:667
virtual std::uint32_t GetTypeVersion() const
Indicates an evolution of the C++ type itself.
Definition RField.hxx:696
virtual std::uint32_t GetFieldVersion() const
Indicates an evolution of the mapping scheme from C++ type to columns.
Definition RField.hxx:694
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &canonicalType, const std::string &typeAlias, bool fContinueOnError=false)
Factory method to resurrect a field from the stored on-disk type information.
Definition RField.cxx:565
std::string GetDescription() const
Get the field's description.
Definition RField.hxx:677
std::size_t GetNRepetitions() const
Definition RField.hxx:670
std::string GetTypeAlias() const
Definition RField.hxx:668
static RResult< void > EnsureValidFieldName(std::string_view fieldName)
Check whether a given string is a valid field name.
Definition RField.cxx:771
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
std::vector< DescriptorId_t > GetTopLevelFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header.
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
RFieldDescriptorIterable GetTopLevelFields() const
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
RResult< void > AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::unique_ptr< RNTupleModel > CreateModel() const
Re-create the C++ model from the stored meta-data.
RResult< void > DropClusterGroupDetails(DescriptorId_t clusterGroupId)
std::unique_ptr< RHeaderExtension > fHeaderExtension
std::string fDescription
Free text from the user.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
std::vector< std::uint64_t > GetFeatureFlags() const
static std::unique_ptr< RNTupleModel > Create()
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
struct void * fTypeName
Definition cppyy.h:9
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
The window of element indexes of a particular column in a particular cluster.
ClusterSize_t fNElements
The number of column elements in the cluster.
We do not need to store the element size / uncompressed page size because we know to which column the...
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.