Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-10-04
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#include <ROOT/RError.hxx>
18#include <ROOT/RField.hxx>
20#include <ROOT/RNTupleModel.hxx>
21#include <ROOT/RNTupleUtil.hxx>
22#include <ROOT/RPage.hxx>
23#include <ROOT/RStringView.hxx>
24
25#include <RZip.h>
26#include <TError.h>
27
28#include <algorithm>
29#include <cstdint>
30#include <deque>
31#include <iostream>
32#include <set>
33#include <utility>
34
35
37{
38 return fFieldId == other.fFieldId && fFieldVersion == other.fFieldVersion && fTypeVersion == other.fTypeVersion &&
40 fTypeName == other.fTypeName && fTypeAlias == other.fTypeAlias && fNRepetitions == other.fNRepetitions &&
41 fStructure == other.fStructure && fParentId == other.fParentId && fLinkIds == other.fLinkIds;
42}
43
46{
48 clone.fFieldId = fFieldId;
49 clone.fFieldVersion = fFieldVersion;
50 clone.fTypeVersion = fTypeVersion;
51 clone.fFieldName = fFieldName;
52 clone.fFieldDescription = fFieldDescription;
53 clone.fTypeName = fTypeName;
54 clone.fTypeAlias = fTypeAlias;
55 clone.fNRepetitions = fNRepetitions;
56 clone.fStructure = fStructure;
57 clone.fParentId = fParentId;
58 clone.fLinkIds = fLinkIds;
59 return clone;
60}
61
62std::unique_ptr<ROOT::Experimental::Detail::RFieldBase>
64{
65 if (GetTypeName().empty() && GetStructure() == ENTupleStructure::kCollection) {
66 // For untyped collections, we have no class available to collect all the sub fields.
67 // Therefore, we create an untyped record field as an artificial binder for the collection items.
68 std::vector<std::unique_ptr<Detail::RFieldBase>> memberFields;
69 for (auto id : fLinkIds) {
70 const auto &memberDesc = ntplDesc.GetFieldDescriptor(id);
71 memberFields.emplace_back(memberDesc.CreateField(ntplDesc));
72 }
73 auto recordField = std::make_unique<RRecordField>("_0", memberFields);
74 auto collectionField = std::make_unique<RVectorField>(GetFieldName(), std::move(recordField));
75 collectionField->SetOnDiskId(fFieldId);
76 return collectionField;
77 }
78
79 auto field =
80 Detail::RFieldBase::Create(GetFieldName(), GetTypeAlias().empty() ? GetTypeName() : GetTypeAlias()).Unwrap();
81 field->SetOnDiskId(fFieldId);
82 for (auto &f : *field)
83 f.SetOnDiskId(ntplDesc.FindFieldId(f.GetName(), f.GetParent()->GetOnDiskId()));
84 return field;
85}
86
87
88////////////////////////////////////////////////////////////////////////////////
89
90
92{
93 return fLogicalColumnId == other.fLogicalColumnId && fPhysicalColumnId == other.fPhysicalColumnId &&
94 fModel == other.fModel && fFieldId == other.fFieldId && fIndex == other.fIndex;
95}
96
97
100{
102 clone.fLogicalColumnId = fLogicalColumnId;
103 clone.fPhysicalColumnId = fPhysicalColumnId;
104 clone.fModel = fModel;
105 clone.fFieldId = fFieldId;
106 clone.fIndex = fIndex;
107 clone.fFirstElementIndex = fFirstElementIndex;
108 return clone;
109}
110
111
112////////////////////////////////////////////////////////////////////////////////
113
114
117{
118 // TODO(jblomer): binary search
119 RPageInfo pageInfo;
120 decltype(idxInCluster) firstInPage = 0;
121 NTupleSize_t pageNo = 0;
122 for (const auto &pi : fPageInfos) {
123 if (firstInPage + pi.fNElements > idxInCluster) {
124 pageInfo = pi;
125 break;
126 }
127 firstInPage += pi.fNElements;
128 ++pageNo;
129 }
130 R__ASSERT(firstInPage <= idxInCluster);
131 R__ASSERT((firstInPage + pageInfo.fNElements) > idxInCluster);
132 return RPageInfoExtended{pageInfo, firstInPage, pageNo};
133}
134
135std::size_t
137 const Detail::RColumnElementBase &element,
138 std::size_t pageSize)
139{
140 R__ASSERT(fPhysicalColumnId == columnRange.fPhysicalColumnId);
141
142 const auto nElements = std::accumulate(fPageInfos.begin(), fPageInfos.end(), 0U,
143 [](std::size_t n, const auto &PI) { return n + PI.fNElements; });
144 const auto nElementsRequired = static_cast<std::uint64_t>(columnRange.fNElements);
145
146 if (nElementsRequired == nElements)
147 return 0U;
148 R__ASSERT((nElementsRequired > nElements) && "invalid attempt to shrink RPageRange");
149
150 std::vector<RPageInfo> pageInfos;
151 // Synthesize new `RPageInfo`s as needed
152 const std::uint64_t nElementsPerPage = pageSize / element.GetSize();
153 R__ASSERT(nElementsPerPage > 0);
154 for (auto nRemainingElements = nElementsRequired - nElements; nRemainingElements > 0;) {
156 PI.fNElements = std::min(nElementsPerPage, nRemainingElements);
157 PI.fLocator.fType = RNTupleLocator::kTypePageZero;
158 PI.fLocator.fBytesOnStorage = element.GetPackedSize(PI.fNElements);
159 pageInfos.emplace_back(PI);
160 nRemainingElements -= PI.fNElements;
161 }
162
163 pageInfos.insert(pageInfos.end(), std::make_move_iterator(fPageInfos.begin()),
164 std::make_move_iterator(fPageInfos.end()));
165 std::swap(fPageInfos, pageInfos);
166 return nElementsRequired - nElements;
167}
168
170{
171 return fClusterId == other.fClusterId && fFirstEntryIndex == other.fFirstEntryIndex &&
172 fNEntries == other.fNEntries && fHasPageLocations == other.fHasPageLocations &&
173 fColumnRanges == other.fColumnRanges && fPageRanges == other.fPageRanges;
174}
175
176
177std::unordered_set<ROOT::Experimental::DescriptorId_t> ROOT::Experimental::RClusterDescriptor::GetColumnIds() const
178{
179 EnsureHasPageLocations();
180 std::unordered_set<DescriptorId_t> result;
181 for (const auto &x : fColumnRanges)
182 result.emplace(x.first);
183 return result;
184}
185
187{
188 EnsureHasPageLocations();
189 return fColumnRanges.find(physicalId) != fColumnRanges.end();
190}
191
192
194{
195 EnsureHasPageLocations();
196 std::uint64_t nbytes = 0;
197 for (const auto &pr : fPageRanges) {
198 for (const auto &pi : pr.second.fPageInfos) {
199 nbytes += pi.fLocator.fBytesOnStorage;
200 }
201 }
202 return nbytes;
203}
204
206{
207 if (!fHasPageLocations)
208 throw RException(R__FAIL("invalid attempt to access page locations of summary-only cluster descriptor"));
209}
210
212{
214 clone.fClusterId = fClusterId;
215 clone.fFirstEntryIndex = fFirstEntryIndex;
216 clone.fNEntries = fNEntries;
217 clone.fHasPageLocations = fHasPageLocations;
218 clone.fColumnRanges = fColumnRanges;
219 for (const auto &d : fPageRanges)
220 clone.fPageRanges.emplace(d.first, d.second.Clone());
221 return clone;
222}
223
224////////////////////////////////////////////////////////////////////////////////
225
226
228{
229 // clang-format off
230 return fName == other.fName &&
231 fDescription == other.fDescription &&
232 fNEntries == other.fNEntries &&
233 fGeneration == other.fGeneration &&
234 fFieldDescriptors == other.fFieldDescriptors &&
235 fColumnDescriptors == other.fColumnDescriptors &&
236 fClusterGroupDescriptors == other.fClusterGroupDescriptors &&
237 fClusterDescriptors == other.fClusterDescriptors;
238 // clang-format on
239}
240
243{
245 for (const auto &cd : fClusterDescriptors) {
246 if (!cd.second.ContainsColumn(physicalColumnId))
247 continue;
248 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
249 result = std::max(result, columnRange.fFirstElementIndex + columnRange.fNElements);
250 }
251 return result;
252}
253
254
257{
258 std::string leafName(fieldName);
259 auto posDot = leafName.find_last_of('.');
260 if (posDot != std::string::npos) {
261 auto parentName = leafName.substr(0, posDot);
262 leafName = leafName.substr(posDot + 1);
263 parentId = FindFieldId(parentName, parentId);
264 }
265 for (const auto &fd : fFieldDescriptors) {
266 if (fd.second.GetParentId() == parentId && fd.second.GetFieldName() == leafName)
267 return fd.second.GetId();
268 }
270}
271
272
274{
275 if (fieldId == kInvalidDescriptorId)
276 return "";
277
278 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
279 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
280 if (prefix.empty())
281 return fieldDescriptor.GetFieldName();
282 return prefix + "." + fieldDescriptor.GetFieldName();
283}
284
285
288{
289 return FindFieldId("", kInvalidDescriptorId);
290}
291
292
295{
296 return FindFieldId(fieldName, GetFieldZeroId());
297}
298
301{
302 for (const auto &cd : fColumnDescriptors) {
303 if (cd.second.GetFieldId() == fieldId && cd.second.GetIndex() == columnIndex)
304 return cd.second.GetLogicalId();
305 }
307}
308
311{
312 auto logicalId = FindLogicalColumnId(fieldId, columnIndex);
313 if (logicalId == kInvalidDescriptorId)
315 return GetColumnDescriptor(logicalId).GetPhysicalId();
316}
317
320{
321 // TODO(jblomer): binary search?
322 for (const auto &cd : fClusterDescriptors) {
323 if (!cd.second.ContainsColumn(physicalColumnId))
324 continue;
325 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
326 if (columnRange.Contains(index))
327 return cd.second.GetId();
328 }
330}
331
332
333// TODO(jblomer): fix for cases of sharded clusters
336{
337 const auto &clusterDesc = GetClusterDescriptor(clusterId);
338 auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
339 // TODO(jblomer): binary search?
340 for (const auto &cd : fClusterDescriptors) {
341 if (cd.second.GetFirstEntryIndex() == firstEntryInNextCluster)
342 return cd.second.GetId();
343 }
345}
346
347
348// TODO(jblomer): fix for cases of sharded clusters
351{
352 const auto &clusterDesc = GetClusterDescriptor(clusterId);
353 // TODO(jblomer): binary search?
354 for (const auto &cd : fClusterDescriptors) {
355 if (cd.second.GetFirstEntryIndex() + cd.second.GetNEntries() == clusterDesc.GetFirstEntryIndex())
356 return cd.second.GetId();
357 }
359}
360
361std::vector<ROOT::Experimental::DescriptorId_t>
363{
364 auto fieldZeroId = desc.GetFieldZeroId();
365
366 std::vector<DescriptorId_t> fields;
367 for (const DescriptorId_t fieldId : fFields) {
368 if (desc.GetFieldDescriptor(fieldId).GetParentId() == fieldZeroId)
369 fields.emplace_back(fieldId);
370 }
371 return fields;
372}
373
375 for (unsigned int i = 0; true; ++i) {
376 auto logicalId = fNTuple.FindLogicalColumnId(fieldId, i);
377 if (logicalId == kInvalidDescriptorId)
378 break;
379 fColumns.emplace_back(logicalId);
380 }
381}
382
384 const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
385 : fNTuple(ntuple)
386{
387 CollectColumnIds(field.GetId());
388}
389
391 const RNTupleDescriptor &ntuple)
392 : fNTuple(ntuple)
393{
394 std::deque<DescriptorId_t> fieldIdQueue{ntuple.GetFieldZeroId()};
395
396 while (!fieldIdQueue.empty()) {
397 auto currFieldId = fieldIdQueue.front();
398 fieldIdQueue.pop_front();
399
400 CollectColumnIds(currFieldId);
401
402 for (const auto &field : ntuple.GetFieldIterable(currFieldId)) {
403 auto fieldId = field.GetId();
404 fieldIdQueue.push_back(fieldId);
405 }
406 }
407}
408
411{
412 auto iter = fClusterDescriptors.find(clusterDesc.GetId());
413 if (iter == fClusterDescriptors.end())
414 return R__FAIL("invalid attempt to add cluster details without known cluster summary");
415 if (iter->second.HasPageLocations())
416 return R__FAIL("invalid attempt to re-populate page list");
417 if (!clusterDesc.HasPageLocations())
418 return R__FAIL("provided cluster descriptor does not contain page locations");
419 iter->second = std::move(clusterDesc);
420 return RResult<void>::Success();
421}
422
424{
425 auto iter = fClusterDescriptors.find(clusterId);
426 if (iter == fClusterDescriptors.end())
427 return R__FAIL("invalid attempt to drop cluster details of unknown cluster");
428 if (!iter->second.HasPageLocations())
429 return R__FAIL("invalid attempt to drop details of cluster summary");
430 iter->second = RClusterDescriptor(clusterId, iter->second.GetFirstEntryIndex(), iter->second.GetNEntries());
431 return RResult<void>::Success();
432}
433
434std::unique_ptr<ROOT::Experimental::RNTupleModel> ROOT::Experimental::RNTupleDescriptor::GenerateModel() const
435{
436 auto model = RNTupleModel::Create();
437 model->GetFieldZero()->SetOnDiskId(GetFieldZeroId());
438 for (const auto &topDesc : GetTopLevelFields())
439 model->AddField(topDesc.CreateField(*this));
440 model->Freeze();
441 return model;
442}
443
444std::unique_ptr<ROOT::Experimental::RNTupleDescriptor> ROOT::Experimental::RNTupleDescriptor::Clone() const
445{
446 auto clone = std::make_unique<RNTupleDescriptor>();
447 clone->fName = fName;
448 clone->fDescription = fDescription;
449 clone->fOnDiskHeaderSize = fOnDiskHeaderSize;
450 clone->fOnDiskFooterSize = fOnDiskFooterSize;
451 clone->fNEntries = fNEntries;
452 clone->fNPhysicalColumns = fNPhysicalColumns;
453 clone->fGeneration = fGeneration;
454 for (const auto &d : fFieldDescriptors)
455 clone->fFieldDescriptors.emplace(d.first, d.second.Clone());
456 for (const auto &d : fColumnDescriptors)
457 clone->fColumnDescriptors.emplace(d.first, d.second.Clone());
458 for (const auto &d : fClusterGroupDescriptors)
459 clone->fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
460 for (const auto &d : fClusterDescriptors)
461 clone->fClusterDescriptors.emplace(d.first, d.second.Clone());
463 clone->fHeaderExtension = std::make_unique<RHeaderExtension>(*fHeaderExtension);
464 return clone;
465}
466
467////////////////////////////////////////////////////////////////////////////////
468
470{
471 return fColumnGroupId == other.fColumnGroupId && fPhysicalColumnIds == other.fPhysicalColumnIds;
472}
473
474////////////////////////////////////////////////////////////////////////////////
475
477{
478 return fClusterGroupId == other.fClusterGroupId && fClusterIds == other.fClusterIds;
479}
480
482{
484 clone.fClusterGroupId = fClusterGroupId;
485 clone.fClusterIds = fClusterIds;
486 clone.fPageListLocator = fPageListLocator;
487 clone.fPageListLength = fPageListLength;
488 return clone;
489}
490
491////////////////////////////////////////////////////////////////////////////////
492
495 std::uint64_t firstElementIndex,
496 std::uint32_t compressionSettings,
497 const RClusterDescriptor::RPageRange &pageRange)
498{
499 if (physicalId != pageRange.fPhysicalColumnId)
500 return R__FAIL("column ID mismatch");
501 if (fCluster.fPageRanges.count(physicalId) > 0)
502 return R__FAIL("column ID conflict");
503 RClusterDescriptor::RColumnRange columnRange{physicalId, firstElementIndex, RClusterSize(0)};
504 columnRange.fCompressionSettings = compressionSettings;
505 for (const auto &pi : pageRange.fPageInfos) {
506 columnRange.fNElements += pi.fNElements;
507 }
508 fCluster.fPageRanges[physicalId] = pageRange.Clone();
509 fCluster.fColumnRanges[physicalId] = columnRange;
510 return RResult<void>::Success();
511}
512
515{
516 /// Carries out a depth-first traversal of a field subtree rooted at `rootFieldId`. For each field, `visitField` is
517 /// called passing the field ID and the number of overall repetitions, taking into account the repetitions of each
518 /// parent field in the hierarchy.
519 auto fnTraverseSubtree = [&](DescriptorId_t rootFieldId, std::uint64_t nRepetitionsAtThisLevel,
520 const auto &visitField, const auto &enterSubtree) -> void {
521 visitField(rootFieldId, nRepetitionsAtThisLevel);
522 for (const auto &f : desc.GetFieldIterable(rootFieldId)) {
523 const std::uint64_t nRepetitions = std::max(f.GetNRepetitions(), std::uint64_t{1U}) * nRepetitionsAtThisLevel;
524 enterSubtree(f.GetId(), nRepetitions, visitField, enterSubtree);
525 }
526 };
527
528 // Deferred columns can only be part of the header extension
529 auto xHeader = desc.GetHeaderExtension();
530 if (!xHeader)
531 return *this;
532
533 // Ensure that all columns in the header extension have their associated `R(Column|Page)Range`
534 for (const auto &topLevelFieldId : xHeader->GetTopLevelFields(desc)) {
535 fnTraverseSubtree(
536 topLevelFieldId, std::max(desc.GetFieldDescriptor(topLevelFieldId).GetNRepetitions(), std::uint64_t{1U}),
537 [&](DescriptorId_t fieldId, std::uint64_t nRepetitions) {
538 for (const auto &c : desc.GetColumnIterable(fieldId)) {
539 const DescriptorId_t physicalId = c.GetPhysicalId();
540 auto &columnRange = fCluster.fColumnRanges[physicalId];
541 auto &pageRange = fCluster.fPageRanges[physicalId];
542 // Initialize a RColumnRange for `physicalId` if it was not there
543 if (columnRange.fPhysicalColumnId == kInvalidDescriptorId) {
544 columnRange.fPhysicalColumnId = physicalId;
545 pageRange.fPhysicalColumnId = physicalId;
546 }
547 // Fixup the RColumnRange and RPageRange in deferred columns. We know what the first element index and
548 // number of elements should have been if the column was not deferred; fix those and let
549 // `ExtendToFitColumnRange()` synthesize RPageInfos accordingly.
550 // Note that a column whose first element index is != 0 already met the criteria of
551 // `RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the field zero
552 // excluding subfields of collection and variant fields.
553 if (c.IsDeferredColumn()) {
554 columnRange.fFirstElementIndex = fCluster.GetFirstEntryIndex() * nRepetitions;
555 columnRange.fNElements = fCluster.GetNEntries() * nRepetitions;
556 const auto element = Detail::RColumnElementBase::Generate<void>(c.GetModel().GetType());
557 pageRange.ExtendToFitColumnRange(columnRange, *element, Detail::RPage::kPageZeroSize);
558 }
559 }
560 },
561 fnTraverseSubtree);
562 }
563 return *this;
564}
565
568{
569 if (fCluster.fClusterId == kInvalidDescriptorId)
570 return R__FAIL("unset cluster ID");
571 if (fCluster.fNEntries == 0)
572 return R__FAIL("empty cluster");
573 for (const auto &pr : fCluster.fPageRanges) {
574 if (fCluster.fColumnRanges.count(pr.first) == 0) {
575 return R__FAIL("missing column range");
576 }
577 }
578 fCluster.fHasPageLocations = true;
580 std::swap(result, fCluster);
581 return result;
582}
583
584std::vector<ROOT::Experimental::RClusterDescriptorBuilder>
586 DescriptorId_t clusterGroupId)
587{
588 const auto &clusterGroupDesc = ntplDesc.GetClusterGroupDescriptor(clusterGroupId);
589 std::vector<RClusterDescriptorBuilder> result;
590 for (auto clusterId : clusterGroupDesc.fClusterIds) {
591 const auto &cluster = ntplDesc.GetClusterDescriptor(clusterId);
592 result.emplace_back(RClusterDescriptorBuilder(clusterId, cluster.GetFirstEntryIndex(), cluster.GetNEntries()));
593 }
594 return result;
595}
596
597////////////////////////////////////////////////////////////////////////////////
598
601{
602 if (fClusterGroup.fClusterGroupId == kInvalidDescriptorId)
603 return R__FAIL("unset cluster group ID");
605 std::swap(result, fClusterGroup);
606 return result;
607}
608
609////////////////////////////////////////////////////////////////////////////////
610
613{
614 if (fColumnGroup.fColumnGroupId == kInvalidDescriptorId)
615 return R__FAIL("unset column group ID");
617 std::swap(result, fColumnGroup);
618 return result;
619}
620
621////////////////////////////////////////////////////////////////////////////////
622
625 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
626 return R__FAIL("field with id '" + std::to_string(fieldId) + "' doesn't exist");
627 return RResult<void>::Success();
628}
629
632 // Reuse field name validity check
633 auto validName = Detail::RFieldBase::EnsureValidFieldName(fDescriptor.GetName());
634 if (!validName) {
635 return R__FORWARD_ERROR(validName);
636 }
637 // open-ended list of invariant checks
638 for (const auto& key_val: fDescriptor.fFieldDescriptors) {
639 const auto& id = key_val.first;
640 const auto& desc = key_val.second;
641 // parent not properly set
642 if (id != DescriptorId_t(0) && desc.GetParentId() == kInvalidDescriptorId) {
643 return R__FAIL("field with id '" + std::to_string(id) + "' has an invalid parent id");
644 }
645 }
646 return RResult<void>::Success();
647}
648
650{
652 std::swap(result, fDescriptor);
653 return result;
654}
655
657 const std::string_view description)
658{
659 fDescriptor.fName = std::string(name);
660 fDescriptor.fDescription = std::string(description);
661}
662
665{
666 if (fColumn.GetLogicalId() == kInvalidDescriptorId)
667 return R__FAIL("invalid logical column id");
668 if (fColumn.GetPhysicalId() == kInvalidDescriptorId)
669 return R__FAIL("invalid physical column id");
670 if (fColumn.GetModel().GetType() == EColumnType::kUnknown)
671 return R__FAIL("invalid column model");
672 if (fColumn.GetFieldId() == kInvalidDescriptorId)
673 return R__FAIL("invalid field id, dangling column");
674 return fColumn.Clone();
675}
676
678 const RFieldDescriptor& fieldDesc) : fField(fieldDesc.Clone())
679{
681 fField.fLinkIds = {};
682}
683
686 RFieldDescriptorBuilder fieldDesc;
687 fieldDesc.FieldVersion(field.GetFieldVersion())
689 .FieldName(field.GetName())
691 .TypeName(field.GetType())
692 .TypeAlias(field.GetTypeAlias())
693 .Structure(field.GetStructure())
695 return fieldDesc;
696}
697
700 if (fField.GetId() == kInvalidDescriptorId) {
701 return R__FAIL("invalid field id");
702 }
703 if (fField.GetStructure() == ENTupleStructure::kInvalid) {
704 return R__FAIL("invalid field structure");
705 }
706 // FieldZero is usually named "" and would be a false positive here
707 if (fField.GetParentId() != kInvalidDescriptorId) {
708 auto validName = Detail::RFieldBase::EnsureValidFieldName(fField.GetFieldName());
709 if (!validName) {
710 return R__FORWARD_ERROR(validName);
711 }
712 }
713 return fField.Clone();
714}
715
717 fDescriptor.fFieldDescriptors.emplace(fieldDesc.GetId(), fieldDesc.Clone());
718 if (fDescriptor.fHeaderExtension)
719 fDescriptor.fHeaderExtension->AddFieldId(fieldDesc.GetId());
720}
721
724{
725 auto fieldExists = RResult<void>::Success();
726 if (!(fieldExists = EnsureFieldExists(fieldId)))
727 return R__FORWARD_ERROR(fieldExists);
728 if (!(fieldExists = EnsureFieldExists(linkId)))
729 return R__FAIL("child field with id '" + std::to_string(linkId) + "' doesn't exist in NTuple");
730
731 if (linkId == fDescriptor.GetFieldZeroId()) {
732 return R__FAIL("cannot make FieldZero a child field");
733 }
734 // fail if field already has another valid parent
735 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
736 if ((parentId != kInvalidDescriptorId) && (parentId != fieldId)) {
737 return R__FAIL("field '" + std::to_string(linkId) + "' already has a parent ('" +
738 std::to_string(parentId) + ")");
739 }
740 if (fieldId == linkId) {
741 return R__FAIL("cannot make field '" + std::to_string(fieldId) + "' a child of itself");
742 }
743 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
744 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
745 return RResult<void>::Success();
746}
747
749 DescriptorId_t fieldId, const RColumnModel &model,
750 std::uint32_t index, std::uint64_t firstElementIdx)
751{
753 c.fLogicalColumnId = logicalId;
754 c.fPhysicalColumnId = physicalId;
755 c.fFieldId = fieldId;
756 c.fModel = model;
757 c.fIndex = index;
758 c.fFirstElementIndex = firstElementIdx;
759 if (!c.IsAliasColumn())
760 fDescriptor.fNPhysicalColumns++;
761 if (fDescriptor.fHeaderExtension)
762 fDescriptor.fHeaderExtension->AddColumn(/*isAliasColumn=*/c.IsAliasColumn());
763 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(c));
764}
765
766
769{
770 const auto fieldId = columnDesc.GetFieldId();
771 const auto index = columnDesc.GetIndex();
772
773 auto fieldExists = EnsureFieldExists(fieldId);
774 if (!fieldExists)
775 return R__FORWARD_ERROR(fieldExists);
776 if (fDescriptor.FindLogicalColumnId(fieldId, index) != kInvalidDescriptorId) {
777 return R__FAIL("column index clash");
778 }
779 if (index > 0) {
780 if (fDescriptor.FindLogicalColumnId(fieldId, index - 1) == kInvalidDescriptorId)
781 return R__FAIL("out of bounds column index");
782 }
783 if (columnDesc.IsAliasColumn()) {
784 if (columnDesc.GetModel() != fDescriptor.GetColumnDescriptor(columnDesc.GetPhysicalId()).GetModel())
785 return R__FAIL("alias column type mismatch");
786 }
787
788 auto logicalId = columnDesc.GetLogicalId();
789 if (!columnDesc.IsAliasColumn())
790 fDescriptor.fNPhysicalColumns++;
791 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(columnDesc));
792 if (fDescriptor.fHeaderExtension)
793 fDescriptor.fHeaderExtension->AddColumn(/*isAliasColumn=*/columnDesc.IsAliasColumn());
794
795 return RResult<void>::Success();
796}
797
800 std::uint64_t nEntries)
801{
802 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
803 return R__FAIL("cluster id clash while adding cluster summary");
804 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, firstEntry + nEntries);
805 fDescriptor.fClusterDescriptors.emplace(clusterId, RClusterDescriptor(clusterId, firstEntry, nEntries));
806 return RResult<void>::Success();
807}
808
810{
811 auto id = clusterGroup.GetId();
812 fDescriptor.fClusterGroupDescriptors.emplace(id, clusterGroup.MoveDescriptor().Unwrap());
813}
814
816{
817 fDescriptor.fName = "";
818 fDescriptor.fDescription = "";
819 fDescriptor.fFieldDescriptors.clear();
820 fDescriptor.fColumnDescriptors.clear();
821 fDescriptor.fClusterDescriptors.clear();
822 fDescriptor.fClusterGroupDescriptors.clear();
823 fDescriptor.fHeaderExtension.reset();
824}
825
827{
828 if (!fDescriptor.fHeaderExtension)
829 fDescriptor.fHeaderExtension = std::make_unique<RNTupleDescriptor::RHeaderExtension>();
830}
831
834{
835 auto clusterId = clusterDesc.GetId();
836 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
837 return R__FAIL("cluster id clash");
838 fDescriptor.fNEntries =
839 std::max(fDescriptor.fNEntries, clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries());
840 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
841 return RResult<void>::Success();
842}
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:307
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:303
#define d(i)
Definition RSha256.hxx:102
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
TObject * clone(const char *newname) const override
#define PI
#define R__ASSERT(e)
Definition TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
char name[80]
Definition TGX11.cxx:110
A column element encapsulates the translation between basic C++ types and their column representation...
std::size_t GetPackedSize(std::size_t nElements=1U) const
A field translates read and write calls from/to underlying columns to/from tree values.
Definition RField.hxx:83
virtual std::uint32_t GetFieldVersion() const
Indicates an evolution of the mapping scheme from C++ type to columns.
Definition RField.hxx:629
std::string GetDescription() const
Get the field's description.
Definition RField.hxx:606
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &canonicalType, const std::string &typeAlias)
Factory method to resurrect a field from the stored on-disk type information.
Definition RField.cxx:351
std::size_t GetNRepetitions() const
Definition RField.hxx:600
virtual std::uint32_t GetTypeVersion() const
Indicates an evolution of the C++ type itself.
Definition RField.hxx:631
static RResult< void > EnsureValidFieldName(std::string_view fieldName)
Check whether a given string is a valid field name.
Definition RField.cxx:494
ENTupleStructure GetStructure() const
Definition RField.hxx:599
A helper class for piece-wise construction of an RClusterDescriptor.
RClusterDescriptorBuilder & AddDeferredColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for deferred columns missing in this cluster.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
bool ContainsColumn(DescriptorId_t physicalId) const
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
std::unordered_set< DescriptorId_t > GetColumnIds() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RResult< RClusterGroupDescriptor > MoveDescriptor()
static std::vector< RClusterDescriptorBuilder > GetClusterSummaries(const RNTupleDescriptor &ntplDesc, DescriptorId_t clusterGroupId)
Used to prepare the cluster descriptor builders when loading the page locations for a certain cluster...
Clusters are stored in cluster groups.
bool operator==(const RClusterGroupDescriptor &other) const
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
Meta-data stored for every column of an ntuple.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnModel fModel
Contains the column type and whether it is sorted.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
RResult< RColumnGroupDescriptor > MoveDescriptor()
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
std::unordered_set< DescriptorId_t > fPhysicalColumnIds
bool operator==(const RColumnGroupDescriptor &other) const
Holds the static meta-data of an RNTuple column.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
static RFieldDescriptorBuilder FromField(const Detail::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::unique_ptr< Detail::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddClusterSummary(DescriptorId_t clusterId, std::uint64_t firstEntry, std::uint64_t nEntries)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
void AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index, std::uint64_t firstElementIdx=0U)
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void SetNTuple(const std::string_view name, const std::string_view description)
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
void AddClusterGroup(RClusterGroupDescriptorBuilder &&clusterGroup)
RResult< void > AddClusterWithDetails(RClusterDescriptor &&clusterDesc)
Used during writing.
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
void AddField(const RFieldDescriptor &fieldDesc)
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
std::vector< DescriptorId_t > GetTopLevelFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header.
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster summaries are added.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
RResult< void > DropClusterDetails(DescriptorId_t clusterId)
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
std::string fName
The ntuple name needs to be unique in a given storage location (file)
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
RResult< void > AddClusterDetails(RClusterDescriptor &&clusterDesc)
Methods to load and drop cluster details.
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
std::string fDescription
Free text from the user.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
static std::unique_ptr< RNTupleModel > Create()
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:207
struct void * fTypeName
Definition cppyy.h:9
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
The window of element indexes of a particular column in a particular cluster.
ClusterSize_t fNElements
The number of column elements in the cluster.
We do not need to store the element size / uncompressed page size because we know to which column the...
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(RClusterSize::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Detail::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
Wrap the integer in a struct in order to avoid template specialization clash with std::uint32_t.