65std::unique_ptr<ROOT::Experimental::Detail::RFieldBase>
71 std::vector<std::unique_ptr<Detail::RFieldBase>> memberFields;
72 for (
auto id : fLinkIds) {
74 memberFields.emplace_back(memberDesc.CreateField(ntplDesc));
76 auto recordField = std::make_unique<RRecordField>(
"_0", memberFields);
77 auto collectionField = std::make_unique<RVectorField>(GetFieldName(), std::move(recordField));
78 collectionField->SetOnDiskId(fFieldId);
79 return collectionField;
83 field->SetOnDiskId(fFieldId);
84 for (
auto &
f : *field)
85 f.SetOnDiskId(ntplDesc.
FindFieldId(
f.GetName(),
f.GetParent()->GetOnDiskId()));
121 decltype(idxInCluster) firstInPage = 0;
123 for (
const auto &
pi : fPageInfos) {
124 if (firstInPage +
pi.fNElements > idxInCluster) {
128 firstInPage +=
pi.fNElements;
147 EnsureHasPageLocations();
148 std::unordered_set<DescriptorId_t>
result;
149 for (
const auto &
x : fColumnRanges)
156 EnsureHasPageLocations();
157 return fColumnRanges.find(physicalId) != fColumnRanges.end();
163 EnsureHasPageLocations();
164 std::uint64_t nbytes = 0;
165 for (
const auto &pr : fPageRanges) {
166 for (
const auto &
pi : pr.second.fPageInfos) {
167 nbytes +=
pi.fLocator.fBytesOnStorage;
175 if (!fHasPageLocations)
176 throw RException(
R__FAIL(
"invalid attempt to access page locations of summary-only cluster descriptor"));
187 for (
const auto &
d : fPageRanges)
208 for (
const auto &cd : fClusterDescriptors) {
209 if (!cd.second.ContainsColumn(physicalColumnId))
211 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
212 result = std::max(
result, columnRange.fFirstElementIndex + columnRange.fNElements);
221 std::string leafName(fieldName);
222 auto posDot = leafName.find_last_of(
'.');
223 if (posDot != std::string::npos) {
224 auto parentName = leafName.substr(0, posDot);
225 leafName = leafName.substr(posDot + 1);
226 parentId = FindFieldId(parentName, parentId);
228 for (
const auto &fd : fFieldDescriptors) {
229 if (fd.second.GetParentId() == parentId && fd.second.GetFieldName() == leafName)
230 return fd.second.GetId();
241 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
242 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
244 return fieldDescriptor.GetFieldName();
245 return prefix +
"." + fieldDescriptor.GetFieldName();
259 return FindFieldId(fieldName, GetFieldZeroId());
265 for (
const auto &cd : fColumnDescriptors) {
266 if (cd.second.GetFieldId() == fieldId && cd.second.GetIndex() == columnIndex)
267 return cd.second.GetLogicalId();
275 auto logicalId = FindLogicalColumnId(fieldId, columnIndex);
278 return GetColumnDescriptor(logicalId).GetPhysicalId();
285 for (
const auto &cd : fClusterDescriptors) {
286 if (!cd.second.ContainsColumn(physicalColumnId))
288 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
289 if (columnRange.Contains(
index))
290 return cd.second.GetId();
300 const auto &clusterDesc = GetClusterDescriptor(clusterId);
301 auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
303 for (
const auto &cd : fClusterDescriptors) {
304 if (cd.second.GetFirstEntryIndex() == firstEntryInNextCluster)
305 return cd.second.GetId();
315 const auto &clusterDesc = GetClusterDescriptor(clusterId);
317 for (
const auto &cd : fClusterDescriptors) {
318 if (cd.second.GetFirstEntryIndex() + cd.second.GetNEntries() == clusterDesc.GetFirstEntryIndex())
319 return cd.second.GetId();
327 auto iter = fClusterDescriptors.find(clusterDesc.GetId());
328 if (iter == fClusterDescriptors.end())
329 return R__FAIL(
"invalid attempt to add cluster details without known cluster summary");
330 if (iter->second.HasPageLocations())
331 return R__FAIL(
"invalid attempt to re-populate page list");
332 if (!clusterDesc.HasPageLocations())
333 return R__FAIL(
"provided cluster descriptor does not contain page locations");
334 iter->second = std::move(clusterDesc);
340 auto iter = fClusterDescriptors.find(clusterId);
341 if (iter == fClusterDescriptors.end())
342 return R__FAIL(
"invalid attempt to drop cluster details of unknown cluster");
343 if (!iter->second.HasPageLocations())
344 return R__FAIL(
"invalid attempt to drop details of cluster summary");
345 iter->second =
RClusterDescriptor(clusterId, iter->second.GetFirstEntryIndex(), iter->second.GetNEntries());
352 model->GetFieldZero()->SetOnDiskId(GetFieldZeroId());
353 for (
const auto &topDesc : GetTopLevelFields())
354 model->AddField(topDesc.CreateField(*
this));
361 auto clone = std::make_unique<RNTupleDescriptor>();
362 clone->fName = fName;
363 clone->fDescription = fDescription;
364 clone->fOnDiskHeaderSize = fOnDiskHeaderSize;
365 clone->fOnDiskFooterSize = fOnDiskFooterSize;
366 clone->fNEntries = fNEntries;
367 clone->fNPhysicalColumns = fNPhysicalColumns;
368 clone->fGeneration = fGeneration;
369 for (
const auto &
d : fFieldDescriptors)
370 clone->fFieldDescriptors.emplace(
d.first,
d.second.Clone());
371 for (
const auto &
d : fColumnDescriptors)
372 clone->fColumnDescriptors.emplace(
d.first,
d.second.Clone());
373 for (
const auto &
d : fClusterGroupDescriptors)
374 clone->fClusterGroupDescriptors.emplace(
d.first,
d.second.Clone());
375 for (
const auto &
d : fClusterDescriptors)
376 clone->fClusterDescriptors.emplace(
d.first,
d.second.Clone());
408 std::uint64_t firstElementIndex,
409 std::uint32_t compressionSettings,
413 return R__FAIL(
"column ID mismatch");
414 if (fCluster.fPageRanges.count(physicalId) > 0)
415 return R__FAIL(
"column ID conflict");
417 columnRange.fCompressionSettings = compressionSettings;
419 columnRange.fNElements +=
pi.fNElements;
421 fCluster.fPageRanges[physicalId] = pageRange.
Clone();
422 fCluster.fColumnRanges[physicalId] = columnRange;
431 return R__FAIL(
"unset cluster ID");
432 if (fCluster.fNEntries == 0)
433 return R__FAIL(
"empty cluster");
434 for (
const auto &pr : fCluster.fPageRanges) {
435 if (fCluster.fColumnRanges.count(pr.first) == 0) {
436 return R__FAIL(
"missing column range");
439 fCluster.fHasPageLocations =
true;
445std::vector<ROOT::Experimental::RClusterDescriptorBuilder>
450 std::vector<RClusterDescriptorBuilder>
result;
451 for (
auto clusterId : clusterGroupDesc.fClusterIds) {
464 return R__FAIL(
"unset cluster group ID");
476 return R__FAIL(
"unset column group ID");
486 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
487 return R__FAIL(
"field with id '" + std::to_string(fieldId) +
"' doesn't exist");
499 for (
const auto& key_val: fDescriptor.fFieldDescriptors) {
500 const auto&
id = key_val.first;
501 const auto& desc = key_val.second;
504 return R__FAIL(
"field with id '" + std::to_string(
id) +
"' has an invalid parent id");
520 fDescriptor.fName = std::string(
name);
521 fDescriptor.fDescription = std::string(description);
528 return R__FAIL(
"invalid logical column id");
530 return R__FAIL(
"invalid physical column id");
532 return R__FAIL(
"invalid column model");
534 return R__FAIL(
"invalid field id, dangling column");
535 return fColumn.Clone();
561 return R__FAIL(
"invalid field id");
564 return R__FAIL(
"invalid field structure");
573 return fField.Clone();
577 fDescriptor.fFieldDescriptors.emplace(fieldDesc.
GetId(), fieldDesc.
Clone());
584 if (!(fieldExists = EnsureFieldExists(fieldId)))
586 if (!(fieldExists = EnsureFieldExists(linkId)))
587 return R__FAIL(
"child field with id '" + std::to_string(linkId) +
"' doesn't exist in NTuple");
589 if (linkId == fDescriptor.GetFieldZeroId()) {
590 return R__FAIL(
"cannot make FieldZero a child field");
593 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
595 return R__FAIL(
"field '" + std::to_string(linkId) +
"' already has a parent ('" +
596 std::to_string(parentId) +
")");
598 if (fieldId == linkId) {
599 return R__FAIL(
"cannot make field '" + std::to_string(fieldId) +
"' a child of itself");
601 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
602 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
611 c.fLogicalColumnId = logicalId;
612 c.fPhysicalColumnId = physicalId;
613 c.fFieldId = fieldId;
616 if (!
c.IsAliasColumn())
617 fDescriptor.fNPhysicalColumns++;
618 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(
c));
625 const auto fieldId = columnDesc.GetFieldId();
626 const auto index = columnDesc.GetIndex();
628 auto fieldExists = EnsureFieldExists(fieldId);
632 return R__FAIL(
"column index clash");
636 return R__FAIL(
"out of bounds column index");
638 if (columnDesc.IsAliasColumn()) {
639 if (columnDesc.GetModel() != fDescriptor.GetColumnDescriptor(columnDesc.GetPhysicalId()).GetModel())
640 return R__FAIL(
"alias column type mismatch");
643 auto logicalId = columnDesc.GetLogicalId();
644 if (!columnDesc.IsAliasColumn())
645 fDescriptor.fNPhysicalColumns++;
646 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(columnDesc));
653 std::uint64_t nEntries)
655 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
656 return R__FAIL(
"cluster id clash while adding cluster summary");
657 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, firstEntry + nEntries);
658 fDescriptor.fClusterDescriptors.emplace(clusterId,
RClusterDescriptor(clusterId, firstEntry, nEntries));
664 auto id = clusterGroup.GetId();
665 fDescriptor.fClusterGroupDescriptors.emplace(
id, clusterGroup.MoveDescriptor().Unwrap());
670 fDescriptor.fName =
"";
671 fDescriptor.fDescription =
"";
672 fDescriptor.fFieldDescriptors.clear();
673 fDescriptor.fColumnDescriptors.clear();
674 fDescriptor.fClusterDescriptors.clear();
675 fDescriptor.fClusterGroupDescriptors.clear();
681 auto clusterId = clusterDesc.GetId();
682 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
683 return R__FAIL(
"cluster id clash");
684 fDescriptor.fNEntries =
685 std::max(fDescriptor.fNEntries, clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries());
686 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
virtual std::uint32_t GetFieldVersion() const
Indicates an evolution of the mapping scheme from C++ type to columns.
std::string GetDescription() const
Get the field's description.
std::string GetName() const
std::size_t GetNRepetitions() const
virtual std::uint32_t GetTypeVersion() const
Indicates an evolution of the C++ type itself.
std::string GetType() const
static RResult< void > EnsureValidFieldName(std::string_view fieldName)
Check whether a given string is a valid field name.
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName)
Factory method to resurrect a field from the stored on-disk type information.
ENTupleStructure GetStructure() const
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
bool ContainsColumn(DescriptorId_t physicalId) const
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
DescriptorId_t fClusterId
std::unordered_set< DescriptorId_t > GetColumnIds() const
RClusterDescriptor Clone() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
std::uint64_t GetBytesOnStorage() const
void EnsureHasPageLocations() const
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RResult< RClusterGroupDescriptor > MoveDescriptor()
static std::vector< RClusterDescriptorBuilder > GetClusterSummaries(const RNTupleDescriptor &ntplDesc, DescriptorId_t clusterGroupId)
Used to prepare the cluster descriptor builders when loading the page locations for a certain cluster...
Clusters are stored in cluster groups.
RClusterGroupDescriptor Clone() const
std::uint32_t fPageListLength
Uncompressed size of the page list.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
DescriptorId_t fClusterGroupId
bool operator==(const RClusterGroupDescriptor &other) const
std::vector< DescriptorId_t > fClusterIds
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
Meta-data stored for every column of an ntuple.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnModel fModel
Contains the column type and whether it is sorted.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
RResult< RColumnGroupDescriptor > MoveDescriptor()
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
std::unordered_set< DescriptorId_t > fPhysicalColumnIds
bool operator==(const RColumnGroupDescriptor &other) const
DescriptorId_t fColumnGroupId
Holds the static meta-data of an RNTuple column.
Base class for all ROOT issued exceptions.
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
static RFieldDescriptorBuilder FromField(const Detail::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::unique_ptr< Detail::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
DescriptorId_t GetId() const
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
RNTupleDescriptor MoveDescriptor()
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddClusterSummary(DescriptorId_t clusterId, std::uint64_t firstEntry, std::uint64_t nEntries)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void SetNTuple(const std::string_view name, const std::string_view description)
void Reset()
Clears so-far stored clusters, fields, and columns and return to a pristine ntuple descriptor.
void AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index)
void AddClusterGroup(RClusterGroupDescriptorBuilder &&clusterGroup)
RResult< void > AddClusterWithDetails(RClusterDescriptor &&clusterDesc)
Used during writing.
void AddField(const RFieldDescriptor &fieldDesc)
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of...
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster summaries are added.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
RResult< void > DropClusterDetails(DescriptorId_t clusterId)
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
std::string fName
The ntuple name needs to be unique in a given storage location (file)
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
RResult< void > AddClusterDetails(RClusterDescriptor &&clusterDesc)
Methods to load and drop cluster details.
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
std::string fDescription
Free text from the user.
static std::unique_ptr< RNTupleModel > Create()
RResult<void> has no data member and no Inspect() method but instead a Success() factory method.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
basic_string_view< char > string_view
void swap(RDirectoryEntry &e1, RDirectoryEntry &e2) noexcept
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
static constexpr double pi
The window of element indexes of a particular column in a particular cluster.
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...