65std::unique_ptr<ROOT::Experimental::Detail::RFieldBase>
71 std::vector<std::unique_ptr<Detail::RFieldBase>> memberFields;
72 for (
auto id : fLinkIds) {
74 memberFields.emplace_back(memberDesc.CreateField(ntplDesc));
76 auto recordField = std::make_unique<RRecordField>(
"_0", memberFields);
77 auto collectionField = std::make_unique<RVectorField>(GetFieldName(), std::move(recordField));
78 collectionField->SetOnDiskId(fFieldId);
79 return collectionField;
83 field->SetOnDiskId(fFieldId);
84 for (
auto &
f : *field)
85 f.SetOnDiskId(ntplDesc.
FindFieldId(
f.GetName(),
f.GetParent()->GetOnDiskId()));
122 decltype(idxInCluster) firstInPage = 0;
124 for (
const auto &pi : fPageInfos) {
125 if (firstInPage + pi.fNElements > idxInCluster) {
148 EnsureHasPageLocations();
149 std::unordered_set<DescriptorId_t>
result;
150 for (
const auto &
x : fColumnRanges)
158 EnsureHasPageLocations();
159 return fColumnRanges.find(columnId) != fColumnRanges.end();
165 EnsureHasPageLocations();
166 std::uint64_t nbytes = 0;
167 for (
const auto &pr : fPageRanges) {
168 for (
const auto &pi : pr.second.fPageInfos) {
169 nbytes += pi.fLocator.fBytesOnStorage;
177 if (!fHasPageLocations)
178 throw RException(
R__FAIL(
"invalid attempt to access page locations of summary-only cluster descriptor"));
189 for (
const auto &
d : fPageRanges)
210 for (
const auto &cd : fClusterDescriptors) {
211 if (!cd.second.ContainsColumn(columnId))
213 auto columnRange = cd.second.GetColumnRange(columnId);
214 result = std::max(
result, columnRange.fFirstElementIndex + columnRange.fNElements);
223 std::string leafName(fieldName);
224 auto posDot = leafName.find_last_of(
'.');
225 if (posDot != std::string::npos) {
226 auto parentName = leafName.substr(0, posDot);
227 leafName = leafName.substr(posDot + 1);
228 parentId = FindFieldId(parentName, parentId);
230 for (
const auto &fd : fFieldDescriptors) {
231 if (fd.second.GetParentId() == parentId && fd.second.GetFieldName() == leafName)
232 return fd.second.GetId();
243 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
244 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
246 return fieldDescriptor.GetFieldName();
247 return prefix +
"." + fieldDescriptor.GetFieldName();
261 return FindFieldId(fieldName, GetFieldZeroId());
268 for (
const auto &cd : fColumnDescriptors) {
269 if (cd.second.GetFieldId() == fieldId && cd.second.GetIndex() == columnIndex)
270 return cd.second.GetId();
279 for (
const auto &cd : fClusterDescriptors) {
280 if (!cd.second.ContainsColumn(columnId))
282 auto columnRange = cd.second.GetColumnRange(columnId);
283 if (columnRange.Contains(
index))
284 return cd.second.GetId();
294 const auto &clusterDesc = GetClusterDescriptor(clusterId);
295 auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
297 for (
const auto &cd : fClusterDescriptors) {
298 if (cd.second.GetFirstEntryIndex() == firstEntryInNextCluster)
299 return cd.second.GetId();
309 const auto &clusterDesc = GetClusterDescriptor(clusterId);
311 for (
const auto &cd : fClusterDescriptors) {
312 if (cd.second.GetFirstEntryIndex() + cd.second.GetNEntries() == clusterDesc.GetFirstEntryIndex())
313 return cd.second.GetId();
321 auto iter = fClusterDescriptors.find(clusterDesc.GetId());
322 if (iter == fClusterDescriptors.end())
323 return R__FAIL(
"invalid attempt to add cluster details without known cluster summary");
324 if (iter->second.HasPageLocations())
325 return R__FAIL(
"invalid attempt to re-populate page list");
326 if (!clusterDesc.HasPageLocations())
327 return R__FAIL(
"provided cluster descriptor does not contain page locations");
328 iter->second = std::move(clusterDesc);
334 auto iter = fClusterDescriptors.find(clusterId);
335 if (iter == fClusterDescriptors.end())
336 return R__FAIL(
"invalid attempt to drop cluster details of unknown cluster");
337 if (!iter->second.HasPageLocations())
338 return R__FAIL(
"invalid attempt to drop details of cluster summary");
339 iter->second =
RClusterDescriptor(clusterId, iter->second.GetFirstEntryIndex(), iter->second.GetNEntries());
346 model->GetFieldZero()->SetOnDiskId(GetFieldZeroId());
347 for (
const auto &topDesc : GetTopLevelFields())
348 model->AddField(topDesc.CreateField(*
this));
355 auto clone = std::make_unique<RNTupleDescriptor>();
356 clone->fName = fName;
357 clone->fDescription = fDescription;
358 clone->fOnDiskHeaderSize = fOnDiskHeaderSize;
359 clone->fOnDiskFooterSize = fOnDiskFooterSize;
360 clone->fNEntries = fNEntries;
361 clone->fGeneration = fGeneration;
362 for (
const auto &
d : fFieldDescriptors)
363 clone->fFieldDescriptors.emplace(
d.first,
d.second.Clone());
364 for (
const auto &
d : fColumnDescriptors)
365 clone->fColumnDescriptors.emplace(
d.first,
d.second.Clone());
366 for (
const auto &
d : fClusterGroupDescriptors)
367 clone->fClusterGroupDescriptors.emplace(
d.first,
d.second.Clone());
368 for (
const auto &
d : fClusterDescriptors)
369 clone->fClusterDescriptors.emplace(
d.first,
d.second.Clone());
401 DescriptorId_t columnId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings,
405 return R__FAIL(
"column ID mismatch");
406 if (fCluster.fPageRanges.count(columnId) > 0)
407 return R__FAIL(
"column ID conflict");
409 columnRange.fCompressionSettings = compressionSettings;
411 columnRange.fNElements += pi.fNElements;
413 fCluster.fPageRanges[columnId] = pageRange.
Clone();
414 fCluster.fColumnRanges[columnId] = columnRange;
423 return R__FAIL(
"unset cluster ID");
424 if (fCluster.fNEntries == 0)
425 return R__FAIL(
"empty cluster");
426 for (
const auto &pr : fCluster.fPageRanges) {
427 if (fCluster.fColumnRanges.count(pr.first) == 0) {
428 return R__FAIL(
"missing column range");
431 fCluster.fHasPageLocations =
true;
433 std::swap(
result, fCluster);
437std::vector<ROOT::Experimental::RClusterDescriptorBuilder>
442 std::vector<RClusterDescriptorBuilder>
result;
443 for (
auto clusterId : clusterGroupDesc.fClusterIds) {
456 return R__FAIL(
"unset cluster group ID");
458 std::swap(
result, fClusterGroup);
468 return R__FAIL(
"unset column group ID");
470 std::swap(
result, fColumnGroup);
478 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
479 return R__FAIL(
"field with id '" + std::to_string(fieldId) +
"' doesn't exist");
491 for (
const auto& key_val: fDescriptor.fFieldDescriptors) {
492 const auto&
id = key_val.first;
493 const auto& desc = key_val.second;
496 return R__FAIL(
"field with id '" + std::to_string(
id) +
"' has an invalid parent id");
505 std::swap(
result, fDescriptor);
510 const std::string_view description)
512 fDescriptor.fName = std::string(
name);
513 fDescriptor.fDescription = std::string(description);
520 return R__FAIL(
"invalid column id");
522 return R__FAIL(
"invalid column model");
524 return R__FAIL(
"invalid field id, dangling column");
525 return fColumn.Clone();
551 return R__FAIL(
"invalid field id");
554 return R__FAIL(
"invalid field structure");
563 return fField.Clone();
567 fDescriptor.fFieldDescriptors.emplace(fieldDesc.
GetId(), fieldDesc.
Clone());
574 if (!(fieldExists = EnsureFieldExists(fieldId)))
576 if (!(fieldExists = EnsureFieldExists(linkId)))
577 return R__FAIL(
"child field with id '" + std::to_string(linkId) +
"' doesn't exist in NTuple");
579 if (linkId == fDescriptor.GetFieldZeroId()) {
580 return R__FAIL(
"cannot make FieldZero a child field");
583 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
585 return R__FAIL(
"field '" + std::to_string(linkId) +
"' already has a parent ('" +
586 std::to_string(parentId) +
")");
588 if (fieldId == linkId) {
589 return R__FAIL(
"cannot make field '" + std::to_string(fieldId) +
"' a child of itself");
591 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
592 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
601 c.fFieldId = fieldId;
604 fDescriptor.fColumnDescriptors.emplace(columnId, std::move(
c));
611 const auto fieldId = columnDesc.GetFieldId();
612 const auto index = columnDesc.GetIndex();
614 auto fieldExists = EnsureFieldExists(fieldId);
618 return R__FAIL(
"column index clash");
622 return R__FAIL(
"out of bounds column index");
625 auto columnId = columnDesc.GetId();
626 fDescriptor.fColumnDescriptors.emplace(columnId, std::move(columnDesc));
633 std::uint64_t nEntries)
635 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
636 return R__FAIL(
"cluster id clash while adding cluster summary");
637 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, firstEntry + nEntries);
638 fDescriptor.fClusterDescriptors.emplace(clusterId,
RClusterDescriptor(clusterId, firstEntry, nEntries));
644 auto id = clusterGroup.GetId();
645 fDescriptor.fClusterGroupDescriptors.emplace(
id, clusterGroup.MoveDescriptor().Unwrap());
650 fDescriptor.fName =
"";
651 fDescriptor.fDescription =
"";
652 fDescriptor.fFieldDescriptors.clear();
653 fDescriptor.fColumnDescriptors.clear();
654 fDescriptor.fClusterDescriptors.clear();
655 fDescriptor.fClusterGroupDescriptors.clear();
661 auto clusterId = clusterDesc.GetId();
662 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
663 return R__FAIL(
"cluster id clash");
664 fDescriptor.fNEntries =
665 std::max(fDescriptor.fNEntries, clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries());
666 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
virtual std::uint32_t GetFieldVersion() const
Indicates an evolution of the mapping scheme from C++ type to columns.
std::string GetDescription() const
Get the field's description.
std::string GetName() const
std::size_t GetNRepetitions() const
virtual std::uint32_t GetTypeVersion() const
Indicates an evolution of the C++ type itself.
std::string GetType() const
static RResult< void > EnsureValidFieldName(std::string_view fieldName)
Check whether a given string is a valid field name.
static RResult< std::unique_ptr< RFieldBase > > Create(const std::string &fieldName, const std::string &typeName)
Factory method to resurrect a field from the stored on-disk type information.
ENTupleStructure GetStructure() const
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > CommitColumnRange(DescriptorId_t columnId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
bool ContainsColumn(DescriptorId_t columnId) const
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
DescriptorId_t fClusterId
std::unordered_set< DescriptorId_t > GetColumnIds() const
RClusterDescriptor Clone() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
std::uint64_t GetBytesOnStorage() const
void EnsureHasPageLocations() const
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RResult< RClusterGroupDescriptor > MoveDescriptor()
static std::vector< RClusterDescriptorBuilder > GetClusterSummaries(const RNTupleDescriptor &ntplDesc, DescriptorId_t clusterGroupId)
Used to prepare the cluster descriptor builders when loading the page locations for a certain cluster...
Clusters are stored in cluster groups.
RClusterGroupDescriptor Clone() const
std::uint32_t fPageListLength
Uncompressed size of the page list.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
DescriptorId_t fClusterGroupId
bool operator==(const RClusterGroupDescriptor &other) const
std::vector< DescriptorId_t > fClusterIds
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
Meta-data stored for every column of an ntuple.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnModel fModel
Contains the column type and whether it is sorted.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
RResult< RColumnGroupDescriptor > MoveDescriptor()
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
bool operator==(const RColumnGroupDescriptor &other) const
DescriptorId_t fColumnGroupId
std::unordered_set< DescriptorId_t > fColumnIds
Holds the static meta-data of a column in a tree.
Base class for all ROOT issued exceptions.
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
static RFieldDescriptorBuilder FromField(const Detail::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::unique_ptr< Detail::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
DescriptorId_t GetId() const
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
RNTupleDescriptor MoveDescriptor()
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index)
RResult< void > AddClusterSummary(DescriptorId_t clusterId, std::uint64_t firstEntry, std::uint64_t nEntries)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void SetNTuple(const std::string_view name, const std::string_view description)
void Reset()
Clears the clusters, fields, and columns stored so far and returns to a pristine ntuple descriptor.
void AddClusterGroup(RClusterGroupDescriptorBuilder &&clusterGroup)
RResult< void > AddClusterWithDetails(RClusterDescriptor &&clusterDesc)
Used during writing.
void AddField(const RFieldDescriptor &fieldDesc)
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster summaries are added.
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
RResult< void > DropClusterDetails(DescriptorId_t clusterId)
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
std::string fName
The ntuple name needs to be unique in a given storage location (file)
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
RResult< void > AddClusterDetails(RClusterDescriptor &&clusterDesc)
Methods to load and drop cluster details.
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t columnId) const
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
std::string fDescription
Free text from the user.
DescriptorId_t FindClusterId(DescriptorId_t columnId, NTupleSize_t index) const
static std::unique_ptr< RNTupleModel > Create()
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr DescriptorId_t kInvalidDescriptorId
The window of element indexes of a particular column in a particular cluster.
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...