17#ifndef ROOT7_RNTupleDescriptor
18#define ROOT7_RNTupleDescriptor
37#include <unordered_map>
38#include <unordered_set>
41namespace Experimental {
44class RNTupleDescriptor;
48class RColumnElementBase;
52class RColumnDescriptorBuilder;
53class RColumnGroupDescriptorBuilder;
54class RClusterDescriptorBuilder;
55class RClusterGroupDescriptorBuilder;
56class RFieldDescriptorBuilder;
57class RNTupleDescriptorBuilder;
255 std::size_t pageSize);
337 std::unordered_set<DescriptorId_t>
GetColumnIds()
const;
697 std::unique_ptr<RNTupleDescriptor>
Clone()
const;
1092 void SetNTuple(
const std::string_view
name,
const std::string_view description);
1104 const RColumnModel &model, std::uint32_t
index, std::uint64_t firstElementIdx = 0U);
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
TObject * clone(const char *newname) const override
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
A helper class for piece-wise construction of an RClusterDescriptor.
RClusterDescriptorBuilder & AddDeferredColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for deferred columns missing in this cluster.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RClusterDescriptorBuilder & ClusterId(DescriptorId_t clusterId)
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
RClusterDescriptor fCluster
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
void AddClusters(const std::vector< DescriptorId_t > &clusterIds)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
RResult< RClusterGroupDescriptor > MoveDescriptor()
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RClusterGroupDescriptor fClusterGroup
RClusterGroupDescriptorBuilder()=default
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & PhysicalColumnId(DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
DescriptorId_t GetFieldId() const
RColumnDescriptorBuilder & LogicalColumnId(DescriptorId_t logicalColumnId)
RColumnDescriptor fColumn
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RColumnGroupDescriptor.
RResult< RColumnGroupDescriptor > MoveDescriptor()
RColumnGroupDescriptor fColumnGroup
RColumnGroupDescriptorBuilder()=default
RColumnGroupDescriptorBuilder & ColumnGroupId(DescriptorId_t columnGroupId)
void AddColumn(DescriptorId_t physicalId)
A helper class for piece-wise construction of an RFieldDescriptor.
DescriptorId_t GetParentId() const
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
A helper class for piece-wise construction of an RNTupleDescriptor.
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RNTupleDescriptor fDescriptor
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
void SetOnDiskHeaderSize(std::uint64_t size)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
void SetFeature(unsigned int flag)
const RNTupleDescriptor & GetDescriptor() const
void AddField(const RFieldDescriptor &fieldDesc)
void AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index, std::uint64_t firstElementIdx=0U)
RNTupleDescriptor MoveDescriptor()
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3)
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor(RClusterDescriptor &&other)=default
bool ContainsColumn(DescriptorId_t physicalId) const
RClusterDescriptor(const RClusterDescriptor &other)=delete
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
const RColumnRange & GetColumnRange(DescriptorId_t physicalId) const
DescriptorId_t fClusterId
NTupleSize_t GetFirstEntryIndex() const
std::unordered_set< DescriptorId_t > GetColumnIds() const
RClusterDescriptor Clone() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
ClusterSize_t GetNEntries() const
std::uint64_t GetBytesOnStorage() const
const RPageRange & GetPageRange(DescriptorId_t physicalId) const
DescriptorId_t GetId() const
RClusterDescriptor()=default
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
RClusterGroupDescriptor Clone() const
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
std::uint64_t fPageListLength
Uncompressed size of the page list.
std::uint64_t GetMinEntry() const
std::uint64_t GetPageListLength() const
RClusterGroupDescriptor CloneSummary() const
const std::vector< DescriptorId_t > & GetClusterIds() const
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
std::uint32_t GetNClusters() const
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor()=default
DescriptorId_t fClusterGroupId
bool HasClusterDetails() const
A cluster group is loaded in two stages.
bool operator==(const RClusterGroupDescriptor &other) const
RNTupleLocator GetPageListLocator() const
std::vector< DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
std::uint64_t GetEntrySpan() const
DescriptorId_t GetId() const
Meta-data stored for every column of an ntuple.
RColumnModel GetModel() const
bool IsDeferredColumn() const
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
std::uint64_t fFirstElementIndex
Specifies the index for the first stored element for this column.
RColumnDescriptor(const RColumnDescriptor &other)=delete
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
std::uint64_t GetFirstElementIndex() const
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
DescriptorId_t GetFieldId() const
RColumnDescriptor()=default
RColumnModel fModel
Contains the column type and whether it is sorted.
DescriptorId_t GetLogicalId() const
bool IsAliasColumn() const
DescriptorId_t GetPhysicalId() const
std::uint32_t GetIndex() const
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
RColumnGroupDescriptor(const RColumnGroupDescriptor &other)=delete
RColumnGroupDescriptor & operator=(const RColumnGroupDescriptor &other)=delete
std::unordered_set< DescriptorId_t > fPhysicalColumnIds
DescriptorId_t GetId() const
RColumnGroupDescriptor & operator=(RColumnGroupDescriptor &&other)=default
bool operator==(const RColumnGroupDescriptor &other) const
bool HasAllColumns() const
bool Contains(DescriptorId_t physicalId) const
RColumnGroupDescriptor()=default
DescriptorId_t fColumnGroupId
const std::unordered_set< DescriptorId_t > & GetPhysicalColumnIds() const
RColumnGroupDescriptor(RColumnGroupDescriptor &&other)=default
Holds the static meta-data of an RNTuple column.
Base class for all ROOT issued exceptions.
A field translates read and write calls from/to underlying columns to/from tree values.
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
DescriptorId_t GetParentId() const
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string GetTypeAlias() const
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::uint32_t GetTypeVersion() const
std::string GetFieldName() const
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t GetFieldVersion() const
RFieldDescriptor()=default
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
DescriptorId_t GetId() const
const std::vector< DescriptorId_t > & GetLinkIds() const
std::string GetFieldDescription() const
std::string GetTypeName() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
std::uint64_t GetNRepetitions() const
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
ENTupleStructure GetStructure() const
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
std::forward_iterator_tag iterator_category
bool operator!=(const iterator &rh) const
RIterator(const RNTupleDescriptor &ntuple, std::size_t index)
bool operator==(const iterator &rh) const
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
std::ptrdiff_t difference_type
Used to loop over all the clusters of an ntuple (in unspecified order)
RClusterDescriptorIterable(const RNTupleDescriptor &ntuple)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::forward_iterator_tag iterator_category
std::ptrdiff_t difference_type
RIterator(const RNTupleDescriptor &ntuple, std::size_t index)
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
bool operator!=(const iterator &rh) const
bool operator==(const iterator &rh) const
Used to loop over all the cluster groups of an ntuple (in unspecified order)
RClusterGroupDescriptorIterable(const RNTupleDescriptor &ntuple)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
bool operator==(const iterator &rh) const
bool operator!=(const iterator &rh) const
std::ptrdiff_t difference_type
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
std::forward_iterator_tag iterator_category
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
Used to loop over a field's associated columns.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by index id.
void CollectColumnIds(DescriptorId_t fieldId)
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
bool operator!=(const iterator &rh) const
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
bool operator==(const iterator &rh) const
std::forward_iterator_tag iterator_category
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::ptrdiff_t difference_type
Used to loop over a field's child fields.
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::size_t GetNLogicalColumns() const
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
std::set< unsigned int > fFeatureFlags
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
std::string GetDescription() const
std::string GetName() const
std::uint64_t GetOnDiskHeaderXxHash3() const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::uint64_t GetOnDiskHeaderSize() const
std::size_t GetNClusters() const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::uint64_t GetGeneration() const
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
RNTupleDescriptor()=default
static constexpr unsigned int kFeatureFlagTest
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
RFieldDescriptorIterable GetTopLevelFields(const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::size_t GetNFields() const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
RResult< void > AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
std::uint64_t GetOnDiskFooterSize() const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RClusterDescriptorIterable GetClusterIterable() const
std::size_t GetNClusterGroups() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::unique_ptr< RNTupleModel > CreateModel() const
Re-create the C++ model from the stored meta-data.
RResult< void > DropClusterGroupDetails(DescriptorId_t clusterGroupId)
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RColumnDescriptorIterable GetColumnIterable() const
bool HasFeature(unsigned int flag) const
std::size_t GetNPhysicalColumns() const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::size_t GetNActiveClusters() const
std::string fDescription
Free text from the user.
RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::vector< std::uint64_t > GetFeatureFlags() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
constexpr int kUnknownCompressionSettings
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr ClusterSize_t kInvalidClusterIndex(std::uint64_t(-1))
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The window of element indexes of a particular column in a particular cluster.
bool Contains(NTupleSize_t index) const
NTupleSize_t fFirstElementIndex
A 64-bit element index.
int fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
The number of column elements in the cluster.
bool operator==(const RColumnRange &other) const
DescriptorId_t fPhysicalColumnId
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.
Generic information about the physical location of data.