Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \author Jakob Blomer <jblomer@cern.ch>
3/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
4/// \date 2018-07-19
5
6/*************************************************************************
7 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
8 * All rights reserved. *
9 * *
10 * For the licensing terms see $ROOTSYS/LICENSE. *
11 * For the list of contributors see $ROOTSYS/README/CREDITS. *
12 *************************************************************************/
13
14#ifndef ROOT_RNTupleDescriptor
15#define ROOT_RNTupleDescriptor
16
18#include <ROOT/RError.hxx>
20#include <ROOT/RNTupleTypes.hxx>
21#include <ROOT/RSpan.hxx>
22
23#include <TError.h>
24
25#include <algorithm>
26#include <chrono>
27#include <cmath>
28#include <functional>
29#include <iterator>
30#include <map>
31#include <memory>
32#include <optional>
33#include <ostream>
34#include <vector>
35#include <set>
36#include <string>
37#include <string_view>
38#include <unordered_map>
39#include <unordered_set>
40
41namespace ROOT {
42
43class RFieldBase;
44class RNTupleModel;
45
46namespace Internal {
48}
49
51
52namespace Internal {
59
60RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc);
65
66std::vector<ROOT::Internal::RNTupleClusterBoundaries> GetClusterBoundaries(const RNTupleDescriptor &desc);
67} // namespace Internal
68
69namespace Experimental {
70
71// clang-format off
72/**
73\class ROOT::Experimental::RNTupleAttrSetDescriptor
74\ingroup NTuple
75\brief Metadata stored for every Attribute Set linked to an RNTuple.
76*/
77// clang-format on
80
81 std::uint16_t fSchemaVersionMajor = 0;
82 std::uint16_t fSchemaVersionMinor = 0;
83 std::uint32_t fAnchorLength = 0; ///< uncompressed size of the linked anchor
84 // The locator of the AttributeSet anchor.
85 // In case of kTypeFile, it points to the beginning of the Anchor's payload.
86 // NOTE: Only kTypeFile is supported at the moment.
88 std::string fName;
89
90public:
96
97 bool operator==(const RNTupleAttrSetDescriptor &other) const;
98 bool operator!=(const RNTupleAttrSetDescriptor &other) const { return !(*this == other); }
99
100 const std::string &GetName() const { return fName; }
101 std::uint16_t GetSchemaVersionMajor() const { return fSchemaVersionMajor; }
102 std::uint16_t GetSchemaVersionMinor() const { return fSchemaVersionMinor; }
103 std::uint32_t GetAnchorLength() const { return fAnchorLength; }
105
107};
108
109class RNTupleAttrSetDescriptorIterable;
110
111} // namespace Experimental
112
113// clang-format off
114/**
115\class ROOT::RFieldDescriptor
116\ingroup NTuple
117\brief Metadata stored for every field of an RNTuple
118*/
119// clang-format on
120class RFieldDescriptor final {
123
124private:
126 /// The version of the C++-type-to-column translation mechanics
127 std::uint32_t fFieldVersion = 0;
128 /// The version of the C++ type itself
129 std::uint32_t fTypeVersion = 0;
130 /// The leaf name, not including parent fields
131 std::string fFieldName;
132 /// Free text set by the user
133 std::string fFieldDescription;
134 /// The C++ type that was used when writing the field
135 std::string fTypeName;
136 /// A typedef or using directive that resolved to the type name during field creation
137 std::string fTypeAlias;
138 /// The number of elements per entry for fixed-size arrays
139 std::uint64_t fNRepetitions = 0;
140 /// The structural information carried by this field in the data model tree
142 /// Establishes sub field relationships, such as classes and collections
144 /// For projected fields, the source field ID
146 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
147 /// order of sub fields.
148 std::vector<ROOT::DescriptorId_t> fLinkIds;
149 /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
150 /// list of logical column ids. For example, the second column of the third column representation is
151 /// fLogicalColumnIds[2 * fColumnCardinality + 1]
152 std::uint32_t fColumnCardinality = 0;
153 /// The ordered list of columns attached to this field: first by representation index then by column index.
154 std::vector<ROOT::DescriptorId_t> fLogicalColumnIds;
155 /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
156 /// identify types by their checksum
157 std::optional<std::uint32_t> fTypeChecksum;
158 /// Indicates if this is a collection that should be represented in memory by a SoA layout.
159 bool fIsSoACollection = false;
160
161public:
162 RFieldDescriptor() = default;
163 RFieldDescriptor(const RFieldDescriptor &other) = delete;
167
168 bool operator==(const RFieldDescriptor &other) const;
169 /// Get a copy of the descriptor
170 RFieldDescriptor Clone() const;
171
172 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
173 /// access to sub fields, which is provided by the RNTupleDescriptor argument.
174 std::unique_ptr<ROOT::RFieldBase>
175 CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options = {}) const;
176
178 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
179 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
180 const std::string &GetFieldName() const { return fFieldName; }
181 const std::string &GetFieldDescription() const { return fFieldDescription; }
182 const std::string &GetTypeName() const { return fTypeName; }
183 const std::string &GetTypeAlias() const { return fTypeAlias; }
184 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
188 const std::vector<ROOT::DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
189 const std::vector<ROOT::DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
190 std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
191 std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; }
193 bool IsSoACollection() const { return fIsSoACollection; }
194};
195
196// clang-format off
197/**
198\class ROOT::RColumnDescriptor
199\ingroup NTuple
200\brief Metadata stored for every column of an RNTuple
201*/
202// clang-format on
203class RColumnDescriptor final {
206
207public:
/// A pair of bounds (fMin, fMax) that can be attached to a column as its value range.
struct RValueRange {
   double fMin = 0;
   double fMax = 0;

   RValueRange() = default;
   /// Build the range from its two bounds.
   RValueRange(double min, double max) : fMin{min}, fMax{max} {}
   /// Build the range from a (min, max) pair. The constructor is not marked explicit,
   /// so a std::pair<double, double> converts to an RValueRange implicitly.
   RValueRange(std::pair<double, double> range) : RValueRange(range.first, range.second) {}

   /// Two ranges are equal when both of their bounds compare equal.
   bool operator==(RValueRange other) const
   {
      if (fMin != other.fMin)
         return false;
      return fMax == other.fMax;
   }
   /// Negation of operator==: true as soon as one of the bounds differs.
   bool operator!=(RValueRange other) const { return fMin != other.fMin || fMax != other.fMax; }
};
218
219private:
220 /// The actual column identifier, which is the link to the corresponding field
222 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
224 /// Every column belongs to one and only one field
226 /// The absolute value specifies the index for the first stored element for this column.
227 /// For deferred columns the absolute value is larger than zero.
228 /// Negative values specify a suppressed and deferred column.
229 std::int64_t fFirstElementIndex = 0U;
230 /// A field can be serialized into several columns, which are numbered from zero to $n$
231 std::uint32_t fIndex = 0;
232 /// A field may use multiple column representations, which are numbered from zero to $m$.
233 /// Every representation has the same number of columns.
234 std::uint16_t fRepresentationIndex = 0;
235 /// The size in bits of elements of this column. Most columns have the size fixed by their type
236 /// but low-precision float columns have variable bit widths.
237 std::uint16_t fBitsOnStorage = 0;
238 /// The on-disk column type
240 /// Optional value range (used e.g. by quantized real fields)
241 std::optional<RValueRange> fValueRange;
242
243public:
244 RColumnDescriptor() = default;
245 RColumnDescriptor(const RColumnDescriptor &other) = delete;
249
250 bool operator==(const RColumnDescriptor &other) const;
251 /// Get a copy of the descriptor
252 RColumnDescriptor Clone() const;
253
257 std::uint32_t GetIndex() const { return fIndex; }
258 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
259 std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); }
260 std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
262 std::optional<RValueRange> GetValueRange() const { return fValueRange; }
264 bool IsDeferredColumn() const { return fFirstElementIndex != 0; }
266};
267
268// clang-format off
269/**
270\class ROOT::RClusterDescriptor
271\ingroup NTuple
272\brief Metadata for RNTuple clusters
273
274The cluster descriptor is built in two phases. In a first phase, the descriptor has only an ID.
275In a second phase, the event range, column group, page locations and column ranges are added.
276Both phases are populated by the RClusterDescriptorBuilder.
277Clusters span across all available columns in the RNTuple.
278*/
279// clang-format on
282
283public:
284 // clang-format off
285 /**
286 \class ROOT::RClusterDescriptor::RColumnRange
287 \ingroup NTuple
288 \brief The window of element indexes of a particular column in a particular cluster
289 */
290 // clang-format on
291 class RColumnRange final {
293 /// The global index of the first column element in the cluster
295 /// The number of column elements in the cluster
297 /// The usual format for ROOT compression settings (see Compression.h).
298 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
299 /// If unset, the compression settings are undefined (deferred columns, suppressed columns).
300 std::optional<std::uint32_t> fCompressionSettings;
301 /// Suppressed columns have an empty page range and unknown compression settings.
302 /// Their element index range, however, is aligned with the corresponding column of the
303 /// primary column representation (see Section "Suppressed Columns" in the specification)
304 bool fIsSuppressed = false;
305
306 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
307 // Should this be done on the field level?
308
309 public:
310 RColumnRange() = default;
311
312 RColumnRange(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t firstElementIndex,
313 ROOT::NTupleSize_t nElements, std::optional<std::uint32_t> compressionSettings,
314 bool suppressed = false)
315 : fPhysicalColumnId(physicalColumnId),
316 fFirstElementIndex(firstElementIndex),
317 fNElements(nElements),
318 fCompressionSettings(compressionSettings),
319 fIsSuppressed(suppressed)
320 {
321 }
322
325
329
333
334 std::optional<std::uint32_t> GetCompressionSettings() const { return fCompressionSettings; }
335 void SetCompressionSettings(std::optional<std::uint32_t> comp) { fCompressionSettings = comp; }
336
337 bool IsSuppressed() const { return fIsSuppressed; }
338 void SetIsSuppressed(bool suppressed) { fIsSuppressed = suppressed; }
339
340 bool operator==(const RColumnRange &other) const
341 {
345 }
346
347 bool Contains(ROOT::NTupleSize_t index) const
348 {
349 return (fFirstElementIndex <= index && (fFirstElementIndex + fNElements) > index);
350 }
351 };
352
353 // clang-format off
354 /**
355 \class ROOT::RClusterDescriptor::RPageInfo
356 \ingroup NTuple
357 \brief Information about a single page in the context of a cluster's page range.
358 */
359 // clang-format on
360 // NOTE: We do not need to store the element size / uncompressed page size because we know to which column
361 // the page belongs
362 struct RPageInfo {
363 private:
364 /// The meaning of `fLocator` depends on the storage backend.
366 /// The sum of the elements of all the pages must match the corresponding `fNElements` field in `fColumnRanges`
367 std::uint32_t fNElements = std::uint32_t(-1);
368 /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
369 bool fHasChecksum = false;
370
371 public:
372 RPageInfo() = default;
373 RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum)
374 : fLocator(locator), fNElements(nElements), fHasChecksum(hasChecksum)
375 {
376 }
377
378 bool operator==(const RPageInfo &other) const
379 {
380 return fLocator == other.fLocator && fNElements == other.fNElements;
381 }
382
383 const RNTupleLocator &GetLocator() const { return fLocator; }
385 void SetLocator(const RNTupleLocator &locator) { fLocator = locator; }
386
387 std::uint32_t GetNElements() const { return fNElements; }
388 void SetNElements(std::uint32_t n) { fNElements = n; }
389
390 bool HasChecksum() const { return fHasChecksum; }
391 void SetHasChecksum(bool hasChecksum) { fHasChecksum = hasChecksum; }
392 };
393
394 // clang-format off
395 /**
396 \class ROOT::RClusterDescriptor::RPageInfoExtended
397 \ingroup NTuple
398 \brief Additional information about a page in an in-memory RPageRange.
399
400 Used by RPageRange::Find() to return information relative to the RPageRange. This information is not stored on disk
401 and we don't need to keep it in memory because it can be easily recomputed.
402 */
403 // clang-format on
405 private:
406 /// Index (in cluster) of the first element in page.
408 /// Page number in the corresponding RPageRange.
410
411 public:
412 RPageInfoExtended() = default;
413 RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber)
414 : RPageInfo(pageInfo), fFirstElementIndex(firstElementIndex), fPageNumber(pageNumber)
415 {
416 }
417
419 void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage) { fFirstElementIndex = firstInPage; }
420
422 void SetPageNumber(ROOT::NTupleSize_t pageNumber) { fPageNumber = pageNumber; }
423 };
424
425 // clang-format off
426 /**
427 \class ROOT::RClusterDescriptor::RPageRange
428 \ingroup NTuple
429 \brief Records the partition of data into pages for a particular column in a particular cluster
430 */
431 // clang-format on
432 class RPageRange final {
434
435 private:
436 /// \brief Extend this RPageRange to fit the given RColumnRange.
437 ///
438 /// To do so, prepend as many synthetic RPageInfos as needed to cover the range in `columnRange`.
439 /// RPageInfos are constructed to contain as many elements of type `element` given a page size
440 /// limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
441 /// This function is used to make up RPageRanges for clusters that contain deferred columns.
442 /// \return The number of column elements covered by the synthesized RPageInfos
443 std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange,
444 const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize);
445
446 std::vector<RPageInfo> fPageInfos;
447
448 /// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
449 /// up to and including a given index. Used for binary search in Find().
450 /// This vector is only created if fPageInfos has at least kLargeRangeThreshold elements.
451 std::unique_ptr<std::vector<ROOT::NTupleSize_t>> fCumulativeNElements;
452
454
455 public:
456 /// Create fCumulativeNElements only when it is needed, i.e. when there are many pages to search through.
457 static constexpr std::size_t kLargeRangeThreshold = 10;
458
459 RPageRange() = default;
460 RPageRange(const RPageRange &other) = delete;
461 RPageRange &operator=(const RPageRange &other) = delete;
462 RPageRange(RPageRange &&other) = default;
463 RPageRange &operator=(RPageRange &&other) = default;
464
466 {
467 RPageRange clone;
469 clone.fPageInfos = fPageInfos;
471 clone.fCumulativeNElements = std::make_unique<std::vector<ROOT::NTupleSize_t>>(*fCumulativeNElements);
472 }
473 return clone;
474 }
475
476 /// Find the page in the RPageRange that contains the given element. The element must exist.
477 RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const;
478
481
482 const std::vector<RPageInfo> &GetPageInfos() const { return fPageInfos; }
483 std::vector<RPageInfo> &GetPageInfos() { return fPageInfos; }
484
485 bool operator==(const RPageRange &other) const
486 {
487 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
488 }
489 };
490
491private:
493 /// Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges
496
497 std::unordered_map<ROOT::DescriptorId_t, RColumnRange> fColumnRanges;
498 std::unordered_map<ROOT::DescriptorId_t, RPageRange> fPageRanges;
499
500public:
502
508
510
511 bool operator==(const RClusterDescriptor &other) const;
512
516 const RColumnRange &GetColumnRange(ROOT::DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
517 const RPageRange &GetPageRange(ROOT::DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
518 /// Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
519 RColumnRangeIterable GetColumnRangeIterable() const;
521 {
522 return fColumnRanges.find(physicalId) != fColumnRanges.end();
523 }
524 std::uint64_t GetNBytesOnStorage() const;
525};
526
528private:
530
531public:
532 class RIterator final {
533 private:
534 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RColumnRange>::const_iterator;
535 /// The wrapped map iterator
537
538 public:
539 using iterator_category = std::forward_iterator_tag;
542 using difference_type = std::ptrdiff_t;
543 using pointer = const RColumnRange *;
544 using reference = const RColumnRange &;
545
546 RIterator(Iter_t iter) : fIter(iter) {}
547 iterator &operator++() /* prefix */
548 {
549 ++fIter;
550 return *this;
551 }
552 iterator operator++(int) /* postfix */
553 {
554 auto old = *this;
555 operator++();
556 return old;
557 }
558 reference operator*() const { return fIter->second; }
559 pointer operator->() const { return &fIter->second; }
560 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
561 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
562 };
563
564 explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
565
566 RIterator begin() { return RIterator{fDesc.fColumnRanges.cbegin()}; }
567 RIterator end() { return RIterator{fDesc.fColumnRanges.cend()}; }
568 size_t size() { return fDesc.fColumnRanges.size(); }
569};
570
571// clang-format off
572/**
573\class ROOT::RClusterGroupDescriptor
574\ingroup NTuple
575\brief Clusters are bundled in cluster groups.
576
577Very large RNTuples can contain multiple cluster groups to organize cluster metadata.
578Every RNTuple has at least one cluster group. The clusters in a cluster group are ordered
579corresponding to their first entry number.
580*/
581// clang-format on
584
585private:
587 /// The cluster IDs can be empty if the corresponding page list is not loaded.
588 /// Otherwise, cluster ids are sorted by first entry number.
589 std::vector<ROOT::DescriptorId_t> fClusterIds;
590 /// The page list that corresponds to the cluster group
592 /// Uncompressed size of the page list
593 std::uint64_t fPageListLength = 0;
594 /// The minimum first entry number of the clusters in the cluster group
595 std::uint64_t fMinEntry = 0;
596 /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
597 std::uint64_t fEntrySpan = 0;
598 /// Number of clusters is always known even if the cluster IDs are not (yet) populated
599 std::uint32_t fNClusters = 0;
600
601public:
607
609 /// Creates a clone without the cluster IDs
611
612 bool operator==(const RClusterGroupDescriptor &other) const;
613
615 std::uint32_t GetNClusters() const { return fNClusters; }
617 std::uint64_t GetPageListLength() const { return fPageListLength; }
618 const std::vector<ROOT::DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
619 std::uint64_t GetMinEntry() const { return fMinEntry; }
620 std::uint64_t GetEntrySpan() const { return fEntrySpan; }
621 /// A cluster group is loaded in two stages. Stage one loads only the summary information.
622 /// Stage two loads the list of cluster IDs.
623 bool HasClusterDetails() const { return !fClusterIds.empty(); }
624};
625
626/// Used in RExtraTypeInfoDescriptor
631
632// clang-format off
633/**
634\class ROOT::RExtraTypeInfoDescriptor
635\ingroup NTuple
636\brief Field-specific extra type information from the header / extension header
637
638Currently only used by streamer fields to store RNTuple-wide list of streamer info records.
639*/
640// clang-format on
643
644private:
645 /// Specifies the meaning of the extra information
647 /// Type version the extra type information is bound to
648 std::uint32_t fTypeVersion = 0;
649 /// The type name the extra information refers to; empty for RNTuple-wide extra information
650 std::string fTypeName;
651 /// The content format depends on the content ID and may be binary
652 std::string fContent;
653
654public:
660
661 bool operator==(const RExtraTypeInfoDescriptor &other) const;
662
664
666 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
667 const std::string &GetTypeName() const { return fTypeName; }
668 const std::string &GetContent() const { return fContent; }
669};
670
671namespace Internal {
672// Used by the RNTupleReader to activate/deactivate entries. Needs to adapt when we have sharded clusters.
674} // namespace Internal
675
676// clang-format off
677/**
678\class ROOT::RNTupleDescriptor
679\ingroup NTuple
680\brief The on-storage metadata of an RNTuple
681
682Represents the on-disk (on storage) information about an RNTuple. The metadata consists of a header, a footer, and
683potentially multiple page lists.
684The header carries the RNTuple schema, i.e. the fields and the associated columns and their relationships.
685The footer carries information about one or several cluster groups and links to their page lists.
686For every cluster group, a page list envelope stores cluster summaries and page locations.
687For every cluster, it stores for every column the range of element indexes as well as a list of pages and page
688locations.
689
690The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
691for RNTuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
692
693The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
694the concept of envelopes and frames: header, footer, and page list envelopes have a preamble with a type ID and length.
695Substructures are serialized in frames and have a size and number of items (for list frames). This allows for forward
696and backward compatibility when the metadata evolves.
697*/
698// clang-format on
699class RNTupleDescriptor final {
703
704public:
705 class RHeaderExtension;
706
707private:
708 /// The RNTuple name needs to be unique in a given storage location (file)
709 std::string fName;
710 /// Free text from the user
711 std::string fDescription;
712
714
715 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
716
717 std::set<unsigned int> fFeatureFlags;
718 std::unordered_map<ROOT::DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
719 std::unordered_map<ROOT::DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
720
721 std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
722 std::unique_ptr<RHeaderExtension> fHeaderExtension;
723
724 //// All fields above are part of the schema and are cloned when creating a new descriptor from a given one
725 //// (see CloneSchema())
726
727 std::uint16_t fVersionEpoch = 0; ///< Set by the descriptor builder when deserialized
728 std::uint16_t fVersionMajor = 0; ///< Set by the descriptor builder when deserialized
729 std::uint16_t fVersionMinor = 0; ///< Set by the descriptor builder when deserialized
730 std::uint16_t fVersionPatch = 0; ///< Set by the descriptor builder when deserialized
731
732 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
733 std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
734 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
735
736 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster groups are added
737 std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
738
739 /// \brief The generation of the descriptor
740 ///
741 /// Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
742 /// active page locations. During the lifetime of the descriptor, page location information for clusters
743 /// can be added or removed. When this happens, the generation should be increased, so that users of the
744 /// descriptor know that the information changed. The generation is increased, e.g., by the page source's
745 /// exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
746 std::uint64_t fGeneration = 0;
747
748 std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
749 /// References cluster groups sorted by entry range and thus allows for binary search.
750 /// Note that this list is empty during the descriptor building process and will only be
751 /// created when the final descriptor is extracted from the builder.
752 std::vector<ROOT::DescriptorId_t> fSortedClusterGroupIds;
753 /// Potentially a subset of all the available clusters
754 std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
755 /// List of AttributeSets linked to this RNTuple
756 std::vector<Experimental::RNTupleAttrSetDescriptor> fAttributeSets;
757
758 // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
760
761 /// Creates a descriptor containing only the schema information about this RNTuple, i.e. all the information needed
762 /// to create a new RNTuple with the same schema as this one but not necessarily the same clustering. This is used
763 /// when merging two RNTuples.
765
766public:
767 /// All known feature flags.
768 /// Note that the flag values represent the bit _index_, not the already-bitshifted integer.
770 // Insert new feature flags here, with contiguous values. If at any point a "hole" appears in the valid feature
771 // flags values, the check in RNTupleSerialize must be updated.
772
773 // End of regular feature flags
775
776 /// Reserved for forward-compatibility testing
778 };
779
780 class RColumnDescriptorIterable;
781 class RFieldDescriptorIterable;
782 class RClusterGroupDescriptorIterable;
783 class RClusterDescriptorIterable;
784 class RExtraTypeInfoDescriptorIterable;
786
787 /// Modifiers passed to CreateModel()
789 private:
790 /// If set to true, projected fields will be reconstructed as such. This will prevent the model from being used
791 /// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
793 /// By default, creating a model will fail if any of the reconstructed fields contains an unknown column type
794 /// or an unknown field structural role.
795 /// If this option is enabled, the model will be created and all fields containing unknown data (directly
796 /// or indirectly) will be skipped instead.
797 bool fForwardCompatible = false;
798 /// If true, the model will be created without a default entry (bare model).
799 bool fCreateBare = false;
800 /// If true, fields with a user defined type that have no available dictionaries will be reconstructed
801 /// as record fields from the on-disk information; otherwise, they will cause an error.
803
804 public:
805 RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
806
809
812
813 void SetCreateBare(bool v) { fCreateBare = v; }
814 bool GetCreateBare() const { return fCreateBare; }
815
818 };
819
820 RNTupleDescriptor() = default;
821 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
825
826 RNTupleDescriptor Clone() const;
827
828 bool operator==(const RNTupleDescriptor &other) const;
829
830 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
831 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
832 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
833 /// \see ROOT::RNTuple::GetCurrentVersion()
834 std::uint64_t GetVersion() const
835 {
836 return (static_cast<std::uint64_t>(fVersionEpoch) << 48) | (static_cast<std::uint64_t>(fVersionMajor) << 32) |
837 (static_cast<std::uint64_t>(fVersionMinor) << 16) | (static_cast<std::uint64_t>(fVersionPatch));
838 }
839
841 {
842 return fFieldDescriptors.at(fieldId);
843 }
845 {
846 return fColumnDescriptors.at(columnId);
847 }
849 {
850 return fClusterGroupDescriptors.at(clusterGroupId);
851 }
853 {
854 return fClusterDescriptors.at(clusterId);
855 }
856
857 RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
858 RFieldDescriptorIterable
859 GetFieldIterable(const RFieldDescriptor &fieldDesc,
860 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
861 RFieldDescriptorIterable GetFieldIterable(ROOT::DescriptorId_t fieldId) const;
862 RFieldDescriptorIterable
864 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
865
866 RFieldDescriptorIterable GetTopLevelFields() const;
867 RFieldDescriptorIterable
868 GetTopLevelFields(const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
869
870 RColumnDescriptorIterable GetColumnIterable() const;
871 RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
872 RColumnDescriptorIterable GetColumnIterable(ROOT::DescriptorId_t fieldId) const;
873
874 RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
875
876 RClusterDescriptorIterable GetClusterIterable() const;
877
878 RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
879
881
882 const std::string &GetName() const { return fName; }
883 const std::string &GetDescription() const { return fDescription; }
884
885 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
886 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
887 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
888 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
889 std::size_t GetNClusters() const { return fNClusters; }
890 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
891 std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
892 std::size_t GetNAttributeSets() const { return fAttributeSets.size(); }
893
894 /// We know the number of entries from adding the cluster summaries
897
898 /// Returns the logical parent of all top-level RNTuple data fields.
901 ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const;
902 /// Searches for a top-level field
903 ROOT::DescriptorId_t FindFieldId(std::string_view fieldName) const;
904 ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
905 std::uint16_t representationIndex) const;
906 ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
907 std::uint16_t representationIndex) const;
911
912 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
913 /// In case of invalid field ID, an empty string is returned.
914 std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const;
915
916 /// Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type name.
917 std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const;
918
919 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
920 std::vector<std::uint64_t> GetFeatureFlags() const;
921
922 /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
923 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
924
925 /// Methods to load and drop cluster group details (cluster IDs and page locations)
927 AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
929
930 std::uint64_t GetGeneration() const { return fGeneration; }
932
933 /// Re-create the C++ model from the stored metadata
934 std::unique_ptr<ROOT::RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
935 void PrintInfo(std::ostream &output) const;
936};
937
938// clang-format off
939/**
940\class ROOT::RNTupleDescriptor::RColumnDescriptorIterable
941\ingroup NTuple
942\brief Used to loop over a field's associated columns
943*/
944// clang-format on
946private:
947 /// The associated RNTuple for this range.
949 /// The descriptor ids of the columns ordered by field, representation, and column index
950 std::vector<ROOT::DescriptorId_t> fColumns = {};
951
952public:
953 class RIterator final {
954 private:
955 /// The enclosing range's RNTuple.
957 /// The enclosing range's descriptor id list.
958 const std::vector<ROOT::DescriptorId_t> &fColumns;
959 std::size_t fIndex = 0;
960
961 public:
962 using iterator_category = std::forward_iterator_tag;
965 using difference_type = std::ptrdiff_t;
966 using pointer = const RColumnDescriptor *;
968
969 RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &columns, std::size_t index)
970 : fNTuple(ntuple), fColumns(columns), fIndex(index)
971 {
972 }
973 iterator &operator++() /* prefix */
974 {
975 ++fIndex;
976 return *this;
977 }
978 iterator operator++(int) /* postfix */
979 {
980 auto old = *this;
981 operator++();
982 return old;
983 }
984 reference operator*() const { return fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
985 pointer operator->() const { return &fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
986 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
987 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
988 };
989
990 RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc);
992
995 size_t size() { return fColumns.size(); }
996};
997
998// clang-format off
999/**
1000\class ROOT::RNTupleDescriptor::RFieldDescriptorIterable
1001\ingroup NTuple
1002\brief Used to loop over a field's child fields
1003*/
1004// clang-format on
1006private:
1007 /// The associated RNTuple for this range.
1009 /// The descriptor IDs of the child fields. These may be sorted using
1010 /// a comparison function.
1011 std::vector<ROOT::DescriptorId_t> fFieldChildren = {};
1012
1013public:
1014 class RIterator final {
1015 private:
1016 /// The enclosing range's RNTuple.
1018 /// The enclosing range's descriptor id list.
1019 const std::vector<ROOT::DescriptorId_t> &fFieldChildren;
1020 std::size_t fIndex = 0;
1021
1022 public:
1023 using iterator_category = std::forward_iterator_tag;
1026 using difference_type = std::ptrdiff_t;
1027 using pointer = const RFieldDescriptor *;
1029
1030 RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &fieldChildren,
1031 std::size_t index)
1032 : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index)
1033 {
1034 }
1035 iterator &operator++() /* prefix */
1036 {
1037 ++fIndex;
1038 return *this;
1039 }
1040 iterator operator++(int) /* postfix */
1041 {
1042 auto old = *this;
1043 operator++();
1044 return old;
1045 }
1046 reference operator*() const { return fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
1047 pointer operator->() const { return &fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
1048 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
1049 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
1050 };
1052 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
1053 {
1054 }
1055 /// Sort the range using an arbitrary comparison function.
1057 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
1058 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
1059 {
1060 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
1061 }
1064};
1065
1066// clang-format off
1067/**
1068\class ROOT::RNTupleDescriptor::RClusterGroupDescriptorIterable
1069\ingroup NTuple
1070\brief Used to loop over all the cluster groups of an RNTuple (in unspecified order)
1071
1072Enumerate all cluster group IDs from the descriptor. No specific order can be assumed.
1073*/
1074// clang-format on
1076private:
1077 /// The associated RNTuple for this range.
1079
1080public:
1081 class RIterator final {
1082 private:
1083 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor>::const_iterator;
1084 /// The wrapped map iterator
1086
1087 public:
1088 using iterator_category = std::forward_iterator_tag;
1091 using difference_type = std::ptrdiff_t;
1094
1095 RIterator(Iter_t iter) : fIter(iter) {}
1096 iterator &operator++() /* prefix */
1097 {
1098 ++fIter;
1099 return *this;
1100 }
1101 iterator operator++(int) /* postfix */
1102 {
1103 auto old = *this;
1104 operator++();
1105 return old;
1106 }
1107 reference operator*() const { return fIter->second; }
1108 pointer operator->() const { return &fIter->second; }
1109 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1110 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1111 };
1112
1114 RIterator begin() { return RIterator(fNTuple.fClusterGroupDescriptors.cbegin()); }
1115 RIterator end() { return RIterator(fNTuple.fClusterGroupDescriptors.cend()); }
1116};
1117
1118// clang-format off
1119/**
1120\class ROOT::RNTupleDescriptor::RClusterDescriptorIterable
1121\ingroup NTuple
1122\brief Used to loop over all the clusters of an RNTuple (in unspecified order)
1123
1124Enumerate all cluster IDs from all cluster descriptors. No specific order can be assumed, use
1125RNTupleDescriptor::FindNextClusterId() and RNTupleDescriptor::FindPrevClusterId() to traverse
1126clusters by entry number.
1127*/
1128// clang-format on
1130private:
1131 /// The associated RNTuple for this range.
1133
1134public:
1135 class RIterator final {
1136 private:
1137 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor>::const_iterator;
1138 /// The wrapped map iterator
1140
1141 public:
1142 using iterator_category = std::forward_iterator_tag;
1145 using difference_type = std::ptrdiff_t;
1148
1149 RIterator(Iter_t iter) : fIter(iter) {}
1150 iterator &operator++() /* prefix */
1151 {
1152 ++fIter;
1153 return *this;
1154 }
1155 iterator operator++(int) /* postfix */
1156 {
1157 auto old = *this;
1158 operator++();
1159 return old;
1160 }
1161 reference operator*() const { return fIter->second; }
1162 pointer operator->() const { return &fIter->second; }
1163 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1164 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1165 };
1166
1168 RIterator begin() { return RIterator(fNTuple.fClusterDescriptors.cbegin()); }
1169 RIterator end() { return RIterator(fNTuple.fClusterDescriptors.cend()); }
1170};
1171
1172// clang-format off
1173/**
1174\class ROOT::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
1175\ingroup NTuple
1176\brief Used to loop over all the extra type info records of an RNTuple (in unspecified order)
1177*/
1178// clang-format on
1180private:
1181 /// The associated RNTuple for this range.
1183
1184public:
1185 class RIterator final {
1186 private:
1187 using Iter_t = std::vector<RExtraTypeInfoDescriptor>::const_iterator;
1188 /// The wrapped vector iterator
1190
1191 public:
1192 using iterator_category = std::forward_iterator_tag;
1195 using difference_type = std::ptrdiff_t;
1198
1199 RIterator(Iter_t iter) : fIter(iter) {}
1200 iterator &operator++() /* prefix */
1201 {
1202 ++fIter;
1203 return *this;
1204 }
1205 iterator operator++(int) /* postfix */
1206 {
1207 auto old = *this;
1208 operator++();
1209 return old;
1210 }
1211 reference operator*() const { return *fIter; }
1212 pointer operator->() const { return &*fIter; }
1213 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1214 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1215 };
1216
1218 RIterator begin() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cbegin()); }
1219 RIterator end() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cend()); }
1220};
1221
1222namespace Experimental {
1223// clang-format off
1224/**
1225\class ROOT::Experimental::RNTupleAttrSetDescriptorIterable
1226\ingroup NTuple
1227\brief Used to loop over all the Attribute Sets linked to an RNTuple
1228*/
1229// clang-format on
1230// TODO: move this to RNTupleDescriptor::RNTupleAttrSetDescriptorIterable when it moves out of Experimental.
1232private:
1233 /// The associated RNTuple for this range.
1235
1236public:
1237 class RIterator final {
1238 private:
1239 using Iter_t = std::vector<RNTupleAttrSetDescriptor>::const_iterator;
1240 /// The wrapped vector iterator
1242
1243 public:
1244 using iterator_category = std::forward_iterator_tag;
1247 using difference_type = std::ptrdiff_t;
1248 using pointer = const value_type *;
1249 using reference = const value_type &;
1250
1251 RIterator(Iter_t iter) : fIter(iter) {}
1252 iterator &operator++() /* prefix */
1253 {
1254 ++fIter;
1255 return *this;
1256 }
1257 iterator operator++(int) /* postfix */
1258 {
1259 auto old = *this;
1260 operator++();
1261 return old;
1262 }
1263 reference operator*() const { return *fIter; }
1264 pointer operator->() const { return &*fIter; }
1265 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1266 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1267 };
1268
1270 RIterator begin() { return RIterator(fNTuple.fAttributeSets.cbegin()); }
1271 RIterator end() { return RIterator(fNTuple.fAttributeSets.cend()); }
1272};
1273} // namespace Experimental
1274
1275// clang-format off
1276/**
1277\class ROOT::RNTupleDescriptor::RHeaderExtension
1278\ingroup NTuple
1279\brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
1280*/
1281// clang-format on
1284
1285private:
1286 /// All field IDs of late model extensions, in the order of field addition. This is necessary to serialize the
1287 /// fields in that order.
1288 std::vector<ROOT::DescriptorId_t> fFieldIdsOrder;
1289 /// All field IDs of late model extensions for efficient lookup. When a column gets added to the extension
1290 /// header, this enables us to determine if the column belongs to a field of the header extension or if it
1291 /// belongs to a field of the regular header that gets extended by additional column representations.
1292 std::unordered_set<ROOT::DescriptorId_t> fFieldIdsLookup;
1293 /// All logical column IDs of columns that extend, with additional column representations, fields of the regular
1294 /// header. During serialization, these columns are not picked up as columns of `fFieldIdsOrder`. But instead
1295 /// these columns need to be serialized in the extension header without re-serializing the field.
1296 std::vector<ROOT::DescriptorId_t> fExtendedColumnRepresentations;
1297 /// Number of logical and physical columns; updated by the descriptor builder when columns are added
1298 std::uint32_t fNLogicalColumns = 0;
1299 std::uint32_t fNPhysicalColumns = 0;
1300
1301 /// Marks `fieldDesc` as an extended field, i.e. a field that appears in the Header Extension (e.g. having been added
1302 /// through late model extension). Note that the field descriptor should also have been added to the RNTuple
1303 /// Descriptor alongside non-extended fields.
1305 {
1306 fFieldIdsOrder.emplace_back(fieldDesc.GetId());
1307 fFieldIdsLookup.insert(fieldDesc.GetId());
1308 }
1309
1310 /// Marks `columnDesc` as an extended column, i.e. a column that appears in the Header Extension (e.g. having been
1311 /// added through late model extension as an additional representation of an existing column). Note that the column
1312 /// descriptor should also have been added to the RNTuple Descriptor alongside non-extended columns.
1314 {
1316 if (!columnDesc.IsAliasColumn())
1318 if (fFieldIdsLookup.count(columnDesc.GetFieldId()) == 0) {
1319 fExtendedColumnRepresentations.emplace_back(columnDesc.GetLogicalId());
1320 }
1321 }
1322
1323public:
1324 std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
1325 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
1326 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
1327 const std::vector<ROOT::DescriptorId_t> &GetExtendedColumnRepresentations() const
1328 {
1330 }
1331 /// Return a vector containing the IDs of the top-level fields defined in the extension header, in the order
1332 /// of their addition. Note that these fields are not necessarily top-level fields in the overall schema.
1333 /// If a nested field is extended, it will return the top-most field of the extended subtree.
1334 /// We cannot create this vector when building the fFields because at the time when AddExtendedField is called,
1335 /// the field is not yet linked into the schema tree.
1336 std::vector<ROOT::DescriptorId_t> GetTopMostFields(const RNTupleDescriptor &desc) const;
1337
1339 {
1340 return fFieldIdsLookup.find(fieldId) != fFieldIdsLookup.end();
1341 }
1343 {
1344 return std::find(fExtendedColumnRepresentations.begin(), fExtendedColumnRepresentations.end(), columnId) !=
1346 }
1347};
1348
1349namespace Experimental::Internal {
1352
1353public:
1355 {
1356 fDesc.fName = name;
1357 return *this;
1358 }
1359 RNTupleAttrSetDescriptorBuilder &SchemaVersion(std::uint16_t major, std::uint16_t minor)
1360 {
1361 fDesc.fSchemaVersionMajor = major;
1362 fDesc.fSchemaVersionMinor = minor;
1363 return *this;
1364 }
1366 {
1367 fDesc.fAnchorLocator = loc;
1368 return *this;
1369 }
1371 {
1372 fDesc.fAnchorLength = length;
1373 return *this;
1374 }
1375
1376 /// Attempt to make an AttributeSet descriptor. This may fail if the builder
1377 /// was not given enough information to make a proper descriptor.
1379};
1380} // namespace Experimental::Internal
1381
1382namespace Internal {
1383
1384// clang-format off
1385/**
1386\class ROOT::Internal::RColumnDescriptorBuilder
1387\ingroup NTuple
1388\brief A helper class for piece-wise construction of an RColumnDescriptor
1389
1390Dangling column descriptors can become actual descriptors when added to an
1391RNTupleDescriptorBuilder instance and then linked to their fields.
1392*/
1393// clang-format on
1395private:
1397
1398public:
1399 /// Make an empty column descriptor builder.
1401
1403 {
1404 fColumn.fLogicalColumnId = logicalColumnId;
1405 return *this;
1406 }
1408 {
1409 fColumn.fPhysicalColumnId = physicalColumnId;
1410 return *this;
1411 }
1412 RColumnDescriptorBuilder &BitsOnStorage(std::uint16_t bitsOnStorage)
1413 {
1414 fColumn.fBitsOnStorage = bitsOnStorage;
1415 return *this;
1416 }
1418 {
1419 fColumn.fType = type;
1420 return *this;
1421 }
1423 {
1424 fColumn.fFieldId = fieldId;
1425 return *this;
1426 }
1427 RColumnDescriptorBuilder &Index(std::uint32_t index)
1428 {
1429 fColumn.fIndex = index;
1430 return *this;
1431 }
1432 RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx)
1433 {
1434 fColumn.fFirstElementIndex = firstElementIdx;
1435 return *this;
1436 }
1438 {
1439 R__ASSERT(fColumn.fFirstElementIndex != 0);
1440 if (fColumn.fFirstElementIndex > 0)
1441 fColumn.fFirstElementIndex = -fColumn.fFirstElementIndex;
1442 return *this;
1443 }
1444 RColumnDescriptorBuilder &RepresentationIndex(std::uint16_t representationIndex)
1445 {
1446 fColumn.fRepresentationIndex = representationIndex;
1447 return *this;
1448 }
1449 RColumnDescriptorBuilder &ValueRange(double min, double max)
1450 {
1451 fColumn.fValueRange = {min, max};
1452 return *this;
1453 }
1454 RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange)
1455 {
1456 fColumn.fValueRange = valueRange;
1457 return *this;
1458 }
1459 ROOT::DescriptorId_t GetFieldId() const { return fColumn.fFieldId; }
1460 ROOT::DescriptorId_t GetRepresentationIndex() const { return fColumn.fRepresentationIndex; }
1461 /// Attempt to make a column descriptor. This may fail if the column
1462 /// was not given enough information to make a proper descriptor.
1464};
1465
1466// clang-format off
1467/**
1468\class ROOT::Internal::RFieldDescriptorBuilder
1469\ingroup NTuple
1470\brief A helper class for piece-wise construction of an RFieldDescriptor
1471
1472Dangling field descriptors describe a single field in isolation. They are
1473missing the necessary relationship information (parent field, any child fields)
1474required to describe a real RNTuple field.
1475
1476Dangling field descriptors can only become actual descriptors when added to an
1477RNTupleDescriptorBuilder instance and then linked to other fields.
1478*/
1479// clang-format on
1481private:
1483
1484public:
1485 /// Make an empty dangling field descriptor.
1487
1488 /// Make a new RFieldDescriptorBuilder based off a live RNTuple field.
1490
1492 {
1493 fField.fFieldId = fieldId;
1494 return *this;
1495 }
1496 RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
1497 {
1498 fField.fFieldVersion = fieldVersion;
1499 return *this;
1500 }
1501 RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
1502 {
1503 fField.fTypeVersion = typeVersion;
1504 return *this;
1505 }
1507 {
1508 fField.fParentId = id;
1509 return *this;
1510 }
1512 {
1513 fField.fProjectionSourceId = id;
1514 return *this;
1515 }
1516 RFieldDescriptorBuilder &FieldName(const std::string &fieldName)
1517 {
1518 fField.fFieldName = fieldName;
1519 return *this;
1520 }
1521 RFieldDescriptorBuilder &FieldDescription(const std::string &fieldDescription)
1522 {
1523 fField.fFieldDescription = fieldDescription;
1524 return *this;
1525 }
1526 RFieldDescriptorBuilder &TypeName(const std::string &typeName)
1527 {
1528 fField.fTypeName = typeName;
1529 return *this;
1530 }
1531 RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias)
1532 {
1533 fField.fTypeAlias = typeAlias;
1534 return *this;
1535 }
1536 RFieldDescriptorBuilder &NRepetitions(std::uint64_t nRepetitions)
1537 {
1538 fField.fNRepetitions = nRepetitions;
1539 return *this;
1540 }
1542 {
1543 fField.fStructure = structure;
1544 return *this;
1545 }
1546 RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum)
1547 {
1548 fField.fTypeChecksum = typeChecksum;
1549 return *this;
1550 }
1552 {
1553 fField.fIsSoACollection = val;
1554 return *this;
1555 }
1556 ROOT::DescriptorId_t GetParentId() const { return fField.fParentId; }
1557 /// Attempt to make a field descriptor. This may fail if the dangling field
1558 /// was not given enough information to make a proper descriptor.
1560};
1561
1562// clang-format off
1563/**
1564\class ROOT::Internal::RClusterDescriptorBuilder
1565\ingroup NTuple
1566\brief A helper class for piece-wise construction of an RClusterDescriptor
1567
1568The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1569piecewise addition of page locations.
1570*/
1571// clang-format on
1573private:
1575
1576public:
1578 {
1579 fCluster.fClusterId = clusterId;
1580 return *this;
1581 }
1582
1583 RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
1584 {
1585 fCluster.fFirstEntryIndex = firstEntryIndex;
1586 return *this;
1587 }
1588
1589 RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
1590 {
1591 fCluster.fNEntries = nEntries;
1592 return *this;
1593 }
1594
1595 RResult<void> CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex,
1596 std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
1597
1598 /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1599 /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1600 /// MarkSuppressedColumnRange() took place.
1602
1603 /// Sets the first element index and number of elements for all the suppressed column ranges.
1604 /// The information is taken from the corresponding columns from the primary representation.
1605 /// Needs to be called when all the columns (suppressed and regular) were added.
1607
1608 /// Add column and page ranges for columns created during late model extension missing in this cluster. The locator
1609 /// type for the synthesized page ranges is `kTypePageZero`. All the page sources must be able to populate the
1610 /// 'zero' page from such locator. Any call to CommitColumnRange() and CommitSuppressedColumnRanges()
1611 /// should happen before calling this function.
1613
1615 {
1616 return fCluster.GetColumnRange(physicalId);
1617 }
1618
1619 /// Move out the full cluster descriptor including page locations
1621};
1622
1623// clang-format off
1624/**
1625\class ROOT::Internal::RClusterGroupDescriptorBuilder
1626\ingroup NTuple
1627\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1628*/
1629// clang-format on
1631private:
1633
1634public:
1637
1639 {
1640 fClusterGroup.fClusterGroupId = clusterGroupId;
1641 return *this;
1642 }
1644 {
1645 fClusterGroup.fPageListLocator = pageListLocator;
1646 return *this;
1647 }
1648 RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1649 {
1650 fClusterGroup.fPageListLength = pageListLength;
1651 return *this;
1652 }
1654 {
1655 fClusterGroup.fMinEntry = minEntry;
1656 return *this;
1657 }
1659 {
1660 fClusterGroup.fEntrySpan = entrySpan;
1661 return *this;
1662 }
1664 {
1665 fClusterGroup.fNClusters = nClusters;
1666 return *this;
1667 }
1668 void AddSortedClusters(const std::vector<ROOT::DescriptorId_t> &clusterIds)
1669 {
1670 if (clusterIds.size() != fClusterGroup.GetNClusters())
1671 throw RException(R__FAIL("mismatch of number of clusters"));
1672 fClusterGroup.fClusterIds = clusterIds;
1673 }
1674
1676};
1677
1678// clang-format off
1679/**
1680\class ROOT::Internal::RExtraTypeInfoDescriptorBuilder
1681\ingroup NTuple
1682\brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1683*/
1684// clang-format on
1686private:
1688
1689public:
1691
1693 {
1694 fExtraTypeInfo.fContentId = contentId;
1695 return *this;
1696 }
1698 {
1699 fExtraTypeInfo.fTypeVersion = typeVersion;
1700 return *this;
1701 }
1702 RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1703 {
1704 fExtraTypeInfo.fTypeName = typeName;
1705 return *this;
1706 }
1707 RExtraTypeInfoDescriptorBuilder &Content(const std::string &content)
1708 {
1709 fExtraTypeInfo.fContent = content;
1710 return *this;
1711 }
1712
1714};
1715
1716// clang-format off
1717/**
1718\class ROOT::Internal::RNTupleDescriptorBuilder
1719\ingroup NTuple
1720\brief A helper class for piece-wise construction of an RNTupleDescriptor
1721
1722Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1723*/
1724// clang-format on
1726private:
1729
1730public:
1731 /// Checks whether invariants hold:
1732 /// * RNTuple epoch is valid
1733 /// * RNTuple name is valid
1734 /// * Fields have valid parents
1735 /// * Number of columns is constant across column representations
1739
1740 /// Copies the "schema" part of `descriptor` into the builder's descriptor.
1741 /// This resets the builder's descriptor.
1742 void SetSchemaFromExisting(const RNTupleDescriptor &descriptor);
1743
1744 void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor,
1745 std::uint16_t versionPatch);
1746 void SetVersionForWriting();
1747
1748 void SetNTuple(const std::string_view name, const std::string_view description);
1749 /// Sets the `flag`-th bit of the feature flag to 1.
1750 /// Note that `flag` itself is not a bitmask, just the bit index of the flag to enable.
1751 void SetFeature(unsigned int flag);
1752
1753 void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1754 void SetOnDiskHeaderSize(std::uint64_t size) { fDescriptor.fOnDiskHeaderSize = size; }
1755 /// The real footer size also includes the page list envelopes
1756 void AddToOnDiskFooterSize(std::uint64_t size) { fDescriptor.fOnDiskFooterSize += size; }
1757
1758 void AddField(const RFieldDescriptor &fieldDesc);
1761
1762 // The field that the column belongs to has to be already available. For fields with multiple columns,
1763 // the columns need to be added in order of the column index
1765
1768
1770 void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
1771
1773
1774 /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1775 /// annotated as being part of the header extension.
1776 void BeginHeaderExtension();
1777
1778 /// \brief Shift column IDs of alias columns by `offset`
1779 ///
1780 /// If the descriptor is constructed in pieces consisting of physical and alias columns
1781 /// (regular and projected fields), the natural column order would be
1782 /// - Physical and alias columns of piece one
1783 /// - Physical and alias columns of piece two
1784 /// - etc.
1785 /// What we want, however, are first all physical column IDs and then all alias column IDs.
1786 /// This method adds `offset` to the logical column IDs of all alias columns and fixes up the corresponding
1787 /// column IDs in the projected field descriptors. In this way, a new piece of physical and alias columns can
1788 /// first shift the existing alias columns by the number of new physical columns, resulting in the following order
1789 /// - Physical columns of piece one
1790 /// - Physical columns of piece two
1791 /// - ...
1792 /// - Alias columns of piece one
1793 /// - Alias columns of piece two
1794 /// - ...
1795 void ShiftAliasColumns(std::uint32_t offset);
1796};
1797
1799{
1800 return desc.CloneSchema();
1801}
1802
1803/// Tells if the field describes a user-defined enum type.
1804/// The dictionary does not need to be available for this method.
1805/// Needs the full descriptor to look up sub fields.
1806bool IsCustomEnumFieldDesc(const RNTupleDescriptor &desc, const RFieldDescriptor &fieldDesc);
1807
1808/// Tells if the field describes a std::atomic<T> type
1809bool IsStdAtomicFieldDesc(const RFieldDescriptor &fieldDesc);
1810
1811} // namespace Internal
1812
1813} // namespace ROOT
1814
1815#endif // ROOT_RNTupleDescriptor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
XFontStruct * id
Definition TGX11.cxx:147
char name[80]
Definition TGX11.cxx:148
@ kInvalid
Definition TSystem.h:87
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RNTupleAttrSetDescriptorBuilder & AnchorLocator(const RNTupleLocator &loc)
RNTupleAttrSetDescriptorBuilder & SchemaVersion(std::uint16_t major, std::uint16_t minor)
RResult< ROOT::Experimental::RNTupleAttrSetDescriptor > MoveDescriptor()
Attempt to make an AttributeSet descriptor.
RNTupleAttrSetDescriptorBuilder & Name(std::string_view name)
RNTupleAttrSetDescriptorBuilder & AnchorLength(std::uint32_t length)
std::vector< RNTupleAttrSetDescriptor >::const_iterator Iter_t
Used to loop over all the Attribute Sets linked to an RNTuple.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RNTupleAttrSetDescriptorIterable(const RNTupleDescriptor &ntuple)
Metadata stored for every Attribute Set linked to an RNTuple.
RNTupleAttrSetDescriptor & operator=(const RNTupleAttrSetDescriptor &other)=delete
bool operator==(const RNTupleAttrSetDescriptor &other) const
std::uint32_t fAnchorLength
uncompressed size of the linked anchor
RNTupleAttrSetDescriptor(const RNTupleAttrSetDescriptor &other)=delete
RNTupleAttrSetDescriptor & operator=(RNTupleAttrSetDescriptor &&other)=default
const RNTupleLocator & GetAnchorLocator() const
bool operator!=(const RNTupleAttrSetDescriptor &other) const
RNTupleAttrSetDescriptor(RNTupleAttrSetDescriptor &&other)=default
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
const RClusterDescriptor::RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId)
RClusterDescriptorBuilder & ClusterId(ROOT::DescriptorId_t clusterId)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
void AddSortedClusters(const std::vector< ROOT::DescriptorId_t > &clusterIds)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
A helper class for piece-wise construction of an RColumnDescriptor.
ROOT::DescriptorId_t GetRepresentationIndex() const
RColumnDescriptorBuilder & SetSuppressedDeferred()
RColumnDescriptorBuilder & LogicalColumnId(ROOT::DescriptorId_t logicalColumnId)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RColumnDescriptorBuilder & BitsOnStorage(std::uint16_t bitsOnStorage)
RColumnDescriptorBuilder & ValueRange(double min, double max)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & ValueRange(std::optional< RColumnDescriptor::RValueRange > valueRange)
RColumnDescriptorBuilder & Type(ROOT::ENTupleColumnType type)
RColumnDescriptorBuilder & PhysicalColumnId(ROOT::DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & RepresentationIndex(std::uint16_t representationIndex)
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
RResult< RExtraTypeInfoDescriptor > MoveDescriptor()
RExtraTypeInfoDescriptorBuilder & ContentId(EExtraTypeInfoIds contentId)
RExtraTypeInfoDescriptorBuilder & TypeName(const std::string &typeName)
RExtraTypeInfoDescriptorBuilder & Content(const std::string &content)
RExtraTypeInfoDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & Structure(const ROOT::ENTupleStructure &structure)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & ProjectionSourceId(ROOT::DescriptorId_t id)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & IsSoACollection(bool val)
RFieldDescriptorBuilder & TypeChecksum(const std::optional< std::uint32_t > typeChecksum)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & ParentId(ROOT::DescriptorId_t id)
static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live RNTuple field.
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
A helper class for piece-wise construction of an RNTupleDescriptor.
void SetNTuple(const std::string_view name, const std::string_view description)
void SetSchemaFromExisting(const RNTupleDescriptor &descriptor)
Copies the "schema" part of descriptor into the builder's descriptor.
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
RResult< void > AddAttributeSet(Experimental::RNTupleAttrSetDescriptor &&attrSetDesc)
RResult< void > AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId)
void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
void ShiftAliasColumns(std::uint32_t offset)
Shift column IDs of alias columns by offset.
void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor, std::uint16_t versionPatch)
const RNTupleDescriptor & GetDescriptor() const
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId)
void AddField(const RFieldDescriptor &fieldDesc)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3)
RResult< void > EnsureFieldExists(ROOT::DescriptorId_t fieldId) const
void SetFeature(unsigned int flag)
Sets the flag-th bit of the feature flag to 1.
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
std::unordered_map< ROOT::DescriptorId_t, RColumnRange >::const_iterator Iter_t
RColumnRangeIterable(const RClusterDescriptor &desc)
The window of element indexes of a particular column in a particular cluster.
void SetCompressionSettings(std::optional< std::uint32_t > comp)
bool fIsSuppressed
Suppressed columns have an empty page range and unknown compression settings.
void SetPhysicalColumnId(ROOT::DescriptorId_t id)
ROOT::DescriptorId_t GetPhysicalColumnId() const
bool operator==(const RColumnRange &other) const
void SetFirstElementIndex(ROOT::NTupleSize_t idx)
ROOT::NTupleSize_t fFirstElementIndex
The global index of the first column element in the cluster.
std::optional< std::uint32_t > GetCompressionSettings() const
std::optional< std::uint32_t > fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ROOT::NTupleSize_t GetFirstElementIndex() const
RColumnRange(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t nElements, std::optional< std::uint32_t > compressionSettings, bool suppressed=false)
void IncrementFirstElementIndex(ROOT::NTupleSize_t by)
bool Contains(ROOT::NTupleSize_t index) const
ROOT::NTupleSize_t fNElements
The number of column elements in the cluster.
void IncrementNElements(ROOT::NTupleSize_t by)
Records the partition of data into pages for a particular column in a particular cluster.
RPageRange & operator=(const RPageRange &other)=delete
std::unique_ptr< std::vector< ROOT::NTupleSize_t > > fCumulativeNElements
Has the same length as fPageInfos and stores the sum of the number of elements of all the pages up ...
RPageRange(RPageRange &&other)=default
static constexpr std::size_t kLargeRangeThreshold
Create the fCumulativeNElements only when it's needed, i.e. when there are many pages to search throug...
RPageRange(const RPageRange &other)=delete
const std::vector< RPageInfo > & GetPageInfos() const
RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
bool operator==(const RPageRange &other) const
ROOT::DescriptorId_t GetPhysicalColumnId() const
void SetPhysicalColumnId(ROOT::DescriptorId_t id)
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange.
RPageRange & operator=(RPageRange &&other)=default
std::vector< RPageInfo > & GetPageInfos()
Metadata for RNTuple clusters.
ROOT::NTupleSize_t GetNEntries() const
ROOT::NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
ROOT::DescriptorId_t GetId() const
const RPageRange & GetPageRange(ROOT::DescriptorId_t physicalId) const
RClusterDescriptor(RClusterDescriptor &&other)=default
std::unordered_map< ROOT::DescriptorId_t, RColumnRange > fColumnRanges
ROOT::DescriptorId_t fClusterId
bool ContainsColumn(ROOT::DescriptorId_t physicalId) const
RClusterDescriptor & operator=(RClusterDescriptor &&other)=default
RClusterDescriptor Clone() const
bool operator==(const RClusterDescriptor &other) const
const RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId) const
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
ROOT::NTupleSize_t GetFirstEntryIndex() const
std::unordered_map< ROOT::DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor(const RClusterDescriptor &other)=delete
std::uint64_t GetNBytesOnStorage() const
Clusters are bundled in cluster groups.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor Clone() const
std::vector< ROOT::DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
ROOT::DescriptorId_t GetId() const
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
bool HasClusterDetails() const
A cluster group is loaded in two stages.
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
const std::vector< ROOT::DescriptorId_t > & GetClusterIds() const
std::uint64_t fPageListLength
Uncompressed size of the page list.
std::uint64_t GetPageListLength() const
RNTupleLocator GetPageListLocator() const
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
bool operator==(const RClusterGroupDescriptor &other) const
RClusterGroupDescriptor CloneSummary() const
Creates a clone without the cluster IDs.
Metadata stored for every column of an RNTuple.
std::optional< RValueRange > GetValueRange() const
ROOT::DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
bool operator==(const RColumnDescriptor &other) const
ROOT::DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor(const RColumnDescriptor &other)=delete
std::uint64_t GetFirstElementIndex() const
ROOT::DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
ROOT::DescriptorId_t GetFieldId() const
RColumnDescriptor(RColumnDescriptor &&other)=default
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
RColumnDescriptor & operator=(RColumnDescriptor &&other)=default
std::uint32_t GetIndex() const
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
ROOT::ENTupleColumnType fType
The on-disk column type.
ROOT::ENTupleColumnType GetType() const
ROOT::DescriptorId_t GetPhysicalId() const
std::uint16_t GetRepresentationIndex() const
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields).
std::uint16_t GetBitsOnStorage() const
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
ROOT::DescriptorId_t GetLogicalId() const
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Field specific extra type information from the header / extension header.
RExtraTypeInfoDescriptor & operator=(RExtraTypeInfoDescriptor &&other)=default
RExtraTypeInfoDescriptor & operator=(const RExtraTypeInfoDescriptor &other)=delete
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor Clone() const
RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other)=delete
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
const std::string & GetContent() const
const std::string & GetTypeName() const
RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other)=default
EExtraTypeInfoIds GetContentId() const
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
A field translates read and write calls from/to underlying columns to/from tree values.
Metadata stored for every field of an RNTuple.
const std::string & GetTypeAlias() const
std::unique_ptr< ROOT::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
ROOT::DescriptorId_t fFieldId
RFieldDescriptor Clone() const
Get a copy of the descriptor.
ROOT::DescriptorId_t GetId() const
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
std::uint32_t GetFieldVersion() const
const std::vector< ROOT::DescriptorId_t > & GetLogicalColumnIds() const
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
ROOT::DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
ROOT::ENTupleStructure GetStructure() const
bool operator==(const RFieldDescriptor &other) const
RFieldDescriptor(const RFieldDescriptor &other)=delete
std::uint32_t GetColumnCardinality() const
std::string fFieldDescription
Free text set by the user.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor & operator=(RFieldDescriptor &&other)=default
const std::vector< ROOT::DescriptorId_t > & GetLinkIds() const
ROOT::DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
ROOT::DescriptorId_t GetParentId() const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ROOT::ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::uint64_t GetNRepetitions() const
RFieldDescriptor(RFieldDescriptor &&other)=default
std::vector< ROOT::DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::string fFieldName
The leaf name, not including parent fields.
const std::string & GetFieldDescription() const
std::optional< std::uint32_t > GetTypeChecksum() const
ROOT::DescriptorId_t GetProjectionSourceId() const
bool fIsSoACollection
Indicates if this is a collection that should be represented in memory by a SoA layout.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint32_t GetTypeVersion() const
const std::string & GetFieldName() const
std::vector< ROOT::DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
const std::string & GetTypeName() const
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor >::const_iterator Iter_t
Used to loop over all the clusters of an RNTuple (in unspecified order).
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RClusterDescriptorIterable(const RNTupleDescriptor &ntuple)
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor >::const_iterator Iter_t
Used to loop over all the cluster groups of an RNTuple (in unspecified order).
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
const RNTupleDescriptor & fNTuple
The enclosing range's RNTuple.
const std::vector< ROOT::DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< ROOT::DescriptorId_t > &columns, std::size_t index)
Used to loop over a field's associated columns.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
std::vector< ROOT::DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
std::vector< RExtraTypeInfoDescriptor >::const_iterator Iter_t
Used to loop over all the extra type info records of an RNTuple (in unspecified order).
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< ROOT::DescriptorId_t > &fieldChildren, std::size_t index)
const std::vector< ROOT::DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's RNTuple.
Used to loop over a field's child fields.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
std::vector< ROOT::DescriptorId_t > fFieldChildren
The descriptor IDs of the child fields.
Summarizes information about fields and the corresponding columns that were added after the header ha...
const std::vector< ROOT::DescriptorId_t > & GetExtendedColumnRepresentations() const
std::unordered_set< ROOT::DescriptorId_t > fFieldIdsLookup
All field IDs of late model extensions for efficient lookup.
std::vector< ROOT::DescriptorId_t > GetTopMostFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header,...
std::uint32_t fNLogicalColumns
Number of logical and physical columns; updated by the descriptor builder when columns are added.
std::vector< ROOT::DescriptorId_t > fExtendedColumnRepresentations
All logical column IDs of columns that extend, with additional column representations,...
bool ContainsExtendedColumnRepresentation(ROOT::DescriptorId_t columnId) const
void MarkExtendedField(const RFieldDescriptor &fieldDesc)
Marks fieldDesc as an extended field, i.e.
std::vector< ROOT::DescriptorId_t > fFieldIdsOrder
All field IDs of late model extensions, in the order of field addition.
bool ContainsField(ROOT::DescriptorId_t fieldId) const
void MarkExtendedColumn(const RColumnDescriptor &columnDesc)
Marks columnDesc as an extended column, i.e.
The on-storage metadata of an RNTuple.
std::uint64_t GetGeneration() const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
const RClusterGroupDescriptor & GetClusterGroupDescriptor(ROOT::DescriptorId_t clusterGroupId) const
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::set< unsigned int > fFeatureFlags
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::vector< Experimental::RNTupleAttrSetDescriptor > fAttributeSets
List of AttributeSets linked to this RNTuple.
ROOT::DescriptorId_t fFieldZeroId
Set by the descriptor builder.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
std::size_t GetNExtraTypeInfos() const
std::uint64_t GetOnDiskFooterSize() const
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
std::size_t GetNActiveClusters() const
RColumnDescriptorIterable GetColumnIterable() const
bool operator==(const RNTupleDescriptor &other) const
const std::string & GetName() const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint16_t fVersionMinor
Set by the descriptor builder when deserialized.
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
std::vector< std::uint64_t > GetFeatureFlags() const
ROOT::NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
std::unique_ptr< ROOT::RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored metadata.
std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const
Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type nam...
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
Potentially a subset of all the available clusters.
std::size_t GetNAttributeSets() const
std::size_t GetNClusters() const
ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
std::size_t GetNPhysicalColumns() const
EFeatureFlags
All known feature flags.
@ kFeatureFlag_Test
Reserved for forward-compatibility testing.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t GetVersion() const
void PrintInfo(std::ostream &output) const
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
const RClusterDescriptor & GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
std::string fName
The RNTuple name needs to be unique in a given storage location (file).
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
std::uint64_t GetOnDiskHeaderXxHash3() const
RResult< void > DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId)
std::uint16_t fVersionMajor
Set by the descriptor builder when deserialized.
std::vector< ROOT::DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::unordered_map< ROOT::DescriptorId_t, RFieldDescriptor > fFieldDescriptors
std::size_t GetNFields() const
ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const
RResult< void > AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations).
bool HasFeature(unsigned int flag) const
std::uint16_t fVersionPatch
Set by the descriptor builder when deserialized.
std::uint64_t GetOnDiskHeaderSize() const
std::string fDescription
Free text from the user.
ROOT::Experimental::RNTupleAttrSetDescriptorIterable GetAttrSetIterable() const
RFieldDescriptorIterable GetTopLevelFields() const
std::uint16_t fVersionEpoch
Set by the descriptor builder when deserialized.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
RNTupleDescriptor Clone() const
std::size_t GetNLogicalColumns() const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::size_t GetNClusterGroups() const
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
RClusterDescriptorIterable GetClusterIterable() const
RNTupleDescriptor CloneSchema() const
Creates a descriptor containing only the schema information about this RNTuple, i....
const std::string & GetDescription() const
std::uint64_t fGeneration
The generation of the descriptor.
ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RFieldDescriptor & GetFieldZero() const
Generic information about the physical location of data.
The RNTupleModel encapsulates the schema of an RNTuple.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
const Int_t n
Definition legend1.C:16
Namespace for ROOT features in testing.
Definition TROOT.h:100
ROOT::DescriptorId_t CallFindClusterIdOn(const ROOT::RNTupleDescriptor &desc, ROOT::NTupleSize_t entryIdx)
RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc)
bool IsCustomEnumFieldDesc(const RNTupleDescriptor &desc, const RFieldDescriptor &fieldDesc)
Tells if the field describes a user-defined enum type.
std::vector< ROOT::Internal::RNTupleClusterBoundaries > GetClusterBoundaries(const RNTupleDescriptor &desc)
Return the cluster boundaries for each cluster in this RNTuple.
bool IsStdAtomicFieldDesc(const RFieldDescriptor &fieldDesc)
Tells if the field describes a std::atomic<T> type.
EExtraTypeInfoIds
Used in RExtraTypeInfoDescriptor.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
ENTupleStructure
The fields in the RNTuple data model tree can carry different structural information about the type s...
ENTupleColumnType
Additional information about a page in an in-memory RPageRange.
RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber)
void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage)
ROOT::NTupleSize_t fPageNumber
Page number in the corresponding RPageRange.
ROOT::NTupleSize_t fFirstElementIndex
Index (in cluster) of the first element in page.
void SetPageNumber(ROOT::NTupleSize_t pageNumber)
bool fHasChecksum
If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data.
void SetLocator(const RNTupleLocator &locator)
bool operator==(const RPageInfo &other) const
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
const RNTupleLocator & GetLocator() const
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum)
bool operator==(RValueRange other) const
RValueRange(std::pair< double, double > range)
bool operator!=(RValueRange other) const
bool fForwardCompatible
By default, creating a model will fail if any of the reconstructed fields contains an unknown column ...
bool fCreateBare
If true, the model will be created without a default entry (bare model).
bool fReconstructProjections
If set to true, projected fields will be reconstructed as such.
bool fEmulateUnknownTypes
If true, fields with a user defined type that have no available dictionaries will be reconstructed as...