Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-07-19
6
7/*************************************************************************
8 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#ifndef ROOT_RNTupleDescriptor
16#define ROOT_RNTupleDescriptor
17
19#include <ROOT/RError.hxx>
21#include <ROOT/RNTupleTypes.hxx>
22#include <ROOT/RSpan.hxx>
23
24#include <TError.h>
25
26#include <algorithm>
27#include <chrono>
28#include <cmath>
29#include <functional>
30#include <iterator>
31#include <map>
32#include <memory>
33#include <optional>
34#include <ostream>
35#include <vector>
36#include <set>
37#include <string>
38#include <string_view>
39#include <unordered_map>
40#include <unordered_set>
41
42namespace ROOT {
43
44class RFieldBase;
45class RNTupleModel;
46
47namespace Internal {
48class RColumnElementBase;
49}
50
51class RNTupleDescriptor;
52
53namespace Internal {
54class RColumnDescriptorBuilder;
55class RClusterDescriptorBuilder;
56class RClusterGroupDescriptorBuilder;
57class RExtraTypeInfoDescriptorBuilder;
58class RFieldDescriptorBuilder;
59class RNTupleDescriptorBuilder;
60
61RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc);
66
67std::vector<ROOT::Internal::RNTupleClusterBoundaries> GetClusterBoundaries(const RNTupleDescriptor &desc);
68} // namespace Internal
69
70namespace Experimental {
71
72// clang-format off
73/**
74\class ROOT::Experimental::RNTupleAttrSetDescriptor
75\ingroup NTuple
76\brief Metadata stored for every Attribute Set linked to an RNTuple.
77*/
78// clang-format on
81
82 std::uint16_t fSchemaVersionMajor = 0;
83 std::uint16_t fSchemaVersionMinor = 0;
84 std::uint32_t fAnchorLength = 0; ///< uncompressed size of the linked anchor
85 // The locator of the AttributeSet anchor.
86 // In case of kTypeFile, it points to the beginning of the Anchor's payload.
87 // NOTE: Only kTypeFile is supported at the moment.
89 std::string fName;
90
91public:
97
98 bool operator==(const RNTupleAttrSetDescriptor &other) const;
99 bool operator!=(const RNTupleAttrSetDescriptor &other) const { return !(*this == other); }
100
101 const std::string &GetName() const { return fName; }
102 std::uint16_t GetSchemaVersionMajor() const { return fSchemaVersionMajor; }
103 std::uint16_t GetSchemaVersionMinor() const { return fSchemaVersionMinor; }
104 std::uint32_t GetAnchorLength() const { return fAnchorLength; }
106
108};
109
110class RNTupleAttrSetDescriptorIterable;
111
112} // namespace Experimental
113
114// clang-format off
115/**
116\class ROOT::RFieldDescriptor
117\ingroup NTuple
118\brief Metadata stored for every field of an RNTuple
119*/
120// clang-format on
124
125private:
127 /// The version of the C++-type-to-column translation mechanics
128 std::uint32_t fFieldVersion = 0;
129 /// The version of the C++ type itself
130 std::uint32_t fTypeVersion = 0;
131 /// The leaf name, not including parent fields
132 std::string fFieldName;
133 /// Free text set by the user
134 std::string fFieldDescription;
135 /// The C++ type that was used when writing the field
136 std::string fTypeName;
137 /// A typedef or using directive that resolved to the type name during field creation
138 std::string fTypeAlias;
139 /// The number of elements per entry for fixed-size arrays
140 std::uint64_t fNRepetitions = 0;
141 /// The structural information carried by this field in the data model tree
143 /// Establishes sub field relationships, such as classes and collections
145 /// For projected fields, the source field ID
147 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
148 /// order of sub fields.
149 std::vector<ROOT::DescriptorId_t> fLinkIds;
150 /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
151 /// list of logical column ids. For example, the second column of the third column representation is
152 /// fLogicalColumnIds[2 * fColumnCardinality + 1]
153 std::uint32_t fColumnCardinality = 0;
154 /// The ordered list of columns attached to this field: first by representation index then by column index.
155 std::vector<ROOT::DescriptorId_t> fLogicalColumnIds;
156 /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
157 /// identify types by their checksum
158 std::optional<std::uint32_t> fTypeChecksum;
159
160public:
161 RFieldDescriptor() = default;
166
167 bool operator==(const RFieldDescriptor &other) const;
168 /// Get a copy of the descriptor
169 RFieldDescriptor Clone() const;
170
171 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
172 /// access to sub fields, which is provided by the RNTupleDescriptor argument.
173 std::unique_ptr<ROOT::RFieldBase>
174 CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options = {}) const;
175
177 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
178 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
179 const std::string &GetFieldName() const { return fFieldName; }
180 const std::string &GetFieldDescription() const { return fFieldDescription; }
181 const std::string &GetTypeName() const { return fTypeName; }
182 const std::string &GetTypeAlias() const { return fTypeAlias; }
183 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
187 const std::vector<ROOT::DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
188 const std::vector<ROOT::DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
189 std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
190 std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; }
192
196};
197
198// clang-format off
199/**
200\class ROOT::RColumnDescriptor
201\ingroup NTuple
202\brief Metadata stored for every column of an RNTuple
203*/
204// clang-format on
208
209public:
/// A pair of bounds (fMin, fMax) describing the value range of a column
/// (used e.g. by quantized real fields).
struct RValueRange {
   /// Lower bound of the range
   double fMin = 0;
   /// Upper bound of the range
   double fMax = 0;

   /// Creates the range {0, 0}
   RValueRange() = default;
   /// Creates the range {min, max}; the arguments are stored as given, without any ordering check
   RValueRange(double min, double max) : fMin(min), fMax(max) {}
   /// Non-explicit, so a std::pair<double, double> converts implicitly: {first, second} maps to {fMin, fMax}
   RValueRange(std::pair<double, double> range) : fMin(range.first), fMax(range.second) {}

   /// Exact (bitwise-value) floating point comparison of both bounds; no tolerance is applied
   bool operator==(RValueRange other) const { return fMin == other.fMin && fMax == other.fMax; }
   bool operator!=(RValueRange other) const { return !(*this == other); }
};
220
221private:
222 /// The actual column identifier, which is the link to the corresponding field
224 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
226 /// Every column belongs to one and only one field
/// The absolute value specifies the index for the first stored element for this column.
/// For deferred columns the absolute value is larger than zero.
/// Negative values specify a suppressed and deferred column.
/// The member is signed, so it is initialized with a signed literal.
std::int64_t fFirstElementIndex = 0;
232 /// A field can be serialized into several columns, which are numbered from zero to $n$
233 std::uint32_t fIndex = 0;
234 /// A field may use multiple column representations, which are numbered from zero to $m$.
235 /// Every representation has the same number of columns.
236 std::uint16_t fRepresentationIndex = 0;
237 /// The size in bits of elements of this column. Most columns have the size fixed by their type
238 /// but low-precision float columns have variable bit widths.
239 std::uint16_t fBitsOnStorage = 0;
240 /// The on-disk column type
242 /// Optional value range (used e.g. by quantized real fields)
243 std::optional<RValueRange> fValueRange;
244
245public:
246 RColumnDescriptor() = default;
251
252 bool operator==(const RColumnDescriptor &other) const;
253 /// Get a copy of the descriptor
254 RColumnDescriptor Clone() const;
255
/// The logical column ID
256 ROOT::DescriptorId_t GetLogicalId() const { return fLogicalColumnId; }
/// The physical column ID; differs from the logical ID only for alias columns (see IsAliasColumn())
257 ROOT::DescriptorId_t GetPhysicalId() const { return fPhysicalColumnId; }
/// The index of this column among the columns of its field
259 std::uint32_t GetIndex() const { return fIndex; }
/// The index of the column representation this column belongs to
260 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
/// The index of the first stored element. fFirstElementIndex encodes the suppressed state in its sign,
/// so the magnitude is returned here; use IsSuppressedDeferredColumn() to query the sign.
261 std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); }
/// The size in bits of a single element of this column on storage
262 std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
/// The on-disk column type
263 ROOT::ENTupleColumnType GetType() const { return fType; }
/// Returns a copy of the optional value range; empty if no range was set
264 std::optional<RValueRange> GetValueRange() const { return fValueRange; }
/// True if the physical ID differs from the logical ID, i.e. the column references another (shadowed) column
265 bool IsAliasColumn() const { return fPhysicalColumnId != fLogicalColumnId; }
/// True if the first stored element index is non-zero (of either sign)
266 bool IsDeferredColumn() const { return fFirstElementIndex != 0; }
/// True if the first stored element index is negative, which marks a column that is both suppressed and deferred
267 bool IsSuppressedDeferredColumn() const { return fFirstElementIndex < 0; }
268};
269
270// clang-format off
271/**
272\class ROOT::RClusterDescriptor
273\ingroup NTuple
274\brief Metadata for RNTuple clusters
275
276The cluster descriptor is built in two phases. In a first phase, the descriptor has only an ID.
277In a second phase, the event range, column group, page locations and column ranges are added.
278Both phases are populated by the RClusterDescriptorBuilder.
279Clusters span across all available columns in the RNTuple.
280*/
281// clang-format on
284
285public:
286 // clang-format off
287 /**
288 \class ROOT::RClusterDescriptor::RColumnRange
289 \ingroup NTuple
290 \brief The window of element indexes of a particular column in a particular cluster
291 */
292 // clang-format on
295 /// The global index of the first column element in the cluster
297 /// The number of column elements in the cluster
299 /// The usual format for ROOT compression settings (see Compression.h).
300 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
301 /// If unset, the compression settings are undefined (deferred columns, suppressed columns).
302 std::optional<std::uint32_t> fCompressionSettings;
303 /// Suppressed columns have an empty page range and unknown compression settings.
304 /// Their element index range, however, is aligned with the corresponding column of the
305 /// primary column representation (see Section "Suppressed Columns" in the specification)
306 bool fIsSuppressed = false;
307
308 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
309 // Should this be done on the field level?
310
311 public:
312 RColumnRange() = default;
313
315 ROOT::NTupleSize_t nElements, std::optional<std::uint32_t> compressionSettings,
316 bool suppressed = false)
317 : fPhysicalColumnId(physicalColumnId),
318 fFirstElementIndex(firstElementIndex),
319 fNElements(nElements),
320 fCompressionSettings(compressionSettings),
321 fIsSuppressed(suppressed)
322 {
323 }
324
325 ROOT::DescriptorId_t GetPhysicalColumnId() const { return fPhysicalColumnId; }
326 void SetPhysicalColumnId(ROOT::DescriptorId_t id) { fPhysicalColumnId = id; }
327
328 ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
329 void SetFirstElementIndex(ROOT::NTupleSize_t idx) { fFirstElementIndex = idx; }
330 void IncrementFirstElementIndex(ROOT::NTupleSize_t by) { fFirstElementIndex += by; }
331
332 ROOT::NTupleSize_t GetNElements() const { return fNElements; }
333 void SetNElements(ROOT::NTupleSize_t n) { fNElements = n; }
335
336 std::optional<std::uint32_t> GetCompressionSettings() const { return fCompressionSettings; }
337 void SetCompressionSettings(std::optional<std::uint32_t> comp) { fCompressionSettings = comp; }
338
339 bool IsSuppressed() const { return fIsSuppressed; }
340 void SetIsSuppressed(bool suppressed) { fIsSuppressed = suppressed; }
341
/// Member-wise equality: two column ranges are equal if the physical column ID, the first element index,
/// the number of elements, the (optional) compression settings and the suppressed flag all match.
342 bool operator==(const RColumnRange &other) const
343 {
344 return fPhysicalColumnId == other.fPhysicalColumnId && fFirstElementIndex == other.fFirstElementIndex &&
345 fNElements == other.fNElements && fCompressionSettings == other.fCompressionSettings &&
346 fIsSuppressed == other.fIsSuppressed;
347 }
348
350 {
351 return (fFirstElementIndex <= index && (fFirstElementIndex + fNElements) > index);
352 }
353 };
354
355 // clang-format off
356 /**
357 \class ROOT::RClusterDescriptor::RPageInfo
358 \ingroup NTuple
359 \brief Information about a single page in the context of a cluster's page range.
360 */
361 // clang-format on
362 // NOTE: We do not need to store the element size / uncompressed page size because we know to which column
363 // the page belongs
364 struct RPageInfo {
365 private:
366 /// The meaning of `fLocator` depends on the storage backend.
368 /// The sum of the elements of all the pages must match the corresponding `fNElements` field in `fColumnRanges`
369 std::uint32_t fNElements = std::uint32_t(-1);
370 /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
371 bool fHasChecksum = false;
372
373 public:
374 RPageInfo() = default;
376 : fLocator(locator), fNElements(nElements), fHasChecksum(hasChecksum)
377 {
378 }
379
/// Two page infos compare equal if their locators and their number of elements match.
/// NOTE(review): fHasChecksum does not take part in the comparison, so two pages that differ only in the
/// checksum flag are considered equal -- confirm that this is intended.
380 bool operator==(const RPageInfo &other) const
381 {
382 return fLocator == other.fLocator && fNElements == other.fNElements;
383 }
384
385 const RNTupleLocator &GetLocator() const { return fLocator; }
386 RNTupleLocator &GetLocator() { return fLocator; }
387 void SetLocator(const RNTupleLocator &locator) { fLocator = locator; }
388
389 std::uint32_t GetNElements() const { return fNElements; }
390 void SetNElements(std::uint32_t n) { fNElements = n; }
391
392 bool HasChecksum() const { return fHasChecksum; }
393 void SetHasChecksum(bool hasChecksum) { fHasChecksum = hasChecksum; }
394 };
395
396 // clang-format off
397 /**
398 \class ROOT::RClusterDescriptor::RPageInfoExtended
399 \ingroup NTuple
400 \brief Additional information about a page in an in-memory RPageRange.
401
402 Used by RPageRange::Find() to return information relative to the RPageRange. This information is not stored on disk
403 and we don't need to keep it in memory because it can be easily recomputed.
404 */
405 // clang-format on
407 private:
408 /// Index (in cluster) of the first element in page.
409 ROOT::NTupleSize_t fFirstElementIndex = 0;
410 /// Page number in the corresponding RPageRange.
411 ROOT::NTupleSize_t fPageNumber = 0;
412
413 public:
414 RPageInfoExtended() = default;
419
420 ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
422
423 ROOT::NTupleSize_t GetPageNumber() const { return fPageNumber; }
425 };
426
427 // clang-format off
428 /**
429 \class ROOT::RClusterDescriptor::RPageRange
430 \ingroup NTuple
431 \brief Records the partition of data into pages for a particular column in a particular cluster
432 */
433 // clang-format on
436
437 private:
438 /// \brief Extend this RPageRange to fit the given RColumnRange.
439 ///
440 /// To do so, prepend as many synthetic RPageInfos as needed to cover the range in `columnRange`.
441 /// RPageInfos are constructed to contain as many elements of type `element` given a page size
442 /// limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
443 /// This function is used to make up RPageRanges for clusters that contain deferred columns.
444 /// \return The number of column elements covered by the synthesized RPageInfos
445 std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange,
447
448 std::vector<RPageInfo> fPageInfos;
449
450 /// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
451 /// up to and including a given index. Used for binary search in Find().
452 /// This vector is only created if fPageInfos has at least kLargeRangeThreshold elements.
453 std::unique_ptr<std::vector<ROOT::NTupleSize_t>> fCumulativeNElements;
454
456
457 public:
458 /// Create the fCumulativeNElements only when it's needed, i.e. when there are many pages to search through.
459 static constexpr std::size_t kLargeRangeThreshold = 10;
460
461 RPageRange() = default;
462 RPageRange(const RPageRange &other) = delete;
466
468 {
469 RPageRange clone;
470 clone.fPhysicalColumnId = fPhysicalColumnId;
471 clone.fPageInfos = fPageInfos;
472 if (fCumulativeNElements) {
473 clone.fCumulativeNElements = std::make_unique<std::vector<ROOT::NTupleSize_t>>(*fCumulativeNElements);
474 }
475 return clone;
476 }
477
478 /// Find the page in the RPageRange that contains the given element. The element must exist.
480
481 ROOT::DescriptorId_t GetPhysicalColumnId() const { return fPhysicalColumnId; }
482 void SetPhysicalColumnId(ROOT::DescriptorId_t id) { fPhysicalColumnId = id; }
483
484 const std::vector<RPageInfo> &GetPageInfos() const { return fPageInfos; }
485 std::vector<RPageInfo> &GetPageInfos() { return fPageInfos; }
486
/// Two page ranges compare equal if they belong to the same physical column and list equal page infos.
/// fCumulativeNElements is not compared; presumably this is fine because it only holds running sums
/// of the page element counts (see its member documentation) -- TODO confirm.
487 bool operator==(const RPageRange &other) const
488 {
489 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
490 }
491 };
492
493private:
495 /// Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges
498
499 std::unordered_map<ROOT::DescriptorId_t, RColumnRange> fColumnRanges;
500 std::unordered_map<ROOT::DescriptorId_t, RPageRange> fPageRanges;
501
502public:
504
510
511 RClusterDescriptor Clone() const;
512
513 bool operator==(const RClusterDescriptor &other) const;
514
/// The ID of this cluster
515 ROOT::DescriptorId_t GetId() const { return fClusterId; }
/// The index of the first entry stored in this cluster
516 ROOT::NTupleSize_t GetFirstEntryIndex() const { return fFirstEntryIndex; }
/// The number of entries stored in this cluster
517 ROOT::NTupleSize_t GetNEntries() const { return fNEntries; }
/// Looks up the column range of the given physical column.
/// Uses std::unordered_map::at(), i.e. throws std::out_of_range if the cluster has no range for `physicalId`.
518 const RColumnRange &GetColumnRange(ROOT::DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
/// Looks up the page range of the given physical column.
/// Uses std::unordered_map::at(), i.e. throws std::out_of_range if the cluster has no page range for `physicalId`.
519 const RPageRange &GetPageRange(ROOT::DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
520 /// Returns an iterable over the column ranges of this cluster; every RColumnRange carries its physical column ID. The iteration order is unspecified.
521 RColumnRangeIterable GetColumnRangeIterable() const;
523 {
524 return fColumnRanges.find(physicalId) != fColumnRanges.end();
525 }
526 std::uint64_t GetNBytesOnStorage() const;
527};
528
530private:
532
533public:
535 private:
536 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RColumnRange>::const_iterator;
537 /// The wrapped map iterator
539
540 public:
541 using iterator_category = std::forward_iterator_tag;
544 using difference_type = std::ptrdiff_t;
545 using pointer = const RColumnRange *;
546 using reference = const RColumnRange &;
547
/// Wraps the given iterator of the underlying column range map
548 RIterator(Iter_t iter) : fIter(iter) {}
/// Advances to the next map entry and returns this iterator
549 iterator &operator++() /* prefix */
550 {
551 ++fIter;
552 return *this;
553 }
/// Advances to the next map entry and returns a copy of the iterator as it was before the increment
554 iterator operator++(int) /* postfix */
555 {
556 auto old = *this;
557 operator++();
558 return old;
559 }
/// Dereferencing yields only the mapped RColumnRange (the `second` of the map entry), not the key/value pair
560 reference operator*() const { return fIter->second; }
561 pointer operator->() const { return &fIter->second; }
/// Iterators compare equal if the wrapped map iterators compare equal
562 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
563 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
564 };
565
/// Creates an iterable over the column ranges of `desc`.
/// NOTE(review): the declaration of fDesc is not visible here; if it is a reference, `desc` must outlive
/// the iterable -- TODO confirm.
566 explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
567
/// Iterator to the first entry of the cluster's column range map (unordered, so the order is unspecified)
568 RIterator begin() { return RIterator{fDesc.fColumnRanges.cbegin()}; }
/// Past-the-end iterator of the cluster's column range map
569 RIterator end() { return RIterator{fDesc.fColumnRanges.cend()}; }
/// The number of column ranges stored in the cluster descriptor
570 size_t size() { return fDesc.fColumnRanges.size(); }
571};
572
573// clang-format off
574/**
575\class ROOT::RClusterGroupDescriptor
576\ingroup NTuple
577\brief Clusters are bundled in cluster groups.
578
579Very large RNTuples can contain multiple cluster groups to organize cluster metadata.
580Every RNTuple has at least one cluster group. The clusters in a cluster group are ordered
581corresponding to their first entry number.
582*/
583// clang-format on
586
587private:
589 /// The cluster IDs can be empty if the corresponding page list is not loaded.
590 /// Otherwise, cluster ids are sorted by first entry number.
591 std::vector<ROOT::DescriptorId_t> fClusterIds;
592 /// The page list that corresponds to the cluster group
594 /// Uncompressed size of the page list
595 std::uint64_t fPageListLength = 0;
596 /// The minimum first entry number of the clusters in the cluster group
597 std::uint64_t fMinEntry = 0;
598 /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
599 std::uint64_t fEntrySpan = 0;
600 /// Number of clusters is always known even if the cluster IDs are not (yet) populated
601 std::uint32_t fNClusters = 0;
602
603public:
609
610 RClusterGroupDescriptor Clone() const;
611 /// Creates a clone without the cluster IDs
612 RClusterGroupDescriptor CloneSummary() const;
613
614 bool operator==(const RClusterGroupDescriptor &other) const;
615
616 ROOT::DescriptorId_t GetId() const { return fClusterGroupId; }
617 std::uint32_t GetNClusters() const { return fNClusters; }
618 RNTupleLocator GetPageListLocator() const { return fPageListLocator; }
619 std::uint64_t GetPageListLength() const { return fPageListLength; }
620 const std::vector<ROOT::DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
621 std::uint64_t GetMinEntry() const { return fMinEntry; }
622 std::uint64_t GetEntrySpan() const { return fEntrySpan; }
623 /// A cluster group is loaded in two stages. Stage one loads only the summary information.
624 /// Stage two loads the list of cluster IDs.
625 bool HasClusterDetails() const { return !fClusterIds.empty(); }
626};
627
628/// Used in RExtraTypeInfoDescriptor
630 kInvalid,
632};
633
634// clang-format off
635/**
636\class ROOT::RExtraTypeInfoDescriptor
637\ingroup NTuple
638\brief Field specific extra type information from the header / extension header
639
640Currently only used by streamer fields to store RNTuple-wide list of streamer info records.
641*/
642// clang-format on
645
646private:
647 /// Specifies the meaning of the extra information
648 EExtraTypeInfoIds fContentId = EExtraTypeInfoIds::kInvalid;
649 /// Type version the extra type information is bound to
650 std::uint32_t fTypeVersion = 0;
651 /// The type name the extra information refers to; empty for RNTuple-wide extra information
652 std::string fTypeName;
653 /// The content format depends on the content ID and may be binary
654 std::string fContent;
655
656public:
662
663 bool operator==(const RExtraTypeInfoDescriptor &other) const;
664
665 RExtraTypeInfoDescriptor Clone() const;
666
667 EExtraTypeInfoIds GetContentId() const { return fContentId; }
668 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
669 const std::string &GetTypeName() const { return fTypeName; }
670 const std::string &GetContent() const { return fContent; }
671};
672
673namespace Internal {
674// Used by the RNTupleReader to activate/deactivate entries. Needs to adapt when we have sharded clusters.
676} // namespace Internal
677
678// clang-format off
679/**
680\class ROOT::RNTupleDescriptor
681\ingroup NTuple
682\brief The on-storage metadata of an RNTuple
683
684Represents the on-disk (on storage) information about an RNTuple. The metadata consists of a header, a footer, and
685potentially multiple page lists.
686The header carries the RNTuple schema, i.e. the fields and the associated columns and their relationships.
687The footer carries information about one or several cluster groups and links to their page lists.
688For every cluster group, a page list envelope stores cluster summaries and page locations.
689For every cluster, it stores for every column the range of element indexes as well as a list of pages and page
690locations.
691
692The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
693for RNTuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
694
695The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
696the concept of envelopes and frames: header, footer, and page list envelopes have a preamble with a type ID and length.
697Substructures are serialized in frames and have a size and number of items (for list frames). This allows for forward
698and backward compatibility when the metadata evolves.
699*/
700// clang-format on
703 friend RNTupleDescriptor Internal::CloneDescriptorSchema(const RNTupleDescriptor &desc);
704 friend DescriptorId_t Internal::CallFindClusterIdOn(const RNTupleDescriptor &desc, NTupleSize_t entryIdx);
705
706public:
707 class RHeaderExtension;
708
709private:
710 /// The RNTuple name needs to be unique in a given storage location (file)
711 std::string fName;
712 /// Free text from the user
713 std::string fDescription;
714
715 ROOT::DescriptorId_t fFieldZeroId = ROOT::kInvalidDescriptorId; ///< Set by the descriptor builder
716
717 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
718
719 std::set<unsigned int> fFeatureFlags;
720 std::unordered_map<ROOT::DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
721 std::unordered_map<ROOT::DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
722
723 std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
724 std::unique_ptr<RHeaderExtension> fHeaderExtension;
725
726 //// All fields above are part of the schema and are cloned when creating a new descriptor from a given one
727 //// (see CloneSchema())
728
729 std::uint16_t fVersionEpoch = 0; ///< Set by the descriptor builder when deserialized
730 std::uint16_t fVersionMajor = 0; ///< Set by the descriptor builder when deserialized
731 std::uint16_t fVersionMinor = 0; ///< Set by the descriptor builder when deserialized
732 std::uint16_t fVersionPatch = 0; ///< Set by the descriptor builder when deserialized
733
734 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
735 std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
736 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
737
738 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster groups are added
739 std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
740
741 /// \brief The generation of the descriptor
742 ///
743 /// Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
744 /// active page locations. During the lifetime of the descriptor, page location information for clusters
745 /// can be added or removed. When this happens, the generation should be increased, so that users of the
746 /// descriptor know that the information changed. The generation is increased, e.g., by the page source's
747 /// exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
748 std::uint64_t fGeneration = 0;
749
750 std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
751 /// References cluster groups sorted by entry range and thus allows for binary search.
752 /// Note that this list is empty during the descriptor building process and will only be
753 /// created when the final descriptor is extracted from the builder.
754 std::vector<ROOT::DescriptorId_t> fSortedClusterGroupIds;
755 /// Potentially a subset of all the available clusters
756 std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
757 /// List of AttributeSets linked to this RNTuple
758 std::vector<Experimental::RNTupleAttrSetDescriptor> fAttributeSets;
759
760 // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
762
763 /// Creates a descriptor containing only the schema information about this RNTuple, i.e. all the information needed
764 /// to create a new RNTuple with the same schema as this one but not necessarily the same clustering. This is used
765 /// when merging two RNTuples.
766 RNTupleDescriptor CloneSchema() const;
767
768public:
769 static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
770
777
778 /// Modifiers passed to CreateModel()
780 private:
781 /// If set to true, projected fields will be reconstructed as such. This will prevent the model from being used
782 /// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
783 bool fReconstructProjections = false;
784 /// By default, creating a model will fail if any of the reconstructed fields contains an unknown column type
785 /// or an unknown field structural role.
786 /// If this option is enabled, the model will be created and all fields containing unknown data (directly
787 /// or indirectly) will be skipped instead.
788 bool fForwardCompatible = false;
789 /// If true, the model will be created without a default entry (bare model).
790 bool fCreateBare = false;
791 /// If true, fields with a user defined type that have no available dictionaries will be reconstructed
792 /// as record fields from the on-disk information; otherwise, they will cause an error.
793 bool fEmulateUnknownTypes = false;
794
795 public:
796 RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
797
798 void SetReconstructProjections(bool v) { fReconstructProjections = v; }
799 bool GetReconstructProjections() const { return fReconstructProjections; }
800
801 void SetForwardCompatible(bool v) { fForwardCompatible = v; }
802 bool GetForwardCompatible() const { return fForwardCompatible; }
803
804 void SetCreateBare(bool v) { fCreateBare = v; }
805 bool GetCreateBare() const { return fCreateBare; }
806
807 void SetEmulateUnknownTypes(bool v) { fEmulateUnknownTypes = v; }
808 bool GetEmulateUnknownTypes() const { return fEmulateUnknownTypes; }
809 };
810
811 RNTupleDescriptor() = default;
816
817 RNTupleDescriptor Clone() const;
818
819 bool operator==(const RNTupleDescriptor &other) const;
820
821 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
822 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
823 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
824
826 {
827 return fFieldDescriptors.at(fieldId);
828 }
830 {
831 return fColumnDescriptors.at(columnId);
832 }
834 {
835 return fClusterGroupDescriptors.at(clusterGroupId);
836 }
838 {
839 return fClusterDescriptors.at(clusterId);
840 }
841
842 RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
843 RFieldDescriptorIterable
844 GetFieldIterable(const RFieldDescriptor &fieldDesc,
845 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
846 RFieldDescriptorIterable GetFieldIterable(ROOT::DescriptorId_t fieldId) const;
847 RFieldDescriptorIterable
848 GetFieldIterable(ROOT::DescriptorId_t fieldId,
849 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
850
851 RFieldDescriptorIterable GetTopLevelFields() const;
852 RFieldDescriptorIterable
853 GetTopLevelFields(const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
854
855 RColumnDescriptorIterable GetColumnIterable() const;
856 RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
857 RColumnDescriptorIterable GetColumnIterable(ROOT::DescriptorId_t fieldId) const;
858
859 RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
860
861 RClusterDescriptorIterable GetClusterIterable() const;
862
863 RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
864
866
867 const std::string &GetName() const { return fName; }
868 const std::string &GetDescription() const { return fDescription; }
869
/// The number of field descriptors stored in this descriptor
870 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
/// The number of column descriptors stored in this descriptor (one per logical column)
871 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
/// The number of physical columns; a stored counter maintained by the descriptor builder, not a container size
872 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
/// The number of cluster group descriptors stored in this descriptor
873 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
/// The cluster count as maintained by the descriptor builder when cluster groups are added.
/// This can differ from GetNActiveClusters(), which only counts the cluster descriptors currently held.
874 std::size_t GetNClusters() const { return fNClusters; }
/// The number of cluster descriptors currently held, potentially a subset of all the available clusters
875 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
/// The number of extra type info descriptors
876 std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
/// The number of attribute sets linked to this RNTuple
877 std::size_t GetNAttributeSets() const { return fAttributeSets.size(); }
878
879 /// We know the number of entries from adding the cluster summaries
880 ROOT::NTupleSize_t GetNEntries() const { return fNEntries; }
882
883 /// Returns the logical parent of all top-level RNTuple data fields.
884 ROOT::DescriptorId_t GetFieldZeroId() const { return fFieldZeroId; }
885 const RFieldDescriptor &GetFieldZero() const { return GetFieldDescriptor(GetFieldZeroId()); }
886 ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const;
887 /// Searches for a top-level field
888 ROOT::DescriptorId_t FindFieldId(std::string_view fieldName) const;
889 ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
890 std::uint16_t representationIndex) const;
891 ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
892 std::uint16_t representationIndex) const;
894 ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const;
895 ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const;
896
897 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
898 /// In case of invalid field ID, an empty string is returned.
899 std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const;
900
901 /// Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type name.
902 std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const;
903
904 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
905 std::vector<std::uint64_t> GetFeatureFlags() const;
906
907 /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
908 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
909
910 /// Methods to load and drop cluster group details (cluster IDs and page locations)
912 AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
913 RResult<void> DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId);
914
915 std::uint64_t GetGeneration() const { return fGeneration; }
916 void IncGeneration() { fGeneration++; }
917
918 /// Re-create the C++ model from the stored metadata
919 std::unique_ptr<ROOT::RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
920 void PrintInfo(std::ostream &output) const;
921};
922
923// clang-format off
924/**
925\class ROOT::RNTupleDescriptor::RColumnDescriptorIterable
926\ingroup NTuple
927\brief Used to loop over a field's associated columns
928*/
929// clang-format on
931private:
932 /// The associated RNTuple for this range.
934 /// The descriptor ids of the columns ordered by field, representation, and column index
935 std::vector<ROOT::DescriptorId_t> fColumns = {};
936
937public:
939 private:
940 /// The enclosing range's RNTuple.
942 /// The enclosing range's descriptor id list.
943 const std::vector<ROOT::DescriptorId_t> &fColumns;
944 std::size_t fIndex = 0;
945
946 public:
947 using iterator_category = std::forward_iterator_tag;
950 using difference_type = std::ptrdiff_t;
951 using pointer = const RColumnDescriptor *;
953
954 RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &columns, std::size_t index)
955 : fNTuple(ntuple), fColumns(columns), fIndex(index)
956 {
957 }
958 iterator &operator++() /* prefix */
959 {
960 ++fIndex;
961 return *this;
962 }
963 iterator operator++(int) /* postfix */
964 {
965 auto old = *this;
966 operator++();
967 return old;
968 }
969 reference operator*() const { return fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
970 pointer operator->() const { return &fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
971 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
972 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
973 };
974
977
/// Iterator positioned at index 0 of the column ID list. Const-qualified because it only reads members, so the
/// range can also be traversed through a const reference. The iterator refers to this range's ID vector.
978 RIterator begin() const { return RIterator(fNTuple, fColumns, 0); }
/// Past-the-end iterator; its index equals the number of column IDs in the range.
979 RIterator end() const { return RIterator(fNTuple, fColumns, fColumns.size()); }
/// Number of column IDs held by this range.
980 size_t size() const { return fColumns.size(); }
981};
982
983// clang-format off
984/**
985\class ROOT::RNTupleDescriptor::RFieldDescriptorIterable
986\ingroup NTuple
987\brief Used to loop over a field's child fields
988*/
989// clang-format on
991private:
992 /// The associated RNTuple for this range.
994 /// The descriptor IDs of the child fields. These may be sorted using
995 /// a comparison function.
996 std::vector<ROOT::DescriptorId_t> fFieldChildren = {};
997
998public:
1000 private:
1001 /// The enclosing range's RNTuple.
1003 /// The enclosing range's descriptor id list.
1004 const std::vector<ROOT::DescriptorId_t> &fFieldChildren;
1005 std::size_t fIndex = 0;
1006
1007 public:
1008 using iterator_category = std::forward_iterator_tag;
1011 using difference_type = std::ptrdiff_t;
1012 using pointer = const RFieldDescriptor *;
1014
1015 RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &fieldChildren,
1016 std::size_t index)
1017 : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index)
1018 {
1019 }
1020 iterator &operator++() /* prefix */
1021 {
1022 ++fIndex;
1023 return *this;
1024 }
1025 iterator operator++(int) /* postfix */
1026 {
1027 auto old = *this;
1028 operator++();
1029 return old;
1030 }
1031 reference operator*() const { return fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
1032 pointer operator->() const { return &fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
1033 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
1034 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
1035 };
1037 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
1038 {
1039 }
1040 /// Sort the range using an arbitrary comparison function.
1042 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
1043 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
1044 {
1045 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
1046 }
/// Iterator positioned at index 0 of the child field ID list. Const-qualified because it only reads members, so
/// the range can also be traversed through a const reference. The iterator refers to this range's ID vector.
1047 RIterator begin() const { return RIterator(fNTuple, fFieldChildren, 0); }
/// Past-the-end iterator; its index equals the number of child field IDs in the range.
1048 RIterator end() const { return RIterator(fNTuple, fFieldChildren, fFieldChildren.size()); }
1049};
1050
1051// clang-format off
1052/**
1053\class ROOT::RNTupleDescriptor::RClusterGroupDescriptorIterable
1054\ingroup NTuple
1055\brief Used to loop over all the cluster groups of an RNTuple (in unspecified order)
1056
1057Enumerate all cluster group IDs from the descriptor. No specific order can be assumed.
1058*/
1059// clang-format on
1061private:
1062 /// The associated RNTuple for this range.
1064
1065public:
1067 private:
1068 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor>::const_iterator;
1069 /// The wrapped map iterator
1071
1072 public:
1073 using iterator_category = std::forward_iterator_tag;
1076 using difference_type = std::ptrdiff_t;
1079
1080 RIterator(Iter_t iter) : fIter(iter) {}
1081 iterator &operator++() /* prefix */
1082 {
1083 ++fIter;
1084 return *this;
1085 }
1086 iterator operator++(int) /* postfix */
1087 {
1088 auto old = *this;
1089 operator++();
1090 return old;
1091 }
1092 reference operator*() const { return fIter->second; }
1093 pointer operator->() const { return &fIter->second; }
1094 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1095 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1096 };
1097
/// Iterator wrapping `cbegin()` of the descriptor's cluster group map. Const-qualified because it only reads
/// through the const descriptor reference, so the range can also be traversed through a const reference.
1099 RIterator begin() const { return RIterator(fNTuple.fClusterGroupDescriptors.cbegin()); }
/// Iterator wrapping `cend()` of the descriptor's cluster group map.
1100 RIterator end() const { return RIterator(fNTuple.fClusterGroupDescriptors.cend()); }
1101};
1102
1103// clang-format off
1104/**
1105\class ROOT::RNTupleDescriptor::RClusterDescriptorIterable
1106\ingroup NTuple
1107\brief Used to loop over all the clusters of an RNTuple (in unspecified order)
1108
1109Enumerate all cluster IDs from all cluster descriptors. No specific order can be assumed, use
1110RNTupleDescriptor::FindNextClusterId() and RNTupleDescriptor::FindPrevClusterId() to traverse
1111clusters by entry number.
1112*/
1113// clang-format on
1115private:
1116 /// The associated RNTuple for this range.
1118
1119public:
1121 private:
1122 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor>::const_iterator;
1123 /// The wrapped map iterator
1125
1126 public:
1127 using iterator_category = std::forward_iterator_tag;
1130 using difference_type = std::ptrdiff_t;
1133
1134 RIterator(Iter_t iter) : fIter(iter) {}
1135 iterator &operator++() /* prefix */
1136 {
1137 ++fIter;
1138 return *this;
1139 }
1140 iterator operator++(int) /* postfix */
1141 {
1142 auto old = *this;
1143 operator++();
1144 return old;
1145 }
1146 reference operator*() const { return fIter->second; }
1147 pointer operator->() const { return &fIter->second; }
1148 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1149 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1150 };
1151
/// Iterator wrapping `cbegin()` of the descriptor's cluster map. Const-qualified because it only reads through
/// the const descriptor reference, so the range can also be traversed through a const reference.
1153 RIterator begin() const { return RIterator(fNTuple.fClusterDescriptors.cbegin()); }
/// Iterator wrapping `cend()` of the descriptor's cluster map.
1154 RIterator end() const { return RIterator(fNTuple.fClusterDescriptors.cend()); }
1155};
1156
1157// clang-format off
1158/**
1159\class ROOT::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
1160\ingroup NTuple
1161\brief Used to loop over all the extra type info records of an RNTuple (in unspecified order)
1162*/
1163// clang-format on
1165private:
1166 /// The associated RNTuple for this range.
1168
1169public:
1171 private:
1172 using Iter_t = std::vector<RExtraTypeInfoDescriptor>::const_iterator;
1173 /// The wrapped vector iterator
1175
1176 public:
1177 using iterator_category = std::forward_iterator_tag;
1180 using difference_type = std::ptrdiff_t;
1183
1184 RIterator(Iter_t iter) : fIter(iter) {}
1185 iterator &operator++() /* prefix */
1186 {
1187 ++fIter;
1188 return *this;
1189 }
1190 iterator operator++(int) /* postfix */
1191 {
1192 auto old = *this;
1193 operator++();
1194 return old;
1195 }
1196 reference operator*() const { return *fIter; }
1197 pointer operator->() const { return &*fIter; }
1198 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1199 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1200 };
1201
/// Iterator wrapping `cbegin()` of the descriptor's extra type info vector. Const-qualified because it only reads
/// through the const descriptor reference, so the range can also be traversed through a const reference.
1203 RIterator begin() const { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cbegin()); }
/// Iterator wrapping `cend()` of the descriptor's extra type info vector.
1204 RIterator end() const { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cend()); }
1205};
1206
1207namespace Experimental {
1208// clang-format off
1209/**
1210\class ROOT::Experimental::RNTupleAttrSetDescriptorIterable
1211\ingroup NTuple
1212\brief Used to loop over all the Attribute Sets linked to an RNTuple
1213*/
1214// clang-format on
1215// TODO: move this to RNTupleDescriptor::RNTupleAttrSetDescriptorIterable when it moves out of Experimental.
1217private:
1218 /// The associated RNTuple for this range.
1220
1221public:
1223 private:
1224 using Iter_t = std::vector<RNTupleAttrSetDescriptor>::const_iterator;
1225 /// The wrapped vector iterator
1227
1228 public:
1229 using iterator_category = std::forward_iterator_tag;
1232 using difference_type = std::ptrdiff_t;
1233 using pointer = const value_type *;
1234 using reference = const value_type &;
1235
1236 RIterator(Iter_t iter) : fIter(iter) {}
1237 iterator &operator++() /* prefix */
1238 {
1239 ++fIter;
1240 return *this;
1241 }
1242 iterator operator++(int) /* postfix */
1243 {
1244 auto old = *this;
1245 operator++();
1246 return old;
1247 }
1248 reference operator*() const { return *fIter; }
1249 pointer operator->() const { return &*fIter; }
1250 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1251 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1252 };
1253
/// Iterator wrapping `cbegin()` of the descriptor's attribute set vector. Const-qualified because it only reads
/// through the const descriptor reference, so the range can also be traversed through a const reference.
1255 RIterator begin() const { return RIterator(fNTuple.fAttributeSets.cbegin()); }
/// Iterator wrapping `cend()` of the descriptor's attribute set vector.
1256 RIterator end() const { return RIterator(fNTuple.fAttributeSets.cend()); }
1257};
1258} // namespace Experimental
1259
1260// clang-format off
1261/**
1262\class ROOT::RNTupleDescriptor::RHeaderExtension
1263\ingroup NTuple
1264\brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
1265*/
1266// clang-format on
1269
1270private:
1271 /// All field IDs of late model extensions, in the order of field addition. This is necessary to serialize the
1272 /// fields in that order.
1273 std::vector<ROOT::DescriptorId_t> fFieldIdsOrder;
1274 /// All field IDs of late model extensions for efficient lookup. When a column gets added to the extension
1275 /// header, this enables us to determine if the column belongs to a field of the header extension or if it
1276 /// belongs to a field of the regular header that gets extended by additional column representations.
1277 std::unordered_set<ROOT::DescriptorId_t> fFieldIdsLookup;
1278 /// All logical column IDs of columns that extend, with additional column representations, fields of the regular
1279 /// header. During serialization, these columns are not picked up as columns of `fFieldIdsOrder`. But instead
1280 /// these columns need to be serialized in the extension header without re-serializing the field.
1281 std::vector<ROOT::DescriptorId_t> fExtendedColumnRepresentations;
1282 /// Number of logical and physical columns; updated by the descriptor builder when columns are added
1283 std::uint32_t fNLogicalColumns = 0;
1284 std::uint32_t fNPhysicalColumns = 0;
1285
1286 /// Marks `fieldDesc` as an extended field, i.e. a field that appears in the Header Extension (e.g. having been added
1287 /// through late model extension). Note that the field descriptor should also have been added to the RNTuple
1288 /// Descriptor alongside non-extended fields.
1290 {
1291 fFieldIdsOrder.emplace_back(fieldDesc.GetId());
1292 fFieldIdsLookup.insert(fieldDesc.GetId());
1293 }
1294
1295 /// Marks `columnDesc` as an extended column, i.e. a column that appears in the Header Extension (e.g. having been
1296 /// added through late model extension as an additional representation of an existing column). Note that the column
1297 /// descriptor should also have been added to the RNTuple Descriptor alongside non-extended columns.
1299 {
1300 fNLogicalColumns++;
1301 if (!columnDesc.IsAliasColumn())
1302 fNPhysicalColumns++;
1303 if (fFieldIdsLookup.count(columnDesc.GetFieldId()) == 0) {
1304 fExtendedColumnRepresentations.emplace_back(columnDesc.GetLogicalId());
1305 }
1306 }
1307
1308public:
1309 std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
1310 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
1311 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
1312 const std::vector<ROOT::DescriptorId_t> &GetExtendedColumnRepresentations() const
1313 {
1314 return fExtendedColumnRepresentations;
1315 }
1316 /// Return a vector containing the IDs of the top-level fields defined in the extension header, in the order
1317 /// of their addition. Note that these fields are not necessarily top-level fields in the overall schema.
1318 /// If a nested field is extended, it will return the top-most field of the extended subtree.
1319 /// We cannot create this vector when building the fFields because at the time when AddExtendedField is called,
1320 /// the field is not yet linked into the schema tree.
1321 std::vector<ROOT::DescriptorId_t> GetTopMostFields(const RNTupleDescriptor &desc) const;
1322
1324 {
1325 return fFieldIdsLookup.find(fieldId) != fFieldIdsLookup.end();
1326 }
1328 {
1329 return std::find(fExtendedColumnRepresentations.begin(), fExtendedColumnRepresentations.end(), columnId) !=
1330 fExtendedColumnRepresentations.end();
1331 }
1332};
1333
1334namespace Experimental::Internal {
1337
1338public:
1340 {
1341 fDesc.fName = name;
1342 return *this;
1343 }
1345 {
1346 fDesc.fSchemaVersionMajor = major;
1347 fDesc.fSchemaVersionMinor = minor;
1348 return *this;
1349 }
1351 {
1352 fDesc.fAnchorLocator = loc;
1353 return *this;
1354 }
1356 {
1357 fDesc.fAnchorLength = length;
1358 return *this;
1359 }
1360
1361 /// Attempt to make an AttributeSet descriptor. This may fail if the builder
1362 /// was not given enough information to make a proper descriptor.
1364};
1365} // namespace Experimental::Internal
1366
1367namespace Internal {
1368
1369// clang-format off
1370/**
1371\class ROOT::Internal::RColumnDescriptorBuilder
1372\ingroup NTuple
1373\brief A helper class for piece-wise construction of an RColumnDescriptor
1374
1375Dangling column descriptors can become actual descriptors when added to an
1376RNTupleDescriptorBuilder instance and then linked to their fields.
1377*/
1378// clang-format on
1380private:
1382
1383public:
1384 /// Make an empty column descriptor builder.
1386
1398 {
1399 fColumn.fBitsOnStorage = bitsOnStorage;
1400 return *this;
1401 }
1403 {
1404 fColumn.fType = type;
1405 return *this;
1406 }
1408 {
1409 fColumn.fFieldId = fieldId;
1410 return *this;
1411 }
1413 {
1414 fColumn.fIndex = index;
1415 return *this;
1416 }
1418 {
1420 return *this;
1421 }
1423 {
1424 R__ASSERT(fColumn.fFirstElementIndex != 0);
1425 if (fColumn.fFirstElementIndex > 0)
1426 fColumn.fFirstElementIndex = -fColumn.fFirstElementIndex;
1427 return *this;
1428 }
1430 {
1432 return *this;
1433 }
1434 RColumnDescriptorBuilder &ValueRange(double min, double max)
1435 {
1436 fColumn.fValueRange = {min, max};
1437 return *this;
1438 }
1439 RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange)
1440 {
1441 fColumn.fValueRange = valueRange;
1442 return *this;
1443 }
1444 ROOT::DescriptorId_t GetFieldId() const { return fColumn.fFieldId; }
1446 /// Attempt to make a column descriptor. This may fail if the column
1447 /// was not given enough information to make a proper descriptor.
1448 RResult<RColumnDescriptor> MakeDescriptor() const;
1449};
1450
1451// clang-format off
1452/**
1453\class ROOT::Internal::RFieldDescriptorBuilder
1454\ingroup NTuple
1455\brief A helper class for piece-wise construction of an RFieldDescriptor
1456
1457Dangling field descriptors describe a single field in isolation. They are
1458missing the necessary relationship information (parent field, any child fields)
1459required to describe a real RNTuple field.
1460
1461Dangling field descriptors can only become actual descriptors when added to an
1462RNTupleDescriptorBuilder instance and then linked to other fields.
1463*/
1464// clang-format on
1466private:
1468
1469public:
1470 /// Make an empty dangling field descriptor.
1472
1473 /// Make a new RFieldDescriptorBuilder based off a live RNTuple field.
1474 static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field);
1475
1477 {
1478 fField.fFieldId = fieldId;
1479 return *this;
1480 }
1482 {
1483 fField.fFieldVersion = fieldVersion;
1484 return *this;
1485 }
1487 {
1488 fField.fTypeVersion = typeVersion;
1489 return *this;
1490 }
1492 {
1493 fField.fParentId = id;
1494 return *this;
1495 }
1497 {
1498 fField.fProjectionSourceId = id;
1499 return *this;
1500 }
1502 {
1503 fField.fFieldName = fieldName;
1504 return *this;
1505 }
1507 {
1509 return *this;
1510 }
1511 RFieldDescriptorBuilder &TypeName(const std::string &typeName)
1512 {
1513 fField.fTypeName = typeName;
1514 return *this;
1515 }
1517 {
1518 fField.fTypeAlias = typeAlias;
1519 return *this;
1520 }
1522 {
1523 fField.fNRepetitions = nRepetitions;
1524 return *this;
1525 }
1527 {
1528 fField.fStructure = structure;
1529 return *this;
1530 }
1531 RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum)
1532 {
1533 fField.fTypeChecksum = typeChecksum;
1534 return *this;
1535 }
1536 ROOT::DescriptorId_t GetParentId() const { return fField.fParentId; }
1537 /// Attempt to make a field descriptor. This may fail if the dangling field
1538 /// was not given enough information to make a proper descriptor.
1539 RResult<RFieldDescriptor> MakeDescriptor() const;
1540};
1541
1542// clang-format off
1543/**
1544\class ROOT::Internal::RClusterDescriptorBuilder
1545\ingroup NTuple
1546\brief A helper class for piece-wise construction of an RClusterDescriptor
1547
1548The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1549piecewise addition of page locations.
1550*/
1551// clang-format on
1553private:
1555
1556public:
1558 {
1559 fCluster.fClusterId = clusterId;
1560 return *this;
1561 }
1562
1564 {
1566 return *this;
1567 }
1568
1570 {
1571 fCluster.fNEntries = nEntries;
1572 return *this;
1573 }
1574
1575 RResult<void> CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex,
1577
1578 /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1579 /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1580 /// MarkSuppressedColumnRange() took place.
1581 RResult<void> MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId);
1582
1583 /// Sets the first element index and number of elements for all the suppressed column ranges.
1584 /// The information is taken from the corresponding columns from the primary representation.
1585 /// Needs to be called when all the columns (suppressed and regular) were added.
1586 RResult<void> CommitSuppressedColumnRanges(const RNTupleDescriptor &desc);
1587
1588 /// Add column and page ranges for columns created during late model extension missing in this cluster. The locator
1589 /// type for the synthesized page ranges is `kTypePageZero`. All the page sources must be able to populate the
1590 /// 'zero' page from such locator. Any call to CommitColumnRange() and CommitSuppressedColumnRanges()
1591 /// should happen before calling this function.
1592 RClusterDescriptorBuilder &AddExtendedColumnRanges(const RNTupleDescriptor &desc);
1593
1598
1599 /// Move out the full cluster descriptor including page locations
1600 RResult<RClusterDescriptor> MoveDescriptor();
1601};
1602
1603// clang-format off
1604/**
1605\class ROOT::Internal::RClusterGroupDescriptorBuilder
1606\ingroup NTuple
1607\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1608*/
1609// clang-format on
1611private:
1613
1614public:
1617
1629 {
1630 fClusterGroup.fPageListLength = pageListLength;
1631 return *this;
1632 }
1634 {
1635 fClusterGroup.fMinEntry = minEntry;
1636 return *this;
1637 }
1639 {
1640 fClusterGroup.fEntrySpan = entrySpan;
1641 return *this;
1642 }
1644 {
1645 fClusterGroup.fNClusters = nClusters;
1646 return *this;
1647 }
/// Stores `clusterIds` as the cluster ID list of the cluster group under construction.
/// The number of IDs must equal the group's GetNClusters() value; otherwise an RException is thrown and
/// the builder is left unmodified.
/// NOTE(review): the name suggests the IDs are expected to be sorted already; no sorting or ordering check
/// happens here -- confirm the expected order with the callers.
1648 void AddSortedClusters(const std::vector<ROOT::DescriptorId_t> &clusterIds)
1649 {
1650 if (clusterIds.size() != fClusterGroup.GetNClusters())
1651 throw RException(R__FAIL("mismatch of number of clusters"));
1652 fClusterGroup.fClusterIds = clusterIds;
1653 }
1654
1655 RResult<RClusterGroupDescriptor> MoveDescriptor();
1656};
1657
1658// clang-format off
1659/**
1660\class ROOT::Internal::RExtraTypeInfoDescriptorBuilder
1661\ingroup NTuple
1662\brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1663*/
1664// clang-format on
1666private:
1668
1669public:
1671
1673 {
1674 fExtraTypeInfo.fContentId = contentId;
1675 return *this;
1676 }
1678 {
1679 fExtraTypeInfo.fTypeVersion = typeVersion;
1680 return *this;
1681 }
1682 RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1683 {
1684 fExtraTypeInfo.fTypeName = typeName;
1685 return *this;
1686 }
1688 {
1689 fExtraTypeInfo.fContent = content;
1690 return *this;
1691 }
1692
1693 RResult<RExtraTypeInfoDescriptor> MoveDescriptor();
1694};
1695
1696// clang-format off
1697/**
1698\class ROOT::Internal::RNTupleDescriptorBuilder
1699\ingroup NTuple
1700\brief A helper class for piece-wise construction of an RNTupleDescriptor
1701
1702Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1703*/
1704// clang-format on
1706private:
1708 RResult<void> EnsureFieldExists(ROOT::DescriptorId_t fieldId) const;
1709
1710public:
1711 /// Checks whether invariants hold:
1712 /// * RNTuple epoch is valid
1713 /// * RNTuple name is valid
1714 /// * Fields have valid parents
1715 /// * Number of columns is constant across column representations
1716 RResult<void> EnsureValidDescriptor() const;
1717 const RNTupleDescriptor &GetDescriptor() const { return fDescriptor; }
1718 RNTupleDescriptor MoveDescriptor();
1719
1720 /// Copies the "schema" part of `descriptor` into the builder's descriptor.
1721 /// This resets the builder's descriptor.
1722 void SetSchemaFromExisting(const RNTupleDescriptor &descriptor);
1723
1724 void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor,
1725 std::uint16_t versionPatch);
1726 void SetVersionForWriting();
1727
1728 void SetNTuple(const std::string_view name, const std::string_view description);
1729 void SetFeature(unsigned int flag);
1730
1731 void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1732 void SetOnDiskHeaderSize(std::uint64_t size) { fDescriptor.fOnDiskHeaderSize = size; }
1733 /// The real footer size also includes the page list envelopes
1734 void AddToOnDiskFooterSize(std::uint64_t size) { fDescriptor.fOnDiskFooterSize += size; }
1735
1736 void AddField(const RFieldDescriptor &fieldDesc);
1739
1740 // The field that the column belongs to has to be already available. For fields with multiple columns,
1741 // the columns need to be added in order of the column index
1743
1746
1748 void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
1749
1751
1752 /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1753 /// annotated as being part of the header extension.
1754 void BeginHeaderExtension();
1755
1756 /// \brief Shift column IDs of alias columns by `offset`
1757 ///
1758 /// If the descriptor is constructed in pieces consisting of physical and alias columns
1759 /// (regular and projected fields), the natural column order would be
1760 /// - Physical and alias columns of piece one
1761 /// - Physical and alias columns of piece two
1762 /// - etc.
1763 /// What we want, however, are first all physical column IDs and then all alias column IDs.
1764 /// This method adds `offset` to the logical column IDs of all alias columns and fixes up the corresponding
1765 /// column IDs in the projected field descriptors. In this way, a new piece of physical and alias columns can
1766 /// first shift the existing alias columns by the number of new physical columns, resulting in the following order
1767 /// - Physical columns of piece one
1768 /// - Physical columns of piece two
1769 /// - ...
1770 /// - Alias columns of piece one
1771 /// - Alias columns of piece two
1772 /// - ...
1773 void ShiftAliasColumns(std::uint32_t offset);
1774};
1775
1777{
1778 return desc.CloneSchema();
1779}
1780
1781/// Tells if the field describes a user-defined enum type.
1782/// The dictionary does not need to be available for this method.
1783/// Needs the full descriptor to look up sub fields.
1785
1786/// Tells if the field describes a std::atomic<T> type
1788
1789} // namespace Internal
1790
1791} // namespace ROOT
1792
1793#endif // ROOT_RNTupleDescriptor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:300
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Bool_t operator==(const TDatime &d1, const TDatime &d2)
Definition TDatime.h:102
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
The available trivial, native content types of a column.
RNTupleAttrSetDescriptorBuilder & AnchorLocator(const RNTupleLocator &loc)
RNTupleAttrSetDescriptorBuilder & SchemaVersion(std::uint16_t major, std::uint16_t minor)
RNTupleAttrSetDescriptorBuilder & Name(std::string_view name)
RNTupleAttrSetDescriptorBuilder & AnchorLength(std::uint32_t length)
std::vector< RNTupleAttrSetDescriptor >::const_iterator Iter_t
Used to loop over all the Attribute Sets linked to an RNTuple.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RNTupleAttrSetDescriptorIterable(const RNTupleDescriptor &ntuple)
Metadata stored for every Attribute Set linked to an RNTuple.
RNTupleAttrSetDescriptor & operator=(const RNTupleAttrSetDescriptor &other)=delete
bool operator==(const RNTupleAttrSetDescriptor &other) const
std::uint32_t fAnchorLength
uncompressed size of the linked anchor
RNTupleAttrSetDescriptor(const RNTupleAttrSetDescriptor &other)=delete
RNTupleAttrSetDescriptor & operator=(RNTupleAttrSetDescriptor &&other)=default
const RNTupleLocator & GetAnchorLocator() const
bool operator!=(const RNTupleAttrSetDescriptor &other) const
RNTupleAttrSetDescriptor(RNTupleAttrSetDescriptor &&other)=default
A helper class for piece-wise construction of an RClusterDescriptor.
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
const RClusterDescriptor::RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId)
RClusterDescriptorBuilder & ClusterId(ROOT::DescriptorId_t clusterId)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
void AddSortedClusters(const std::vector< ROOT::DescriptorId_t > &clusterIds)
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
A helper class for piece-wise construction of an RColumnDescriptor.
ROOT::DescriptorId_t GetRepresentationIndex() const
RColumnDescriptorBuilder & SetSuppressedDeferred()
RColumnDescriptorBuilder & LogicalColumnId(ROOT::DescriptorId_t logicalColumnId)
RColumnDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RColumnDescriptorBuilder & BitsOnStorage(std::uint16_t bitsOnStorage)
RColumnDescriptorBuilder & ValueRange(double min, double max)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & ValueRange(std::optional< RColumnDescriptor::RValueRange > valueRange)
RColumnDescriptorBuilder & Type(ROOT::ENTupleColumnType type)
RColumnDescriptorBuilder & PhysicalColumnId(ROOT::DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & RepresentationIndex(std::uint16_t representationIndex)
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
RExtraTypeInfoDescriptorBuilder & ContentId(EExtraTypeInfoIds contentId)
RExtraTypeInfoDescriptorBuilder & TypeName(const std::string &typeName)
RExtraTypeInfoDescriptorBuilder & Content(const std::string &content)
RExtraTypeInfoDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & Structure(const ROOT::ENTupleStructure &structure)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & ProjectionSourceId(ROOT::DescriptorId_t id)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & TypeChecksum(const std::optional< std::uint32_t > typeChecksum)
RFieldDescriptorBuilder & ParentId(ROOT::DescriptorId_t id)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
A helper class for piece-wise construction of an RNTupleDescriptor.
const RNTupleDescriptor & GetDescriptor() const
void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
std::unordered_map< ROOT::DescriptorId_t, RColumnRange >::const_iterator Iter_t
RColumnRangeIterable(const RClusterDescriptor &desc)
The window of element indexes of a particular column in a particular cluster.
void SetCompressionSettings(std::optional< std::uint32_t > comp)
void SetPhysicalColumnId(ROOT::DescriptorId_t id)
ROOT::DescriptorId_t GetPhysicalColumnId() const
bool operator==(const RColumnRange &other) const
void SetFirstElementIndex(ROOT::NTupleSize_t idx)
std::optional< std::uint32_t > GetCompressionSettings() const
std::optional< std::uint32_t > fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ROOT::NTupleSize_t GetFirstElementIndex() const
RColumnRange(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t nElements, std::optional< std::uint32_t > compressionSettings, bool suppressed=false)
void IncrementFirstElementIndex(ROOT::NTupleSize_t by)
bool Contains(ROOT::NTupleSize_t index) const
void IncrementNElements(ROOT::NTupleSize_t by)
Records the partition of data into pages for a particular column in a particular cluster.
RPageRange & operator=(const RPageRange &other)=delete
std::unique_ptr< std::vector< ROOT::NTupleSize_t > > fCumulativeNElements
Has the same length as fPageInfos and stores the sum of the number of elements of all the pages up ...
RPageRange(RPageRange &&other)=default
RPageRange(const RPageRange &other)=delete
const std::vector< RPageInfo > & GetPageInfos() const
bool operator==(const RPageRange &other) const
ROOT::DescriptorId_t GetPhysicalColumnId() const
void SetPhysicalColumnId(ROOT::DescriptorId_t id)
RPageRange & operator=(RPageRange &&other)=default
std::vector< RPageInfo > & GetPageInfos()
Metadata for RNTuple clusters.
ROOT::NTupleSize_t GetNEntries() const
ROOT::NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
ROOT::DescriptorId_t GetId() const
const RPageRange & GetPageRange(ROOT::DescriptorId_t physicalId) const
RClusterDescriptor(RClusterDescriptor &&other)=default
std::unordered_map< ROOT::DescriptorId_t, RColumnRange > fColumnRanges
ROOT::DescriptorId_t fClusterId
bool ContainsColumn(ROOT::DescriptorId_t physicalId) const
RClusterDescriptor & operator=(RClusterDescriptor &&other)=default
const RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId) const
ROOT::NTupleSize_t GetFirstEntryIndex() const
std::unordered_map< ROOT::DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor(const RClusterDescriptor &other)=delete
Clusters are bundled in cluster groups.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::vector< ROOT::DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
ROOT::DescriptorId_t GetId() const
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
bool HasClusterDetails() const
A cluster group is loaded in two stages.
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
const std::vector< ROOT::DescriptorId_t > & GetClusterIds() const
std::uint64_t fPageListLength
Uncompressed size of the page list.
std::uint64_t GetPageListLength() const
RNTupleLocator GetPageListLocator() const
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
Metadata stored for every column of an RNTuple.
std::optional< RValueRange > GetValueRange() const
ROOT::DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
ROOT::DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor(const RColumnDescriptor &other)=delete
std::uint64_t GetFirstElementIndex() const
ROOT::DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
ROOT::DescriptorId_t GetFieldId() const
RColumnDescriptor(RColumnDescriptor &&other)=default
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
RColumnDescriptor & operator=(RColumnDescriptor &&other)=default
std::uint32_t GetIndex() const
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
ROOT::ENTupleColumnType fType
The on-disk column type.
ROOT::ENTupleColumnType GetType() const
ROOT::DescriptorId_t GetPhysicalId() const
std::uint16_t GetRepresentationIndex() const
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
std::uint16_t GetBitsOnStorage() const
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
ROOT::DescriptorId_t GetLogicalId() const
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Field-specific extra type information from the header / extension header.
RExtraTypeInfoDescriptor & operator=(RExtraTypeInfoDescriptor &&other)=default
RExtraTypeInfoDescriptor & operator=(const RExtraTypeInfoDescriptor &other)=delete
RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other)=delete
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
const std::string & GetContent() const
const std::string & GetTypeName() const
RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other)=default
EExtraTypeInfoIds GetContentId() const
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
A field translates read and write calls from/to underlying columns to/from tree values.
Metadata stored for every field of an RNTuple.
const std::string & GetTypeAlias() const
std::unique_ptr< ROOT::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
ROOT::DescriptorId_t fFieldId
RFieldDescriptor Clone() const
Get a copy of the descriptor.
ROOT::DescriptorId_t GetId() const
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
std::uint32_t GetFieldVersion() const
const std::vector< ROOT::DescriptorId_t > & GetLogicalColumnIds() const
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
ROOT::DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
bool IsCustomEnum(const RNTupleDescriptor &desc) const R__DEPRECATED(6
ROOT::ENTupleStructure GetStructure() const
bool operator==(const RFieldDescriptor &other) const
RFieldDescriptor(const RFieldDescriptor &other)=delete
bool IsCustomClass() const R__DEPRECATED(6
std::uint32_t GetColumnCardinality() const
bool IsStdAtomic() const R__DEPRECATED(6
std::string fFieldDescription
Free text set by the user.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor & operator=(RFieldDescriptor &&other)=default
const std::vector< ROOT::DescriptorId_t > & GetLinkIds() const
ROOT::DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
ROOT::DescriptorId_t GetParentId() const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ROOT::ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::uint64_t GetNRepetitions() const
RFieldDescriptor(RFieldDescriptor &&other)=default
std::vector< ROOT::DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::string fFieldName
The leaf name, not including parent fields.
const std::string & GetFieldDescription() const
std::optional< std::uint32_t > GetTypeChecksum() const
ROOT::DescriptorId_t GetProjectionSourceId() const
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::string fTypeName
The C++ type that was used when writing the field.
std::uint32_t GetTypeVersion() const
const std::string & GetFieldName() const
std::vector< ROOT::DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
const std::string & GetTypeName() const
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor >::const_iterator Iter_t
Used to loop over all the clusters of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RClusterDescriptorIterable(const RNTupleDescriptor &ntuple)
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor >::const_iterator Iter_t
Used to loop over all the cluster groups of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
const RNTupleDescriptor & fNTuple
The enclosing range's RNTuple.
const std::vector< ROOT::DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< ROOT::DescriptorId_t > &columns, std::size_t index)
Used to loop over a field's associated columns.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
std::vector< RExtraTypeInfoDescriptor >::const_iterator Iter_t
Used to loop over all the extra type info records of an RNTuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< ROOT::DescriptorId_t > &fieldChildren, std::size_t index)
const std::vector< ROOT::DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's RNTuple.
Used to loop over a field's child fields.
const RNTupleDescriptor & fNTuple
The associated RNTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
Summarizes information about fields and the corresponding columns that were added after the header ha...
const std::vector< ROOT::DescriptorId_t > & GetExtendedColumnRepresentations() const
std::unordered_set< ROOT::DescriptorId_t > fFieldIdsLookup
All field IDs of late model extensions for efficient lookup.
std::vector< ROOT::DescriptorId_t > fExtendedColumnRepresentations
All logical column IDs of columns that extend, with additional column representations,...
bool ContainsExtendedColumnRepresentation(ROOT::DescriptorId_t columnId) const
void MarkExtendedField(const RFieldDescriptor &fieldDesc)
Marks fieldDesc as an extended field, i.e.
std::vector< ROOT::DescriptorId_t > fFieldIdsOrder
All field IDs of late model extensions, in the order of field addition.
bool ContainsField(ROOT::DescriptorId_t fieldId) const
void MarkExtendedColumn(const RColumnDescriptor &columnDesc)
Marks columnDesc as an extended column, i.e.
The on-storage metadata of an RNTuple.
std::uint64_t GetGeneration() const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
const RClusterGroupDescriptor & GetClusterGroupDescriptor(ROOT::DescriptorId_t clusterGroupId) const
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
std::set< unsigned int > fFeatureFlags
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::vector< Experimental::RNTupleAttrSetDescriptor > fAttributeSets
List of AttributeSets linked to this RNTuple.
std::size_t GetNExtraTypeInfos() const
std::uint64_t GetOnDiskFooterSize() const
std::size_t GetNActiveClusters() const
const std::string & GetName() const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
ROOT::NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
Potentially a subset of all the available clusters.
std::size_t GetNAttributeSets() const
std::size_t GetNClusters() const
std::size_t GetNPhysicalColumns() const
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
const RClusterDescriptor & GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
std::string fName
The RNTuple name needs to be unique in a given storage location (file)
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
std::uint64_t GetOnDiskHeaderXxHash3() const
std::vector< ROOT::DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unordered_map< ROOT::DescriptorId_t, RFieldDescriptor > fFieldDescriptors
std::size_t GetNFields() const
bool HasFeature(unsigned int flag) const
std::uint64_t GetOnDiskHeaderSize() const
std::string fDescription
Free text from the user.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
std::size_t GetNLogicalColumns() const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::size_t GetNClusterGroups() const
RNTupleDescriptor CloneSchema() const
Creates a descriptor containing only the schema information about this RNTuple, i....
const std::string & GetDescription() const
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RFieldDescriptor & GetFieldZero() const
Generic information about the physical location of data.
const Int_t n
Definition legend1.C:16
RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc)
bool IsCustomEnumFieldDesc(const RNTupleDescriptor &desc, const RFieldDescriptor &fieldDesc)
Tells if the field describes a user-defined enum type.
std::vector< ROOT::Internal::RNTupleClusterBoundaries > GetClusterBoundaries(const RNTupleDescriptor &desc)
Return the cluster boundaries for each cluster in this RNTuple.
bool IsStdAtomicFieldDesc(const RFieldDescriptor &fieldDesc)
Tells if the field describes a std::atomic<T> type.
EExtraTypeInfoIds
Used in RExtraTypeInfoDescriptor.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
ENTupleStructure
The fields in the RNTuple data model tree can carry different structural information about the type s...
Additional information about a page in an in-memory RPageRange.
RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber)
void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage)
void SetPageNumber(ROOT::NTupleSize_t pageNumber)
Information about a single page in the context of a cluster's page range.
void SetLocator(const RNTupleLocator &locator)
bool operator==(const RPageInfo &other) const
const RNTupleLocator & GetLocator() const
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum)
bool operator==(RValueRange other) const
RValueRange(std::pair< double, double > range)
bool operator!=(RValueRange other) const
static void output()