Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-07-19
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#ifndef ROOT7_RNTupleDescriptor
18#define ROOT7_RNTupleDescriptor
19
21#include <ROOT/RError.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RSpan.hxx>
25
26#include <TError.h>
27
28#include <algorithm>
29#include <chrono>
30#include <cmath>
31#include <functional>
32#include <iterator>
33#include <map>
34#include <memory>
35#include <optional>
36#include <ostream>
37#include <vector>
38#include <set>
39#include <string>
40#include <string_view>
41#include <unordered_map>
42#include <unordered_set>
43
44namespace ROOT {
45
46namespace Internal {
47class RColumnElementBase;
48}
49
50namespace Experimental {
51
52class RFieldBase;
53class RNTupleDescriptor;
54class RNTupleModel;
55
56namespace Internal {
57class RColumnDescriptorBuilder;
58class RClusterDescriptorBuilder;
59class RClusterGroupDescriptorBuilder;
60class RExtraTypeInfoDescriptorBuilder;
61class RFieldDescriptorBuilder;
62class RNTupleDescriptorBuilder;
63
64RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc);
65} // namespace Internal
66
67// clang-format off
68/**
69\class ROOT::Experimental::RFieldDescriptor
70\ingroup NTuple
71\brief Meta-data stored for every field of an ntuple
72*/
73// clang-format on
77
78private:
80 /// The version of the C++-type-to-column translation mechanics
81 std::uint32_t fFieldVersion = 0;
82 /// The version of the C++ type itself
83 std::uint32_t fTypeVersion = 0;
84 /// The leaf name, not including parent fields
85 std::string fFieldName;
86 /// Free text set by the user
87 std::string fFieldDescription;
88 /// The C++ type that was used when writing the field
89 std::string fTypeName;
90 /// A typedef or using directive that resolved to the type name during field creation
91 std::string fTypeAlias;
92 /// The number of elements per entry for fixed-size arrays
93 std::uint64_t fNRepetitions = 0;
94 /// The structural information carried by this field in the data model tree
96 /// Establishes sub field relationships, such as classes and collections
98 /// For projected fields, the source field ID
100 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
101 /// order of sub fields.
102 std::vector<ROOT::DescriptorId_t> fLinkIds;
103 /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
104 /// list of logical column ids. For example, the second column of the third column representation is
105 /// fLogicalColumnIds[2 * fColumnCardinality + 1]
106 std::uint32_t fColumnCardinality = 0;
107 /// The ordered list of columns attached to this field: first by representation index then by column index.
108 std::vector<ROOT::DescriptorId_t> fLogicalColumnIds;
109 /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
110 /// identify types by their checksum
111 std::optional<std::uint32_t> fTypeChecksum;
112
113public:
114 RFieldDescriptor() = default;
119
120 bool operator==(const RFieldDescriptor &other) const;
121 /// Get a copy of the descriptor
122 RFieldDescriptor Clone() const;
123
124 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
125 /// access to sub fields, which is provided by the ntuple descriptor argument.
126 std::unique_ptr<RFieldBase>
127 CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options = {}) const;
128
130 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
131 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
132 const std::string &GetFieldName() const { return fFieldName; }
133 const std::string &GetFieldDescription() const { return fFieldDescription; }
134 const std::string &GetTypeName() const { return fTypeName; }
135 const std::string &GetTypeAlias() const { return fTypeAlias; }
136 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
140 const std::vector<ROOT::DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
141 const std::vector<ROOT::DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
142 std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
143 std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; }
145 /// Tells if the field describes a user-defined class rather than a fundamental type, a collection, or one of the
146 /// natively supported stdlib classes.
147 /// The dictionary does not need to be available for this method.
148 bool IsCustomClass() const;
149};
150
151// clang-format off
152/**
153\class ROOT::Experimental::RColumnDescriptor
154\ingroup NTuple
155\brief Meta-data stored for every column of an ntuple
156*/
157// clang-format on
161
162public:
163 struct RValueRange { // Pair of bounds (fMin, fMax) describing a range of column values
164 double fMin = 0, fMax = 0; // Both bounds default to 0
165
166 RValueRange() = default;
167 RValueRange(double min, double max) : fMin(min), fMax(max) {} // Stores the bounds as given; min <= max is not checked here
168 RValueRange(std::pair<double, double> range) : fMin(range.first), fMax(range.second) {} // Non-explicit: a pair (first = min, second = max) converts implicitly
169
170 bool operator==(RValueRange other) const { return fMin == other.fMin && fMax == other.fMax; } // Exact floating-point comparison of both bounds
171 bool operator!=(RValueRange other) const { return !(*this == other); } // Defined as the negation of operator==
172 };
173
174private:
175 /// The actual column identifier, which is the link to the corresponding field
177 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
179 /// Every column belongs to one and only one field
181 /// The absolute value specifies the index for the first stored element for this column.
182 /// For deferred columns the absolute value is larger than zero.
183 /// Negative values specify a suppressed and deferred column.
184 std::int64_t fFirstElementIndex = 0U;
185 /// A field can be serialized into several columns, which are numbered from zero to $n$
186 std::uint32_t fIndex = 0;
187 /// A field may use multiple column representations, which are numbered from zero to $m$.
188 /// Every representation has the same number of columns.
189 std::uint16_t fRepresentationIndex = 0;
190 /// The size in bits of elements of this column. Most columns have the size fixed by their type
191 /// but low-precision float columns have variable bit widths.
192 std::uint16_t fBitsOnStorage = 0;
193 /// The on-disk column type
195 /// Optional value range (used e.g. by quantized real fields)
196 std::optional<RValueRange> fValueRange;
197
198public:
199 RColumnDescriptor() = default;
204
205 bool operator==(const RColumnDescriptor &other) const;
206 /// Get a copy of the descriptor
207 RColumnDescriptor Clone() const;
208
212 std::uint32_t GetIndex() const { return fIndex; }
213 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
214 std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); }
215 std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
217 std::optional<RValueRange> GetValueRange() const { return fValueRange; }
219 bool IsDeferredColumn() const { return fFirstElementIndex != 0; }
221};
222
223// clang-format off
224/**
225\class ROOT::Experimental::RClusterDescriptor
226\ingroup NTuple
227\brief Meta-data for a set of ntuple clusters
228
229The cluster descriptor is built in two phases. In a first phase, the descriptor has only an ID.
230In a second phase, the event range, column group, page locations and column ranges are added.
231Both phases are populated by the RClusterDescriptorBuilder.
232Clusters usually span across all available columns but in some cases they can describe only a subset of the columns,
233for instance when describing friend ntuples.
234*/
235// clang-format on
238
239public:
240 // clang-format off
241 /**
242 \class ROOT::Experimental::RClusterDescriptor::RColumnRange
243 \ingroup NTuple
244 \brief The window of element indexes of a particular column in a particular cluster
245 */
246 // clang-format on
249 /// The global index of the first column element in the cluster
251 /// The number of column elements in the cluster
253 /// The usual format for ROOT compression settings (see Compression.h).
254 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
255 /// If unset, the compression settings are undefined (deferred columns, suppressed columns).
256 std::optional<std::uint32_t> fCompressionSettings;
257 /// Suppressed columns have an empty page range and unknown compression settings.
258 /// Their element index range, however, is aligned with the corresponding column of the
259 /// primary column representation (see Section "Suppressed Columns" in the specification)
260 bool fIsSuppressed = false;
261
262 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
263 // Should this be done on the field level?
264
265 public:
266 RColumnRange() = default;
267
278
281
285
289
290 std::optional<std::uint32_t> GetCompressionSettings() const { return fCompressionSettings; }
291 void SetCompressionSettings(std::optional<std::uint32_t> comp) { fCompressionSettings = comp; }
292
293 bool IsSuppressed() const { return fIsSuppressed; }
295
296 bool operator==(const RColumnRange &other) const
297 { // Equal only if all five members match: physical column ID, first element index, element count, compression settings and suppression flag
298 return fPhysicalColumnId == other.fPhysicalColumnId && fFirstElementIndex == other.fFirstElementIndex &&
299 fNElements == other.fNElements && fCompressionSettings == other.fCompressionSettings &&
300 fIsSuppressed == other.fIsSuppressed;
301 }
302
307 };
308
309 // clang-format off
310 /**
311 \class ROOT::Experimental::RClusterDescriptor::RPageRange
312 \ingroup NTuple
313 \brief Records the partition of data into pages for a particular column in a particular cluster
314 */
315 // clang-format on
318
319 public:
320 /// We do not need to store the element size / uncompressed page size because we know to which column
321 /// the page belongs
322 struct RPageInfo {
323 protected:
324 /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
325 std::uint32_t fNElements = std::uint32_t(-1);
326 /// The meaning of fLocator depends on the storage backend.
328 /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
329 bool fHasChecksum = false;
330
331 public:
332 RPageInfo() = default;
337
338 bool operator==(const RPageInfo &other) const
339 { // Compares the element count and the locator only. NOTE(review): fHasChecksum is not part of the comparison -- confirm this is intended
340 return fNElements == other.fNElements && fLocator == other.fLocator;
341 }
342
343 std::uint32_t GetNElements() const { return fNElements; }
344 void SetNElements(std::uint32_t n) { fNElements = n; }
345
346 const RNTupleLocator &GetLocator() const { return fLocator; }
349
350 bool HasChecksum() const { return fHasChecksum; }
352 };
353
375
376 private:
377 /// Extend this RPageRange to fit the given RColumnRange, i.e. prepend as many synthetic RPageInfos as needed to
378 /// cover the range in `columnRange`. `RPageInfo`s are constructed to contain as many elements of type `element`
379 /// given a page size limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
380 /// This function is used to make up `RPageRange`s for clusters that contain deferred columns.
381 /// \return The number of column elements covered by the synthesized RPageInfos
384
385 /// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
386 /// up to and including a given index. Used for binary search in Find().
387 std::vector<ROOT::NTupleSize_t> fCumulativeNElements;
388
390 std::vector<RPageInfo> fPageInfos;
391
392 public:
393 RPageRange() = default;
394 RPageRange(const RPageRange &other) = delete;
398
400 {
401 RPageRange clone;
403 clone.fPageInfos = fPageInfos;
405 return clone;
406 }
407
408 /// Find the page in the RPageRange that contains the given element. The element must exist.
409 RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const;
410
413
414 const std::vector<RPageInfo> &GetPageInfos() const { return fPageInfos; }
415 std::vector<RPageInfo> &GetPageInfos() { return fPageInfos; }
416
417 bool operator==(const RPageRange &other) const
418 { // Compares the physical column ID and the page infos; fCumulativeNElements is not compared (NOTE(review): presumably derivable from fPageInfos -- confirm)
419 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
420 }
421 };
422
423private:
425 /// Clusters can be swapped by adjusting the entry offsets
427 // TODO(jblomer): change to std::uint64_t
429
430 std::unordered_map<ROOT::DescriptorId_t, RColumnRange> fColumnRanges;
431 std::unordered_map<ROOT::DescriptorId_t, RPageRange> fPageRanges;
432
433public:
435
441
443
444 bool operator==(const RClusterDescriptor &other) const;
445
451 /// Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
452 RColumnRangeIterable GetColumnRangeIterable() const;
454 {
455 return fColumnRanges.find(physicalId) != fColumnRanges.end();
456 }
457 std::uint64_t GetNBytesOnStorage() const;
458};
459
461private:
463
464public:
465 class RIterator {
466 private:
467 using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RColumnRange>::const_iterator;
468 /// The wrapped map iterator
470
471 public:
472 using iterator_category = std::forward_iterator_tag;
475 using difference_type = std::ptrdiff_t;
476 using pointer = const RColumnRange *;
477 using reference = const RColumnRange &;
478
479 RIterator(Iter_t iter) : fIter(iter) {}
481 {
482 ++fIter;
483 return *this;
484 }
485 reference operator*() { return fIter->second; }
486 pointer operator->() { return &fIter->second; }
487 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
488 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
489 };
490
491 explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
492
493 RIterator begin() { return RIterator{fDesc.fColumnRanges.cbegin()}; }
494 RIterator end() { return fDesc.fColumnRanges.cend(); }
495 size_t size() { return fDesc.fColumnRanges.size(); }
496};
497
498// clang-format off
499/**
500\class ROOT::Experimental::RClusterGroupDescriptor
501\ingroup NTuple
502\brief Clusters are bundled in cluster groups.
503
504Very large ntuples or combined ntuples (chains, friends) contain multiple cluster groups. The cluster groups
505may contain sharded clusters.
506Every ntuple has at least one cluster group. The clusters in a cluster group are ordered corresponding to
507the order of page locations in the page list envelope that belongs to the cluster group (see format specification)
508*/
509// clang-format on
512
513private:
515 /// The cluster IDs can be empty if the corresponding page list is not loaded.
516 /// Otherwise, cluster ids are sorted by first entry number.
517 std::vector<ROOT::DescriptorId_t> fClusterIds;
518 /// The page list that corresponds to the cluster group
520 /// Uncompressed size of the page list
521 std::uint64_t fPageListLength = 0;
522 /// The minimum first entry number of the clusters in the cluster group
523 std::uint64_t fMinEntry = 0;
524 /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
525 std::uint64_t fEntrySpan = 0;
526 /// Number of clusters is always known even if the cluster IDs are not (yet) populated
527 std::uint32_t fNClusters = 0;
528
529public:
535
537 // Creates a clone without the cluster IDs
539
540 bool operator==(const RClusterGroupDescriptor &other) const;
541
543 std::uint32_t GetNClusters() const { return fNClusters; }
545 std::uint64_t GetPageListLength() const { return fPageListLength; }
546 const std::vector<ROOT::DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
547 std::uint64_t GetMinEntry() const { return fMinEntry; }
548 std::uint64_t GetEntrySpan() const { return fEntrySpan; }
549 /// A cluster group is loaded in two stages. Stage one loads only the summary information.
550 /// Stage two loads the list of cluster IDs.
551 bool HasClusterDetails() const { return !fClusterIds.empty(); }
552};
553
554/// Used in RExtraTypeInfoDescriptor
556 kInvalid,
558};
559
560// clang-format off
561/**
562\class ROOT::Experimental::RExtraTypeInfoDescriptor
563\ingroup NTuple
564\brief Field-specific extra type information from the header / extension header
565
566Currently only used by streamer fields to store RNTuple-wide list of streamer info records.
567*/
568// clang-format on
571
572private:
573 /// Specifies the meaning of the extra information
575 /// Type version the extra type information is bound to
576 std::uint32_t fTypeVersion = 0;
577 /// The type name the extra information refers to; empty for RNTuple-wide extra information
578 std::string fTypeName;
579 /// The content format depends on the content ID and may be binary
580 std::string fContent;
581
582public:
588
589 bool operator==(const RExtraTypeInfoDescriptor &other) const;
590
592
594 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
595 const std::string &GetTypeName() const { return fTypeName; }
596 const std::string &GetContent() const { return fContent; }
597};
598
599// clang-format off
600/**
601\class ROOT::Experimental::RNTupleDescriptor
602\ingroup NTuple
603\brief The on-storage meta-data of an ntuple
604
605Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
606several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
607relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
608its location and size, and for every column the range of element indexes as well as a list of pages and page
609locations.
610
611The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
612for ntuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
613
614The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
615the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
616written struct. This allows for forward and backward compatibility when the meta-data evolves.
617*/
618// clang-format on
622
623public:
624 class RHeaderExtension;
625
626private:
627 /// The ntuple name needs to be unique in a given storage location (file)
628 std::string fName;
629 /// Free text from the user
630 std::string fDescription;
631
633
634 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
635
636 std::set<unsigned int> fFeatureFlags;
637 std::unordered_map<ROOT::DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
638 std::unordered_map<ROOT::DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
639
640 std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
641 std::unique_ptr<RHeaderExtension> fHeaderExtension;
642
643 //// All fields above are part of the schema and are cloned when creating a new descriptor from a given one
644 //// (see CloneSchema())
645
646 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
647 std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
648 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
649
650 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster groups are added
651 std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
652
653 /**
654 * Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
655 * active page locations. During the lifetime of the descriptor, page location information for clusters
656 * can be added or removed. When this happens, the generation should be increased, so that users of the
657 * descriptor know that the information changed. The generation is increased, e.g., by the page source's
658 * exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
659 */
660 std::uint64_t fGeneration = 0;
661
662 std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
663 /// References cluster groups sorted by entry range and thus allows for binary search.
664 /// Note that this list is empty during the descriptor building process and will only be
665 /// created when the final descriptor is extracted from the builder.
666 std::vector<ROOT::DescriptorId_t> fSortedClusterGroupIds;
667 /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
668 /// from a chain of files
669 std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
670
671 // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
673
674 /// Creates a descriptor containing only the schema information about this RNTuple, i.e. all the information needed
675 /// to create a new RNTuple with the same schema as this one but not necessarily the same clustering. This is used
676 /// when merging two RNTuples.
678
679public:
680 static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
681
687
688 /// Modifiers passed to `CreateModel`
690 private:
691 /// If set to true, projected fields will be reconstructed as such. This will prevent the model from being used
692 /// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
694 /// Normally creating a model will fail if any of the reconstructed fields contains an unknown column type.
695 /// If this option is enabled, the model will be created and all fields containing unknown data (directly
696 /// or indirectly) will be skipped instead.
697 bool fForwardCompatible = false;
698 /// If true, the model will be created without a default entry (bare model).
699 bool fCreateBare = false;
700 /// If true, fields with a user defined type that have no available dictionaries will be reconstructed
701 /// as record fields from the on-disk information; otherwise, they will cause an error.
703
704 public:
705 RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
706
709
712
713 void SetCreateBare(bool v) { fCreateBare = v; }
714 bool GetCreateBare() const { return fCreateBare; }
715
718 };
719
720 RNTupleDescriptor() = default;
725
726 RNTupleDescriptor Clone() const;
727
728 bool operator==(const RNTupleDescriptor &other) const;
729
730 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
731 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
732 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
733
750
751 RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
752 RFieldDescriptorIterable
754 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
755 RFieldDescriptorIterable GetFieldIterable(ROOT::DescriptorId_t fieldId) const;
756 RFieldDescriptorIterable
758 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
759
760 RFieldDescriptorIterable GetTopLevelFields() const;
761 RFieldDescriptorIterable
762 GetTopLevelFields(const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
763
764 RColumnDescriptorIterable GetColumnIterable() const;
765 RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
766 RColumnDescriptorIterable GetColumnIterable(ROOT::DescriptorId_t fieldId) const;
767
768 RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
769
770 RClusterDescriptorIterable GetClusterIterable() const;
771
772 RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
773
774 const std::string &GetName() const { return fName; }
775 const std::string &GetDescription() const { return fDescription; }
776
777 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
778 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
779 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
780 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
781 std::size_t GetNClusters() const { return fNClusters; }
782 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
783 std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
784
785 /// We know the number of entries from adding the cluster summaries
788
789 /// Returns the logical parent of all top-level NTuple data fields.
793 /// Searches for a top-level field
794 ROOT::DescriptorId_t FindFieldId(std::string_view fieldName) const;
796 std::uint16_t representationIndex) const;
798 std::uint16_t representationIndex) const;
802
803 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
804 /// In case of invalid field ID, an empty string is returned.
806
807 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
808 std::vector<std::uint64_t> GetFeatureFlags() const;
809
810 /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
811 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
812
813 /// Methods to load and drop cluster group details (cluster IDs and page locations)
817
818 std::uint64_t GetGeneration() const { return fGeneration; }
820
821 /// Re-create the C++ model from the stored meta-data
822 std::unique_ptr<RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
823 void PrintInfo(std::ostream &output) const;
824};
825
826// clang-format off
827/**
828\class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorIterable
829\ingroup NTuple
830\brief Used to loop over a field's associated columns
831*/
832// clang-format on
834private:
835 /// The associated NTuple for this range.
837 /// The descriptor ids of the columns ordered by field, representation, and column index
838 std::vector<ROOT::DescriptorId_t> fColumns = {};
839
840public:
841 class RIterator {
842 private:
843 /// The enclosing range's NTuple.
845 /// The enclosing range's descriptor id list.
846 const std::vector<ROOT::DescriptorId_t> &fColumns;
847 std::size_t fIndex = 0;
848
849 public:
850 using iterator_category = std::forward_iterator_tag;
853 using difference_type = std::ptrdiff_t;
854 using pointer = const RColumnDescriptor *;
856
857 RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &columns, std::size_t index)
859 {
860 }
862 {
863 ++fIndex;
864 return *this;
865 }
866 reference operator*() { return fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
867 pointer operator->() { return &fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
868 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
869 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
870 };
871
874
877 size_t size() { return fColumns.size(); }
878};
879
880// clang-format off
881/**
882\class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorIterable
883\ingroup NTuple
884\brief Used to loop over a field's child fields
885*/
886// clang-format on
888private:
889 /// The associated NTuple for this range.
891 /// The descriptor ids of the child fields. These may be sorted using
892 /// a comparison function.
893 std::vector<ROOT::DescriptorId_t> fFieldChildren = {};
894
895public:
896 class RIterator {
897 private:
898 /// The enclosing range's NTuple.
900 /// The enclosing range's descriptor id list.
901 const std::vector<ROOT::DescriptorId_t> &fFieldChildren;
902 std::size_t fIndex = 0;
903
904 public:
905 using iterator_category = std::forward_iterator_tag;
908 using difference_type = std::ptrdiff_t;
911
912 RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &fieldChildren,
913 std::size_t index)
915 {
916 }
918 {
919 ++fIndex;
920 return *this;
921 }
922 reference operator*() { return fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
923 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
924 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
925 };
930 /// Sort the range using an arbitrary comparison function.
932 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
933 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
934 {
935 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
936 }
939};
940
941// clang-format off
942/**
943\class ROOT::Experimental::RNTupleDescriptor::RClusterGroupDescriptorIterable
944\ingroup NTuple
945\brief Used to loop over all the cluster groups of an ntuple (in unspecified order)
946
947Enumerate all cluster group IDs from the cluster group descriptor. No specific order can be assumed, use
948FindNextClusterGroupId and FindPrevClusterGroupId to traverse cluster groups by entry number.
949*/
950// clang-format on
952private:
953 /// The associated NTuple for this range.
955
956public:
957 class RIterator {
958 private:
959 /// The enclosing range's NTuple.
961 std::size_t fIndex = 0;
962
963 public:
964 using iterator_category = std::forward_iterator_tag;
967 using difference_type = std::ptrdiff_t;
970
973 {
974 ++fIndex;
975 return *this;
976 }
978 {
979 auto it = fNTuple.fClusterGroupDescriptors.begin();
980 std::advance(it, fIndex);
981 return it->second;
982 }
983 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
984 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
985 };
986
989 RIterator end() { return RIterator(fNTuple, fNTuple.GetNClusterGroups()); }
990};
991
992// clang-format off
993/**
994\class ROOT::Experimental::RNTupleDescriptor::RClusterDescriptorIterable
995\ingroup NTuple
996\brief Used to loop over all the clusters of an ntuple (in unspecified order)
997
998Enumerate all cluster IDs from the cluster descriptor. No specific order can be assumed, use
999FindNextClusterId and FindPrevClusterId to traverse clusters by entry number.
1000*/
1001// clang-format on
1003private:
1004 /// The associated NTuple for this range.
1006
1007public:
1009 private:
1010 /// The enclosing range's NTuple.
1012 std::size_t fIndex = 0;
1013
1014 public:
1015 using iterator_category = std::forward_iterator_tag;
1018 using difference_type = std::ptrdiff_t;
1021
1024 {
1025 ++fIndex;
1026 return *this;
1027 }
1029 {
1030 auto it = fNTuple.fClusterDescriptors.begin();
1031 std::advance(it, fIndex);
1032 return it->second;
1033 }
1034 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
1035 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
1036 };
1037
1040 RIterator end() { return RIterator(fNTuple, fNTuple.GetNActiveClusters()); }
1041};
1042
1043// clang-format off
1044/**
1045\class ROOT::Experimental::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
1046\ingroup NTuple
1047\brief Used to loop over all the extra type info records of an ntuple (in unspecified order)
1048*/
1049// clang-format on
1051private:
1052 /// The associated NTuple for this range.
1054
1055public:
1057 private:
1058 /// The enclosing range's NTuple.
1060 std::size_t fIndex = 0;
1061
1062 public:
1063 using iterator_category = std::forward_iterator_tag;
1066 using difference_type = std::ptrdiff_t;
1069
1072 {
1073 ++fIndex;
1074 return *this;
1075 }
1077 {
1078 auto it = fNTuple.fExtraTypeInfoDescriptors.begin();
1079 std::advance(it, fIndex);
1080 return *it;
1081 }
1082 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
1083 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
1084 };
1085
1088 RIterator end() { return RIterator(fNTuple, fNTuple.GetNExtraTypeInfos()); }
1089};
1090
1091// clang-format off
1092/**
1093\class ROOT::Experimental::RNTupleDescriptor::RHeaderExtension
1094\ingroup NTuple
1095\brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
1096*/
1097// clang-format on
1100
1101private:
1102 /// All field IDs of late model extensions, in the order of field addition. This is necessary to serialize the
1103 /// fields in that order.
1104 std::vector<ROOT::DescriptorId_t> fFieldIdsOrder;
1105 /// All field IDs of late model extensions for efficient lookup. When a column gets added to the extension
1106 /// header, this enables us to determine if the column belongs to a field of the header extension or if it
1107 /// belongs to a field of the regular header that gets extended by additional column representations.
1108 std::unordered_set<ROOT::DescriptorId_t> fFieldIdsLookup;
1109 /// All logical column IDs of columns that extend, with additional column representations, fields of the regular
1110 /// header. During serialization, these columns are not picked up as columns of `fFieldIdsOrder`. But instead
1111 /// these columns need to be serialized in the extension header without re-serializing the field.
1112 std::vector<ROOT::DescriptorId_t> fExtendedColumnRepresentations;
1113 /// Number of logical and physical columns; updated by the descriptor builder when columns are added
1114 std::uint32_t fNLogicalColumns = 0;
1115 std::uint32_t fNPhysicalColumns = 0;
1116
1117 /// Marks `fieldDesc` as an extended field, i.e. a field that appears in the Header Extension (e.g. having been added
1118 /// through late model extension). Note that the field descriptor should also have been added to the RNTuple
1119 /// Descriptor alongside non-extended fields.
1121 {
1122 fFieldIdsOrder.emplace_back(fieldDesc.GetId());
1123 fFieldIdsLookup.insert(fieldDesc.GetId());
1124 }
1125
1126 /// Marks `columnDesc` as an extended column, i.e. a column that appears in the Header Extension (e.g. having been
1127 /// added through late model extension as an additional representation of an existing column). Note that the column
1128 /// descriptor should also have been added to the RNTuple Descriptor alongside non-extended columns.
1130 {
1132 if (!columnDesc.IsAliasColumn())
1134 if (fFieldIdsLookup.count(columnDesc.GetFieldId()) == 0) {
1135 fExtendedColumnRepresentations.emplace_back(columnDesc.GetLogicalId());
1136 }
1137 }
1138
1139public:
/// Number of field IDs recorded in `fFieldIdsOrder`.
1140 std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
/// Current value of the `fNLogicalColumns` counter.
1141 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
/// Current value of the `fNPhysicalColumns` counter.
1142 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
1143 const std::vector<ROOT::DescriptorId_t> &GetExtendedColumnRepresentations() const
1144 {
1146 }
1147 /// Return a vector containing the IDs of the top-level fields defined in the extension header, in the order
1148 /// of their addition.
1149 /// We cannot create this vector when building the fFields because at the time when AddExtendedField is called,
1150 /// the field is not yet linked into the schema tree.
1151 std::vector<ROOT::DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
1152
1154 {
1155 return fFieldIdsLookup.find(fieldId) != fFieldIdsLookup.end();
1156 }
1162};
1163
1164namespace Internal {
1165
1166// clang-format off
1167/**
1168\class ROOT::Experimental::Internal::RColumnDescriptorBuilder
1169\ingroup NTuple
1170\brief A helper class for piece-wise construction of an RColumnDescriptor
1171
1172Dangling column descriptors can become actual descriptors when added to an
1173RNTupleDescriptorBuilder instance and then linked to their fields.
1174*/
1175// clang-format on
1177private:
1179
1180public:
1181 /// Make an empty column descriptor builder.
1183
1195 {
1197 return *this;
1198 }
1200 {
1201 fColumn.fType = type;
1202 return *this;
1203 }
1210 {
1212 return *this;
1213 }
/// Store `{min, max}` as the value range of the column under construction (`fColumn.fValueRange`).
/// Returns a reference to this builder so that setter calls can be chained.
1231 RColumnDescriptorBuilder &ValueRange(double min, double max)
1232 {
1233 fColumn.fValueRange = {min, max};
1234 return *this;
1235 }
1236 RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange)
1237 {
1239 return *this;
1240 }
1243 /// Attempt to make a column descriptor. This may fail if the column
1244 /// was not given enough information to make a proper descriptor.
1246};
1247
1248// clang-format off
1249/**
1250\class ROOT::Experimental::Internal::RFieldDescriptorBuilder
1251\ingroup NTuple
1252\brief A helper class for piece-wise construction of an RFieldDescriptor
1253
1254Dangling field descriptors describe a single field in isolation. They are
1255missing the necessary relationship information (parent field, any child fields)
1256required to describe a real NTuple field.
1257
1258Dangling field descriptors can only become actual descriptors when added to an
1259RNTupleDescriptorBuilder instance and then linked to other fields.
1260*/
1261// clang-format on
1263private:
1265
1266public:
1267 /// Make an empty dangling field descriptor.
1269 /// Make a new RFieldDescriptorBuilder based off an existing descriptor.
1270 /// Relationship information is lost during the conversion to a
1271 /// dangling descriptor:
1272 /// * Parent id is reset to an invalid id.
1273 /// * Field children ids are forgotten.
1274 ///
1275 /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
1277
1278 /// Make a new RFieldDescriptorBuilder based off a live NTuple field.
1280
1287 {
1289 return *this;
1290 }
1292 {
1294 return *this;
1295 }
1297 {
1299 return *this;
1300 }
1307 {
1309 return *this;
1310 }
1312 {
1314 return *this;
1315 }
/// Set the type name of the field under construction (`fField.fTypeName`).
/// Returns a reference to this builder so that setter calls can be chained.
1316 RFieldDescriptorBuilder &TypeName(const std::string &typeName)
1317 {
1318 fField.fTypeName = typeName;
1319 return *this;
1320 }
1322 {
1324 return *this;
1325 }
1327 {
1329 return *this;
1330 }
1332 {
1333 fField.fStructure = structure;
1334 return *this;
1335 }
1336 RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum)
1337 {
1339 return *this;
1340 }
1342 /// Attempt to make a field descriptor. This may fail if the dangling field
1343 /// was not given enough information to make a proper descriptor.
1345};
1346
1347// clang-format off
1348/**
1349\class ROOT::Experimental::Internal::RClusterDescriptorBuilder
1350\ingroup NTuple
1351\brief A helper class for piece-wise construction of an RClusterDescriptor
1352
1353The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1354piecewise addition of page locations.
1355*/
1356// clang-format on
1358private:
1360
1361public:
1367
1373
1375 {
1377 return *this;
1378 }
1379
1382
1383 /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1384 /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1385 /// MarkSuppressedColumnRange() took place.
1387
1388 /// Sets the first element index and number of elements for all the suppressed column ranges.
1389 /// The information is taken from the corresponding columns from the primary representation.
1390 /// Needs to be called when all the columns (suppressed and regular) were added.
1392
1393 /// Add column and page ranges for columns created during late model extension missing in this cluster. The locator
1394 /// type for the synthesized page ranges is `kTypePageZero`. All the page sources must be able to populate the
1395 /// 'zero' page from such locator. Any call to `CommitColumnRange()` and `CommitSuppressedColumnRanges()`
1396 /// should happen before calling this function.
1398
1403
1404 /// Move out the full cluster descriptor including page locations
1406};
1407
1408// clang-format off
1409/**
1410\class ROOT::Experimental::Internal::RClusterGroupDescriptorBuilder
1411\ingroup NTuple
1412\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1413*/
1414// clang-format on
1416private:
1418
1419public:
1422
1439 {
1441 return *this;
1442 }
1444 {
1446 return *this;
1447 }
1449 {
1451 return *this;
1452 }
1453 void AddSortedClusters(const std::vector<ROOT::DescriptorId_t> &clusterIds)
1454 {
1455 if (clusterIds.size() != fClusterGroup.GetNClusters())
1456 throw RException(R__FAIL("mismatch of number of clusters"));
1458 }
1459
1461};
1462
1463// clang-format off
1464/**
1465\class ROOT::Experimental::Internal::RExtraTypeInfoDescriptorBuilder
1466\ingroup NTuple
1467\brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1468*/
1469// clang-format on
1471private:
1473
1474public:
1476
1483 {
1485 return *this;
1486 }
/// Set the type name of the extra type info record under construction (`fExtraTypeInfo.fTypeName`).
/// Returns a reference to this builder so that setter calls can be chained.
1487 RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1488 {
1489 fExtraTypeInfo.fTypeName = typeName;
1490 return *this;
1491 }
1493 {
1495 return *this;
1496 }
1497
1499};
1500
1501// clang-format off
1502/**
1503\class ROOT::Experimental::Internal::RNTupleDescriptorBuilder
1504\ingroup NTuple
1505\brief A helper class for piece-wise construction of an RNTupleDescriptor
1506
1507Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1508*/
1509// clang-format on
1511private:
1514
1515public:
1516 /// Checks whether invariants hold:
1517 /// * NTuple name is valid
1518 /// * Fields have valid parents
1519 /// * Number of columns is constant across column representations
1523
1524 /// Copies the "schema" part of `descriptor` into the builder's descriptor.
1525 /// This resets the builder's descriptor.
1527
1528 void SetNTuple(const std::string_view name, const std::string_view description);
1529 void SetFeature(unsigned int flag);
1530
1533 /// The real footer size also includes the page list envelopes
1535
1536 void AddField(const RFieldDescriptor &fieldDesc);
1539
1540 // The field that the column belongs to has to be already available. For fields with multiple columns,
1541 // the columns need to be added in order of the column index
1543
1546
1549
1550 /// Clears so-far stored clusters, fields, and columns and return to a pristine ntuple descriptor
1551 void Reset();
1552
1553 /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1554 /// annotated as being part of the header extension.
1555 void BeginHeaderExtension();
1556
1557 /// If the descriptor is constructed in pieces consisting of physical and alias columns
1558 /// (regular and projected fields), the natural column order would be
1559 /// - Physical and alias columns of piece one
1560 /// - Physical and alias columns of piece two
1561 /// - etc.
1562 /// What we want, however, are first all physical column IDs and then all alias column IDs.
1563 /// This method adds `offset` to the logical column IDs of all alias columns and fixes up the corresponding
1564 /// column IDs in the projected field descriptors. In this way, a new piece of physical and alias columns can
1565 /// first shift the existing alias columns by the number of new physical columns, resulting in the following order
1566 /// - Physical columns of piece one
1567 /// - Physical columns of piece two
1568 /// - ...
1569 /// - Alias columns of piece one
1570 /// - Alias columns of piece two
1571 /// - ...
1572 void ShiftAliasColumns(std::uint32_t offset);
1573
1574 /// Get the streamer info records for custom classes. Currently requires the corresponding dictionaries to be loaded.
1576};
1577
1579{
1580 return desc.CloneSchema();
1581}
1582
1583} // namespace Internal
1584} // namespace Experimental
1585} // namespace ROOT
1586
1587#endif // ROOT7_RNTupleDescriptor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
The available trivial, native content types of a column.
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RResult< void > MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
RResult< void > CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & ClusterId(ROOT::DescriptorId_t clusterId)
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
const RClusterDescriptor::RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
void AddSortedClusters(const std::vector< ROOT::DescriptorId_t > &clusterIds)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RColumnDescriptorBuilder & LogicalColumnId(ROOT::DescriptorId_t logicalColumnId)
RColumnDescriptorBuilder & BitsOnStorage(std::uint16_t bitsOnStorage)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & RepresentationIndex(std::uint16_t representationIndex)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RColumnDescriptorBuilder & Type(ROOT::ENTupleColumnType type)
RColumnDescriptorBuilder & ValueRange(std::optional< RColumnDescriptor::RValueRange > valueRange)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & PhysicalColumnId(ROOT::DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & ValueRange(double min, double max)
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
RExtraTypeInfoDescriptorBuilder & Content(const std::string &content)
RExtraTypeInfoDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RExtraTypeInfoDescriptorBuilder & TypeName(const std::string &typeName)
RExtraTypeInfoDescriptorBuilder & ContentId(EExtraTypeInfoIds contentId)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & ParentId(ROOT::DescriptorId_t id)
RFieldDescriptorBuilder & ProjectionSourceId(ROOT::DescriptorId_t id)
RFieldDescriptorBuilder & Structure(const ROOT::ENTupleStructure &structure)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & FieldId(ROOT::DescriptorId_t fieldId)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeChecksum(const std::optional< std::uint32_t > typeChecksum)
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
A helper class for piece-wise construction of an RNTupleDescriptor.
RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const
Get the streamer info records for custom classes. Currently requires the corresponding dictionaries t...
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
void ShiftAliasColumns(std::uint32_t offset)
If the descriptor is constructed in pieces consisting of physical and alias columns (regular and proj...
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
void SetSchemaFromExisting(const RNTupleDescriptor &descriptor)
Copies the "schema" part of descriptor into the builder's descriptor.
RResult< void > AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId)
void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId)
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
RResult< void > EnsureFieldExists(ROOT::DescriptorId_t fieldId) const
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
void Reset()
Clears so-far stored clusters, fields, and columns and return to a pristine ntuple descriptor.
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
std::unordered_map< ROOT::DescriptorId_t, RColumnRange >::const_iterator Iter_t
The window of element indexes of a particular column in a particular cluster.
std::optional< std::uint32_t > fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ROOT::NTupleSize_t fNElements
The number of column elements in the cluster.
bool fIsSuppressed
Suppressed columns have an empty page range and unknown compression settings.
ROOT::NTupleSize_t fFirstElementIndex
The global index of the first column element in the cluster.
void SetCompressionSettings(std::optional< std::uint32_t > comp)
RColumnRange(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t nElements, std::optional< std::uint32_t > compressionSettings, bool suppressed=false)
std::optional< std::uint32_t > GetCompressionSettings() const
Records the partition of data into pages for a particular column in a particular cluster.
RPageRange & operator=(RPageRange &&other)=default
std::vector< ROOT::NTupleSize_t > fCumulativeNElements
Has the same length as fPageInfos and stores the sum of the number of elements of all the pages up ...
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
RPageRange(const RPageRange &other)=delete
const std::vector< RPageInfo > & GetPageInfos() const
RPageRange & operator=(const RPageRange &other)=delete
Meta-data for a set of ntuple clusters.
RClusterDescriptor & operator=(RClusterDescriptor &&other)=default
RClusterDescriptor(RClusterDescriptor &&other)=default
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
ROOT::NTupleSize_t GetFirstEntryIndex() const
std::unordered_map< ROOT::DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor(const RClusterDescriptor &other)=delete
const RColumnRange & GetColumnRange(ROOT::DescriptorId_t physicalId) const
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
const RPageRange & GetPageRange(ROOT::DescriptorId_t physicalId) const
bool ContainsColumn(ROOT::DescriptorId_t physicalId) const
bool operator==(const RClusterDescriptor &other) const
std::unordered_map< ROOT::DescriptorId_t, RColumnRange > fColumnRanges
ROOT::NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
std::vector< ROOT::DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
std::uint64_t fPageListLength
Uncompressed size of the page list.
const std::vector< ROOT::DescriptorId_t > & GetClusterIds() const
RClusterGroupDescriptor CloneSummary() const
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool HasClusterDetails() const
A cluster group is loaded in two stages.
bool operator==(const RClusterGroupDescriptor &other) const
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
Meta-data stored for every column of an ntuple.
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
ROOT::ENTupleColumnType GetType() const
RColumnDescriptor & operator=(RColumnDescriptor &&other)=default
RColumnDescriptor(const RColumnDescriptor &other)=delete
RColumnDescriptor Clone() const
Get a copy of the descriptor.
ROOT::DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor(RColumnDescriptor &&other)=default
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
ROOT::DescriptorId_t GetLogicalId() const
ROOT::ENTupleColumnType fType
The on-disk column type.
std::optional< RValueRange > GetValueRange() const
ROOT::DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
ROOT::DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
ROOT::DescriptorId_t GetPhysicalId() const
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
ROOT::DescriptorId_t GetFieldId() const
Field specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor & operator=(RExtraTypeInfoDescriptor &&other)=default
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
RExtraTypeInfoDescriptor & operator=(const RExtraTypeInfoDescriptor &other)=delete
RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other)=default
RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other)=delete
std::string fContent
The content format depends on the content ID and may be binary.
A field translates read and write calls from/to underlying columns to/from tree values.
Meta-data stored for every field of an ntuple.
RFieldDescriptor & operator=(RFieldDescriptor &&other)=default
ROOT::DescriptorId_t GetProjectionSourceId() const
const std::string & GetFieldName() const
const std::string & GetTypeName() const
const std::string & GetFieldDescription() const
ROOT::ENTupleStructure GetStructure() const
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
ROOT::DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
std::string fFieldDescription
Free text set by the user.
const std::string & GetTypeAlias() const
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
ROOT::DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
RFieldDescriptor(const RFieldDescriptor &other)=delete
ROOT::DescriptorId_t GetParentId() const
std::vector< ROOT::DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::vector< ROOT::DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ROOT::ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
const std::vector< ROOT::DescriptorId_t > & GetLinkIds() const
const std::vector< ROOT::DescriptorId_t > & GetLogicalColumnIds() const
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::optional< std::uint32_t > GetTypeChecksum() const
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
Used to loop over all the clusters of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
Used to loop over all the cluster groups of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
const std::vector< ROOT::DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< ROOT::DescriptorId_t > &columns, std::size_t index)
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
std::vector< ROOT::DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
Used to loop over all the extra type info record of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
const std::vector< ROOT::DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< ROOT::DescriptorId_t > &fieldChildren, std::size_t index)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
std::vector< ROOT::DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
Summarizes information about fields and the corresponding columns that were added after the header ha...
bool ContainsField(ROOT::DescriptorId_t fieldId) const
void MarkExtendedField(const RFieldDescriptor &fieldDesc)
Marks fieldDesc as an extended field, i.e.
std::vector< ROOT::DescriptorId_t > fExtendedColumnRepresentations
All logical column IDs of columns that extend, with additional column representations,...
std::uint32_t fNLogicalColumns
Number of logical and physical columns; updated by the descriptor builder when columns are added.
void MarkExtendedColumn(const RColumnDescriptor &columnDesc)
Marks columnDesc as an extended column, i.e.
std::vector< ROOT::DescriptorId_t > fFieldIdsOrder
All field IDs of late model extensions, in the order of field addition.
bool ContainsExtendedColumnRepresentation(ROOT::DescriptorId_t columnId) const
std::unordered_set< ROOT::DescriptorId_t > fFieldIdsLookup
All field IDs of late model extensions for efficient lookup.
const std::vector< ROOT::DescriptorId_t > & GetExtendedColumnRepresentations() const
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of...
ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
const RClusterGroupDescriptor & GetClusterGroupDescriptor(ROOT::DescriptorId_t clusterGroupId) const
ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
RResult< void > DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId)
RResult< void > AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
std::unique_ptr< RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored meta-data.
std::vector< ROOT::DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable() const
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::unordered_map< ROOT::DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
static constexpr unsigned int kFeatureFlagTest
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
bool operator==(const RNTupleDescriptor &other) const
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
ROOT::DescriptorId_t fFieldZeroId
Set by the descriptor builder.
ROOT::NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::unique_ptr< RHeaderExtension > fHeaderExtension
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
bool HasFeature(unsigned int flag) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
const std::string & GetDescription() const
RNTupleDescriptor CloneSchema() const
Creates a descriptor containing only the schema information about this RNTuple, i....
RClusterDescriptorIterable GetClusterIterable() const
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::vector< std::uint64_t > GetFeatureFlags() const
A column element encapsulates the translation between basic C++ types and their column representation...
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Generic information about the physical location of data.
const_iterator begin() const
const Int_t n
Definition legend1.C:16
RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc)
EExtraTypeInfoIds
Used in RExtraTypeInfoDescriptor.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
ROOT::NTupleSize_t fPageNumber
Page number in the corresponding RPageRange.
ROOT::NTupleSize_t fFirstElementIndex
Index (in cluster) of the first element in page.
RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber)
We do not need to store the element size / uncompressed page size because we know to which column the...
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
bool fHasChecksum
If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data.
RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum)
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
bool fCreateBare
If true, the model will be created without a default entry (bare model).
bool fReconstructProjections
If set to true, projected fields will be reconstructed as such.
bool fEmulateUnknownTypes
If true, fields with a user-defined type that have no available dictionaries will be reconstructed as...
bool fForwardCompatible
Normally creating a model will fail if any of the reconstructed fields contains an unknown column typ...
static void output()