Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-07-19
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#ifndef ROOT7_RNTupleDescriptor
18#define ROOT7_RNTupleDescriptor
19
20#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RSpan.hxx>
24
25#include <TError.h>
26
27#include <algorithm>
28#include <chrono>
29#include <cmath>
30#include <functional>
31#include <iterator>
32#include <map>
33#include <memory>
34#include <optional>
35#include <ostream>
36#include <vector>
37#include <set>
38#include <string>
39#include <string_view>
40#include <unordered_map>
41#include <unordered_set>
42
43namespace ROOT {
44namespace Experimental {
45
46class RFieldBase;
47class RNTupleDescriptor;
48class RNTupleModel;
49
50namespace Internal {
51class RColumnElementBase;
52} // namespace Internal
53
54namespace Internal {
55class RColumnDescriptorBuilder;
56class RClusterDescriptorBuilder;
57class RClusterGroupDescriptorBuilder;
58class RExtraTypeInfoDescriptorBuilder;
59class RFieldDescriptorBuilder;
60class RNTupleDescriptorBuilder;
61} // namespace Internal
62
63// clang-format off
64/**
65\class ROOT::Experimental::RFieldDescriptor
66\ingroup NTuple
67\brief Meta-data stored for every field of an ntuple
68*/
69// clang-format on
73
74private:
76 /// The version of the C++-type-to-column translation mechanics
77 std::uint32_t fFieldVersion = 0;
78 /// The version of the C++ type itself
79 std::uint32_t fTypeVersion = 0;
80 /// The leaf name, not including parent fields
81 std::string fFieldName;
82 /// Free text set by the user
83 std::string fFieldDescription;
84 /// The C++ type that was used when writing the field
85 std::string fTypeName;
86 /// A typedef or using directive that resolved to the type name during field creation
87 std::string fTypeAlias;
88 /// The number of elements per entry for fixed-size arrays
89 std::uint64_t fNRepetitions = 0;
90 /// The structural information carried by this field in the data model tree
92 /// Establishes sub field relationships, such as classes and collections
94 /// For projected fields, the source field ID
96 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
97 /// order of sub fields.
98 std::vector<DescriptorId_t> fLinkIds;
99 /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
100 /// list of logical column ids. For example, the second column of the third column representation is
101 /// fLogicalColumnIds[2 * fColumnCardinality + 1]
102 std::uint32_t fColumnCardinality = 0;
103 /// The ordered list of columns attached to this field: first by representation index then by column index.
104 std::vector<DescriptorId_t> fLogicalColumnIds;
105 /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
106 /// identify types by their checksum
107 std::optional<std::uint32_t> fTypeChecksum;
108
109public:
110 RFieldDescriptor() = default;
111 RFieldDescriptor(const RFieldDescriptor &other) = delete;
115
116 bool operator==(const RFieldDescriptor &other) const;
117 /// Get a copy of the descriptor
118 RFieldDescriptor Clone() const;
119 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
120 /// access to sub fields, which is provided by the ntuple descriptor argument.
121 std::unique_ptr<RFieldBase> CreateField(const RNTupleDescriptor &ntplDesc, bool continueOnError = false) const;
122
123 DescriptorId_t GetId() const { return fFieldId; }
124 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
125 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
126 const std::string &GetFieldName() const { return fFieldName; }
127 const std::string &GetFieldDescription() const { return fFieldDescription; }
128 const std::string &GetTypeName() const { return fTypeName; }
129 const std::string &GetTypeAlias() const { return fTypeAlias; }
130 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
134 const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
135 const std::vector<DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
136 std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
137 std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; }
139 /// Tells if the field describes a user-defined class rather than a fundamental type, a collection, or one of the
140 /// natively supported stdlib classes.
141 /// The dictionary does not need to be available for this method.
142 bool IsCustomClass() const;
143};
144
145// clang-format off
146/**
147\class ROOT::Experimental::RColumnDescriptor
148\ingroup NTuple
149\brief Meta-data stored for every column of an ntuple
150*/
151// clang-format on
155
156public:
/// A pair of bounds (fMin, fMax), both zero by default. Can be built from two doubles or converted
/// implicitly from a std::pair<double, double> (first -> fMin, second -> fMax).
157 struct RValueRange {
158 double fMin = 0, fMax = 0;
159
160 RValueRange() = default;
161 RValueRange(double min, double max) : fMin(min), fMax(max) {}
162 RValueRange(std::pair<double, double> range) : fMin(range.first), fMax(range.second) {}
163
/// Two ranges are equal only if both bounds compare equal with floating-point ==, i.e. without tolerance.
164 bool operator==(RValueRange other) const { return fMin == other.fMin && fMax == other.fMax; }
165 bool operator!=(RValueRange other) const { return !(*this == other); }
166 };
167
168private:
169 /// The actual column identifier, which is the link to the corresponding field
171 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
173 /// Every column belongs to one and only one field
175 /// The absolute value specifies the index for the first stored element for this column.
176 /// For deferred columns the absolute value is larger than zero.
177 /// Negative values specify a suppressed and deferred column.
178 std::int64_t fFirstElementIndex = 0U;
179 /// A field can be serialized into several columns, which are numbered from zero to $n$
180 std::uint32_t fIndex = 0;
181 /// A field may use multiple column representations, which are numbered from zero to $m$.
182 /// Every representation has the same number of columns.
183 std::uint16_t fRepresentationIndex = 0;
184 /// The size in bits of elements of this column. Most columns have the size fixed by their type
185 /// but low-precision float columns have variable bit widths.
186 std::uint16_t fBitsOnStorage = 0;
187 /// The on-disk column type
189 /// Optional value range (used e.g. by quantized real fields)
190 std::optional<RValueRange> fValueRange;
191
192public:
193 RColumnDescriptor() = default;
194 RColumnDescriptor(const RColumnDescriptor &other) = delete;
198
199 bool operator==(const RColumnDescriptor &other) const;
200 /// Get a copy of the descriptor
201 RColumnDescriptor Clone() const;
202
206 std::uint32_t GetIndex() const { return fIndex; }
207 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
/// Returns the magnitude of fFirstElementIndex; the sign (which marks a suppressed and deferred column) is dropped.
208 std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); }
209 std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
210 EColumnType GetType() const { return fType; }
211 std::optional<RValueRange> GetValueRange() const { return fValueRange; }
/// True if the stored first element index is non-zero, regardless of its sign.
213 bool IsDeferredColumn() const { return fFirstElementIndex != 0; }
215};
216
217// clang-format off
218/**
219\class ROOT::Experimental::RClusterDescriptor
220\ingroup NTuple
221\brief Meta-data for a set of ntuple clusters
222
223The cluster descriptor is built in two phases. In a first phase, the descriptor has only an ID.
224In a second phase, the event range, column group, page locations and column ranges are added.
225Both phases are populated by the RClusterDescriptorBuilder.
226Clusters usually span across all available columns but in some cases they can describe only a subset of the columns,
227for instance when describing friend ntuples.
228*/
229// clang-format on
232
233public:
234 /// The window of element indexes of a particular column in a particular cluster
237 /// The global index of the first column element in the cluster
239 /// The number of column elements in the cluster
241 /// The usual format for ROOT compression settings (see Compression.h).
242 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
244 /// Suppressed columns have an empty page range and unknown compression settings.
245 /// Their element index range, however, is aligned with the corresponding column of the
246 /// primary column representation (see Section "Suppressed Columns" in the specification)
247 bool fIsSuppressed = false;
248
249 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
250 // Should this be done on the field level?
251
252 bool operator==(const RColumnRange &other) const
253 {
257 }
258
260 {
262 }
263 };
264
265 // clang-format off
266 /**
267 \class ROOT::Experimental::RClusterDescriptor::RPageRange
268 \ingroup NTuple
269 \brief Records the partition of data into pages for a particular column in a particular cluster
270 */
271 // clang-format on
274 /// Extend this RPageRange to fit the given RColumnRange, i.e. prepend as many synthetic RPageInfos as needed to
275 /// cover the range in `columnRange`. `RPageInfo`s are constructed to contain as many elements of type `element`
276 /// given a page size limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
277 /// This function is used to make up `RPageRange`s for clusters that contain deferred columns.
278 /// \return The number of column elements covered by the synthesized RPageInfos
279 std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element,
280 std::size_t pageSize);
281
282 public:
283 /// We do not need to store the element size / uncompressed page size because we know to which column
284 /// the page belongs
285 struct RPageInfo {
286 /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
287 std::uint32_t fNElements = std::uint32_t(-1);
288 /// The meaning of fLocator depends on the storage backend.
290 /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
291 bool fHasChecksum = false;
292
/// Compares the number of elements and the locator only.
/// NOTE(review): fHasChecksum does not take part in the comparison -- confirm this is intentional.
293 bool operator==(const RPageInfo &other) const
294 {
295 return fNElements == other.fNElements && fLocator == other.fLocator;
296 }
297 };
299 /// Index (in cluster) of the first element in page.
301 /// Page number in the corresponding RPageRange.
303
304 RPageInfoExtended() = default;
306 : RPageInfo(pi), fFirstInPage(i), fPageNo(n)
307 {
308 }
309 };
310
311 RPageRange() = default;
312 RPageRange(const RPageRange &other) = delete;
313 RPageRange &operator=(const RPageRange &other) = delete;
314 RPageRange(RPageRange &&other) = default;
315 RPageRange &operator=(RPageRange &&other) = default;
316
318 {
319 RPageRange clone;
321 clone.fPageInfos = fPageInfos;
322 return clone;
323 }
324
325 /// Find the page in the RPageRange that contains the given element. The element must exist.
326 RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const;
327
329 std::vector<RPageInfo> fPageInfos;
330
/// Two page ranges are equal if they have the same physical column ID and their page info vectors
/// compare equal (same length, element-wise RPageInfo::operator==).
331 bool operator==(const RPageRange &other) const
332 {
333 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
334 }
335 };
336
337private:
339 /// Clusters can be swapped by adjusting the entry offsets
341 // TODO(jblomer): change to std::uint64_t
343
344 std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
345 std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
346
347public:
349
355
357
358 bool operator==(const RClusterDescriptor &other) const;
359
360 DescriptorId_t GetId() const { return fClusterId; }
363 const RColumnRange &GetColumnRange(DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
364 const RPageRange &GetPageRange(DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
365 /// Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
366 RColumnRangeIterable GetColumnRangeIterable() const;
/// Returns true if fColumnRanges has an entry for the given physical column ID.
367 bool ContainsColumn(DescriptorId_t physicalId) const
368 {
369 return fColumnRanges.find(physicalId) != fColumnRanges.end();
370 }
371 std::uint64_t GetBytesOnStorage() const;
372};
373
375private:
377
378public:
379 class RIterator {
380 private:
381 using Iter_t = std::unordered_map<DescriptorId_t, RColumnRange>::const_iterator;
382 /// The wrapped map iterator
384
385 public:
386 using iterator_category = std::forward_iterator_tag;
389 using difference_type = std::ptrdiff_t;
390 using pointer = const RColumnRange *;
391 using reference = const RColumnRange &;
392
393 RIterator(Iter_t iter) : fIter(iter) {}
395 {
396 ++fIter;
397 return *this;
398 }
399 reference operator*() { return fIter->second; }
400 pointer operator->() { return &fIter->second; }
401 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
402 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
403 };
404
405 explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
406
408 RIterator end() { return fDesc.fColumnRanges.cend(); }
409 size_t size() { return fDesc.fColumnRanges.size(); }
410};
411
412// clang-format off
413/**
414\class ROOT::Experimental::RClusterGroupDescriptor
415\ingroup NTuple
416\brief Clusters are bundled in cluster groups.
417
418Very large ntuples or combined ntuples (chains, friends) contain multiple cluster groups. The cluster groups
419may contain sharded clusters.
420Every ntuple has at least one cluster group. The clusters in a cluster group are ordered corresponding to
421the order of page locations in the page list envelope that belongs to the cluster group (see format specification)
422*/
423// clang-format on
426
427private:
429 /// The cluster IDs can be empty if the corresponding page list is not loaded.
430 std::vector<DescriptorId_t> fClusterIds;
431 /// The page list that corresponds to the cluster group
433 /// Uncompressed size of the page list
434 std::uint64_t fPageListLength = 0;
435 /// The minimum first entry number of the clusters in the cluster group
436 std::uint64_t fMinEntry = 0;
437 /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
438 std::uint64_t fEntrySpan = 0;
439 /// Number of clusters is always known even if the cluster IDs are not (yet) populated
440 std::uint32_t fNClusters = 0;
441
442public:
448
450 // Creates a clone without the cluster IDs
452
453 bool operator==(const RClusterGroupDescriptor &other) const;
454
456 std::uint32_t GetNClusters() const { return fNClusters; }
458 std::uint64_t GetPageListLength() const { return fPageListLength; }
459 const std::vector<DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
460 std::uint64_t GetMinEntry() const { return fMinEntry; }
461 std::uint64_t GetEntrySpan() const { return fEntrySpan; }
462 /// A cluster group is loaded in two stages. Stage one loads only the summary information.
463 /// Stage two loads the list of cluster IDs.
464 bool HasClusterDetails() const { return !fClusterIds.empty(); }
465};
466
467/// Used in RExtraTypeInfoDescriptor
469
470// clang-format off
471/**
472\class ROOT::Experimental::RExtraTypeInfoDescriptor
473\ingroup NTuple
474\brief Field specific extra type information from the header / extension header
475
476Currently only used by streamer fields to store RNTuple-wide list of streamer info records.
477*/
478// clang-format on
481
482private:
483 /// Specifies the meaning of the extra information
485 /// Type version the extra type information is bound to
486 std::uint32_t fTypeVersion = 0;
487 /// The type name the extra information refers to; empty for RNTuple-wide extra information
488 std::string fTypeName;
489 /// The content format depends on the content ID and may be binary
490 std::string fContent;
491
492public:
498
499 bool operator==(const RExtraTypeInfoDescriptor &other) const;
500
502
504 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
505 const std::string &GetTypeName() const { return fTypeName; }
506 const std::string &GetContent() const { return fContent; }
507};
508
509// clang-format off
510/**
511\class ROOT::Experimental::RNTupleDescriptor
512\ingroup NTuple
513\brief The on-storage meta-data of an ntuple
514
515Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
516several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
517relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
518its location and size, and for every column the range of element indexes as well as a list of pages and page
519locations.
520
521The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
522for ntuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
523
524The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
525the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
526written struct. This allows for forward and backward compatibility when the meta-data evolves.
527*/
528// clang-format on
531
532public:
533 class RHeaderExtension;
534
535private:
536 /// The ntuple name needs to be unique in a given storage location (file)
537 std::string fName;
538 /// Free text from the user
539 std::string fDescription;
540
541 std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
542 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
543 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
544
545 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster groups are added
546 std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
547 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
548
549 DescriptorId_t fFieldZeroId = kInvalidDescriptorId; ///< Set by the descriptor builder
550
551 /**
552 * Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
553 * active page locations. During the lifetime of the descriptor, page location information for clusters
554 * can be added or removed. When this happens, the generation should be increased, so that users of the
555 * descriptor know that the information changed. The generation is increased, e.g., by the page source's
556 * exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
557 */
558 std::uint64_t fGeneration = 0;
559
560 std::set<unsigned int> fFeatureFlags;
561 std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
562 std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
563 std::unordered_map<DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
564 /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
565 /// from a chain of files
566 std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
567 std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
568 std::unique_ptr<RHeaderExtension> fHeaderExtension;
569
570public:
571 static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
572
578
579 /// Modifiers passed to `CreateModel`
581 RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
582 /// If set to true, projected fields will be reconstructed as such. This will prevent the model from being used
583 /// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
585 /// Normally creating a model will fail if any of the reconstructed fields contains an unknown column type.
586 /// If this option is enabled, the model will be created and all fields containing unknown data (directly
587 /// or indirectly) will be skipped instead.
588 bool fForwardCompatible = false;
589 /// If true, the model will be created without a default entry (bare model).
590 bool fCreateBare = false;
591 };
592
593 RNTupleDescriptor() = default;
594 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
598
599 std::unique_ptr<RNTupleDescriptor> Clone() const;
600
601 bool operator==(const RNTupleDescriptor &other) const;
602
603 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
604 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
605 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
606
607 const RFieldDescriptor &GetFieldDescriptor(DescriptorId_t fieldId) const { return fFieldDescriptors.at(fieldId); }
609 {
610 return fColumnDescriptors.at(columnId);
611 }
613 {
614 return fClusterGroupDescriptors.at(clusterGroupId);
615 }
617 {
618 return fClusterDescriptors.at(clusterId);
619 }
620
621 RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
622 RFieldDescriptorIterable
623 GetFieldIterable(const RFieldDescriptor &fieldDesc,
624 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const;
625 RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const;
626 RFieldDescriptorIterable
628 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const;
629
630 RFieldDescriptorIterable GetTopLevelFields() const;
631 RFieldDescriptorIterable
632 GetTopLevelFields(const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const;
633
634 RColumnDescriptorIterable GetColumnIterable() const;
635 RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
636 RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const;
637
638 RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
639
640 RClusterDescriptorIterable GetClusterIterable() const;
641
642 RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
643
644 const std::string &GetName() const { return fName; }
645 const std::string &GetDescription() const { return fDescription; }
646
647 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
648 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
649 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
650 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
651 std::size_t GetNClusters() const { return fNClusters; }
652 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
653 std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
654
655 /// We know the number of entries from adding the cluster summaries
657 NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const;
658
659 /// Returns the logical parent of all top-level NTuple data fields.
662 DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const;
663 /// Searches for a top-level field
664 DescriptorId_t FindFieldId(std::string_view fieldName) const;
666 FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const;
668 FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const;
672
673 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
674 /// In case of invalid field ID, an empty string is returned.
675 std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
676
677 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
678 std::vector<std::uint64_t> GetFeatureFlags() const;
679
680 /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
681 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
682
683 /// Methods to load and drop cluster group details (cluster IDs and page locations)
684 RResult<void> AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
686
687 std::uint64_t GetGeneration() const { return fGeneration; }
689
690 /// Re-create the C++ model from the stored meta-data
691 std::unique_ptr<RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
692 void PrintInfo(std::ostream &output) const;
693};
694
695// clang-format off
696/**
697\class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorIterable
698\ingroup NTuple
699\brief Used to loop over a field's associated columns
700*/
701// clang-format on
703private:
704 /// The associated NTuple for this range.
706 /// The descriptor ids of the columns ordered by field, representation, and column index
707 std::vector<DescriptorId_t> fColumns = {};
708
709public:
710 class RIterator {
711 private:
712 /// The enclosing range's NTuple.
714 /// The enclosing range's descriptor id list.
715 const std::vector<DescriptorId_t> &fColumns;
716 std::size_t fIndex = 0;
717
718 public:
719 using iterator_category = std::forward_iterator_tag;
722 using difference_type = std::ptrdiff_t;
723 using pointer = const RColumnDescriptor *;
725
726 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
727 : fNTuple(ntuple), fColumns(columns), fIndex(index)
728 {
729 }
731 {
732 ++fIndex;
733 return *this;
734 }
737 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
738 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
739 };
740
741 RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc);
743
746 size_t size() { return fColumns.size(); }
747};
748
749// clang-format off
750/**
751\class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorIterable
752\ingroup NTuple
753\brief Used to loop over a field's child fields
754*/
755// clang-format on
757private:
758 /// The associated NTuple for this range.
760 /// The descriptor ids of the child fields. These may be sorted using
761 /// a comparison function.
762 std::vector<DescriptorId_t> fFieldChildren = {};
763
764public:
765 class RIterator {
766 private:
767 /// The enclosing range's NTuple.
769 /// The enclosing range's descriptor id list.
770 const std::vector<DescriptorId_t> &fFieldChildren;
771 std::size_t fIndex = 0;
772
773 public:
774 using iterator_category = std::forward_iterator_tag;
777 using difference_type = std::ptrdiff_t;
780
781 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &fieldChildren, std::size_t index)
782 : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index)
783 {
784 }
786 {
787 ++fIndex;
788 return *this;
789 }
791 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
792 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
793 };
795 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
796 {
797 }
798 /// Sort the range using an arbitrary comparison function.
800 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator)
801 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
802 {
803 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
804 }
807};
808
809// clang-format off
810/**
811\class ROOT::Experimental::RNTupleDescriptor::RClusterGroupDescriptorIterable
812\ingroup NTuple
813\brief Used to loop over all the cluster groups of an ntuple (in unspecified order)
814
815Enumerate all cluster group IDs from the cluster group descriptor. No specific order can be assumed, use
816FindNextClusterGroupId and FindPrevClusterGroupId to traverse cluster groups by entry number.
817*/
818// clang-format on
820private:
821 /// The associated NTuple for this range.
823
824public:
825 class RIterator {
826 private:
827 /// The enclosing range's NTuple.
829 std::size_t fIndex = 0;
830
831 public:
832 using iterator_category = std::forward_iterator_tag;
835 using difference_type = std::ptrdiff_t;
838
839 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
841 {
842 ++fIndex;
843 return *this;
844 }
846 {
847 auto it = fNTuple.fClusterGroupDescriptors.begin();
848 std::advance(it, fIndex);
849 return it->second;
850 }
851 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
852 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
853 };
854
858};
859
860// clang-format off
861/**
862\class ROOT::Experimental::RNTupleDescriptor::RClusterDescriptorIterable
863\ingroup NTuple
864\brief Used to loop over all the clusters of an ntuple (in unspecified order)
865
866Enumerate all cluster IDs from the cluster descriptor. No specific order can be assumed, use
867FindNextClusterId and FindPrevClusterId to traverse clusters by entry number.
868*/
869// clang-format on
871private:
872 /// The associated NTuple for this range.
874
875public:
876 class RIterator {
877 private:
878 /// The enclosing range's NTuple.
880 std::size_t fIndex = 0;
881
882 public:
883 using iterator_category = std::forward_iterator_tag;
886 using difference_type = std::ptrdiff_t;
889
890 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
892 {
893 ++fIndex;
894 return *this;
895 }
897 {
898 auto it = fNTuple.fClusterDescriptors.begin();
899 std::advance(it, fIndex);
900 return it->second;
901 }
902 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
903 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
904 };
905
909};
910
911// clang-format off
912/**
913\class ROOT::Experimental::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
914\ingroup NTuple
915\brief Used to loop over all the extra type info records of an ntuple (in unspecified order)
916*/
917// clang-format on
919private:
920 /// The associated NTuple for this range.
922
923public:
924 class RIterator {
925 private:
926 /// The enclosing range's NTuple.
928 std::size_t fIndex = 0;
929
930 public:
931 using iterator_category = std::forward_iterator_tag;
934 using difference_type = std::ptrdiff_t;
937
938 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
940 {
941 ++fIndex;
942 return *this;
943 }
945 {
946 auto it = fNTuple.fExtraTypeInfoDescriptors.begin();
947 std::advance(it, fIndex);
948 return *it;
949 }
950 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
951 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
952 };
953
957};
958
959// clang-format off
960/**
961\class ROOT::Experimental::RNTupleDescriptor::RHeaderExtension
962\ingroup NTuple
963\brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
964*/
965// clang-format on
968
969private:
970 /// All field IDs of late model extensions, in the order of field addition. This is necessary to serialize
971 /// the fields in that order.
972 std::vector<DescriptorId_t> fFieldIdsOrder;
973 /// All field IDs of late model extensions for efficient lookup. When a column gets added to the extension
974 /// header, this enables us to determine if the column belongs to a field of the header extension or if it
975 /// belongs to a field of the regular header that gets extended by additional column representations.
976 std::unordered_set<DescriptorId_t> fFieldIdsLookup;
977 /// All logical column IDs of columns that extend, with additional column representations, fields of the regular
978 /// header. During serialization, these columns are not picked up as columns of `fFieldIdsOrder`. But instead
979 /// these columns need to be serialized in the extension header without re-serializing the field.
980 std::vector<DescriptorId_t> fExtendedColumnRepresentations;
981 /// Number of logical and physical columns; updated by the descriptor builder when columns are added
982 std::uint32_t fNLogicalColumns = 0;
983 std::uint32_t fNPhysicalColumns = 0;
984
985 void AddExtendedField(const RFieldDescriptor &fieldDesc)
986 {
987 fFieldIdsOrder.emplace_back(fieldDesc.GetId());
988 fFieldIdsLookup.insert(fieldDesc.GetId());
989 }
990
991 void AddExtendedColumn(const RColumnDescriptor &columnDesc)
992 {
994 if (!columnDesc.IsAliasColumn())
996 if (fFieldIdsLookup.count(columnDesc.GetFieldId()) == 0) {
997 fExtendedColumnRepresentations.emplace_back(columnDesc.GetLogicalId());
998 }
999 }
1000
1001public:
1002 std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
1003 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
1004 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
1005 const std::vector<DescriptorId_t> &GetExtendedColumnRepresentations() const
1006 {
1008 }
1009 /// Return a vector containing the IDs of the top-level fields defined in the extension header, in the order
1010 /// of their addition.
1011 /// We cannot create this vector when building the fFields because at the time when AddExtendedField is called,
1012 /// the field is not yet linked into the schema tree.
1013 std::vector<DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
1014};
1015
1016namespace Internal {
1017
1018// clang-format off
1019/**
1020\class ROOT::Experimental::Internal::RColumnDescriptorBuilder
1021\ingroup NTuple
1022\brief A helper class for piece-wise construction of an RColumnDescriptor
1023
1024Dangling column descriptors can become actual descriptors when added to an
1025RNTupleDescriptorBuilder instance and then linked to their fields.
1026*/
1027// clang-format on
1029private:
1031
1032public:
1033 /// Make an empty column descriptor builder.
1035
1037 {
1038 fColumn.fLogicalColumnId = logicalColumnId;
1039 return *this;
1040 }
1042 {
1043 fColumn.fPhysicalColumnId = physicalColumnId;
1044 return *this;
1045 }
1046 RColumnDescriptorBuilder &BitsOnStorage(std::uint16_t bitsOnStorage)
1047 {
1048 fColumn.fBitsOnStorage = bitsOnStorage;
1049 return *this;
1050 }
1052 {
1053 fColumn.fType = type;
1054 return *this;
1055 }
1057 {
1058 fColumn.fFieldId = fieldId;
1059 return *this;
1060 }
1062 {
1064 return *this;
1065 }
1066 RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx)
1067 {
1068 fColumn.fFirstElementIndex = firstElementIdx;
1069 return *this;
1070 }
1072 {
1076 return *this;
1077 }
1078 RColumnDescriptorBuilder &RepresentationIndex(std::uint16_t representationIndex)
1079 {
1080 fColumn.fRepresentationIndex = representationIndex;
1081 return *this;
1082 }
1083 RColumnDescriptorBuilder &ValueRange(double min, double max)
1084 {
1085 fColumn.fValueRange = {min, max};
1086 return *this;
1087 }
1088 RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange)
1089 {
1090 fColumn.fValueRange = valueRange;
1091 return *this;
1092 }
1095 /// Attempt to make a column descriptor. This may fail if the column
1096 /// was not given enough information to make a proper descriptor.
1098};
1099
1100// clang-format off
1101/**
1102\class ROOT::Experimental::Internal::RFieldDescriptorBuilder
1103\ingroup NTuple
1104\brief A helper class for piece-wise construction of an RFieldDescriptor
1105
1106Dangling field descriptors describe a single field in isolation. They are
1107missing the necessary relationship information (parent field, any child fields)
1108required to describe a real NTuple field.
1109
1110Dangling field descriptors can only become actual descriptors when added to an
1111RNTupleDescriptorBuilder instance and then linked to other fields.
1112*/
1113// clang-format on
1115private:
1117
1118public:
1119 /// Make an empty dangling field descriptor.
1121 /// Make a new RFieldDescriptorBuilder based off an existing descriptor.
1122 /// Relationship information is lost during the conversion to a
1123 /// dangling descriptor:
1124 /// * Parent id is reset to an invalid id.
1125 /// * Field children ids are forgotten.
1126 ///
1127 /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
1128 explicit RFieldDescriptorBuilder(const RFieldDescriptor &fieldDesc);
1129
1130 /// Make a new RFieldDescriptorBuilder based off a live NTuple field.
1131 static RFieldDescriptorBuilder FromField(const RFieldBase &field);
1132
1134 {
1135 fField.fFieldId = fieldId;
1136 return *this;
1137 }
1138 RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
1139 {
1140 fField.fFieldVersion = fieldVersion;
1141 return *this;
1142 }
1143 RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
1144 {
1145 fField.fTypeVersion = typeVersion;
1146 return *this;
1147 }
1149 {
1151 return *this;
1152 }
1154 {
1156 return *this;
1157 }
1158 RFieldDescriptorBuilder &FieldName(const std::string &fieldName)
1159 {
1160 fField.fFieldName = fieldName;
1161 return *this;
1162 }
1163 RFieldDescriptorBuilder &FieldDescription(const std::string &fieldDescription)
1164 {
1165 fField.fFieldDescription = fieldDescription;
1166 return *this;
1167 }
1168 RFieldDescriptorBuilder &TypeName(const std::string &typeName)
1169 {
1170 fField.fTypeName = typeName;
1171 return *this;
1172 }
1173 RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias)
1174 {
1175 fField.fTypeAlias = typeAlias;
1176 return *this;
1177 }
1178 RFieldDescriptorBuilder &NRepetitions(std::uint64_t nRepetitions)
1179 {
1180 fField.fNRepetitions = nRepetitions;
1181 return *this;
1182 }
1184 {
1185 fField.fStructure = structure;
1186 return *this;
1187 }
1188 RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum)
1189 {
1190 fField.fTypeChecksum = typeChecksum;
1191 return *this;
1192 }
1194 /// Attempt to make a field descriptor. This may fail if the dangling field
1195 /// was not given enough information to make a proper descriptor.
1197};
1198
1199// clang-format off
1200/**
1201\class ROOT::Experimental::Internal::RClusterDescriptorBuilder
1202\ingroup NTuple
1203\brief A helper class for piece-wise construction of an RClusterDescriptor
1204
1205The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1206piecewise addition of page locations.
1207*/
1208// clang-format on
1210private:
1212
1213public:
1215 {
1216 fCluster.fClusterId = clusterId;
1217 return *this;
1218 }
1219
1220 RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
1221 {
1222 fCluster.fFirstEntryIndex = firstEntryIndex;
1223 return *this;
1224 }
1225
1226 RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
1227 {
1228 fCluster.fNEntries = nEntries;
1229 return *this;
1230 }
1231
1232 RResult<void> CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex,
1233 std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
1234
1235 /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1236 /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1237 /// MarkSuppressedColumnRange() took place.
1239
1240 /// Sets the first element index and number of elements for all the suppressed column ranges.
1241 /// The information is taken from the corresponding columns from the primary representation.
1242 /// Needs to be called when all the columns (suppressed and regular) were added.
1244
1245 /// Add column and page ranges for columns created during late model extension missing in this cluster. The locator
1246 /// type for the synthesized page ranges is `kTypePageZero`. All the page sources must be able to populate the
1247 /// 'zero' page from such locator. Any call to `CommitColumnRange()` and `CommitSuppressedColumnRanges()`
1248 /// should happen before calling this function.
1250
1252 {
1253 return fCluster.GetColumnRange(physicalId);
1254 }
1255
1256 /// Move out the full cluster descriptor including page locations
1258};
1259
1260// clang-format off
1261/**
1262\class ROOT::Experimental::Internal::RClusterGroupDescriptorBuilder
1263\ingroup NTuple
1264\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1265*/
1266// clang-format on
1268private:
1270
1271public:
1274
1276 {
1277 fClusterGroup.fClusterGroupId = clusterGroupId;
1278 return *this;
1279 }
1281 {
1282 fClusterGroup.fPageListLocator = pageListLocator;
1283 return *this;
1284 }
1285 RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1286 {
1287 fClusterGroup.fPageListLength = pageListLength;
1288 return *this;
1289 }
1291 {
1292 fClusterGroup.fMinEntry = minEntry;
1293 return *this;
1294 }
1296 {
1297 fClusterGroup.fEntrySpan = entrySpan;
1298 return *this;
1299 }
1301 {
1302 fClusterGroup.fNClusters = nClusters;
1303 return *this;
1304 }
1305 void AddClusters(const std::vector<DescriptorId_t> &clusterIds)
1306 {
1307 if (clusterIds.size() != fClusterGroup.GetNClusters())
1308 throw RException(R__FAIL("mismatch of number of clusters"));
1309 fClusterGroup.fClusterIds = clusterIds;
1310 }
1311
1313};
1314
1315// clang-format off
1316/**
1317\class ROOT::Experimental::Internal::RExtraTypeInfoDescriptorBuilder
1318\ingroup NTuple
1319\brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1320*/
1321// clang-format on
1323private:
1325
1326public:
1328
1330 {
1331 fExtraTypeInfo.fContentId = contentId;
1332 return *this;
1333 }
1335 {
1336 fExtraTypeInfo.fTypeVersion = typeVersion;
1337 return *this;
1338 }
1339 RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1340 {
1341 fExtraTypeInfo.fTypeName = typeName;
1342 return *this;
1343 }
1344 RExtraTypeInfoDescriptorBuilder &Content(const std::string &content)
1345 {
1346 fExtraTypeInfo.fContent = content;
1347 return *this;
1348 }
1349
1351};
1352
1353// clang-format off
1354/**
1355\class ROOT::Experimental::Internal::RNTupleDescriptorBuilder
1356\ingroup NTuple
1357\brief A helper class for piece-wise construction of an RNTupleDescriptor
1358
1359Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1360*/
1361// clang-format on
1363private:
1366
1367public:
1368 /// Checks whether invariants hold:
1369 /// * NTuple name is valid
1370 /// * Fields have valid parents
1371 /// * Number of columns is constant across column representations
1375
1376 void SetNTuple(const std::string_view name, const std::string_view description);
1377 void SetFeature(unsigned int flag);
1378
1379 void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1381 /// The real footer size also includes the page list envelopes
1383
1384 void AddField(const RFieldDescriptor &fieldDesc);
1387
1388 // The field that the column belongs to has to be already available. For fields with multiple columns,
1389 // the columns need to be added in order of the column index
1391
1394
1396
1397 /// Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor
1398 void Reset();
1399
1400 /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1401 /// annotated as being part of the header extension.
1402 void BeginHeaderExtension();
1403
1404 /// If the descriptor is constructed in pieces consisting of physical and alias columns
1405 /// (regular and projected fields), the natural column order would be
1406 /// - Physical and alias columns of piece one
1407 /// - Physical and alias columns of piece two
1408 /// - etc.
1409 /// What we want, however, are first all physical column IDs and then all alias column IDs.
1410 /// This method adds `offset` to the logical column IDs of all alias columns and fixes up the corresponding
1411 /// column IDs in the projected field descriptors. In this way, a new piece of physical and alias columns can
1412 /// first shift the existing alias columns by the number of new physical columns, resulting in the following order
1413 /// - Physical columns of piece one
1414 /// - Physical columns of piece two
1415 /// - ...
1416 /// - Alias columns of piece one
1417 /// - Alias columns of piece two
1418 /// - ...
1419 void ShiftAliasColumns(std::uint32_t offset);
1420
1421 /// Get the streamer info records for custom classes. Currently requires the corresponding dictionaries to be loaded.
1423};
1424
1425} // namespace Internal
1426} // namespace Experimental
1427} // namespace ROOT
1428
1429#endif // ROOT7_RNTupleDescriptor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RClusterDescriptorBuilder & ClusterId(DescriptorId_t clusterId)
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
const RClusterDescriptor::RColumnRange & GetColumnRange(DescriptorId_t physicalId)
RResult< void > MarkSuppressedColumnRange(DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
void AddClusters(const std::vector< DescriptorId_t > &clusterIds)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & PhysicalColumnId(DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & Type(EColumnType type)
RColumnDescriptorBuilder & BitsOnStorage(std::uint16_t bitsOnStorage)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & RepresentationIndex(std::uint16_t representationIndex)
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & ValueRange(std::optional< RColumnDescriptor::RValueRange > valueRange)
RColumnDescriptorBuilder & LogicalColumnId(DescriptorId_t logicalColumnId)
RColumnDescriptorBuilder & ValueRange(double min, double max)
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
RExtraTypeInfoDescriptorBuilder & Content(const std::string &content)
RExtraTypeInfoDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RExtraTypeInfoDescriptorBuilder & TypeName(const std::string &typeName)
RExtraTypeInfoDescriptorBuilder & ContentId(EExtraTypeInfoIds contentId)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & ProjectionSourceId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeChecksum(const std::optional< std::uint32_t > typeChecksum)
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
A helper class for piece-wise construction of an RNTupleDescriptor.
RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const
Get the streamer info records for custom classes. Currently requires the corresponding dictionaries t...
RResult< void > AddFieldProjection(DescriptorId_t sourceId, DescriptorId_t targetId)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
void ShiftAliasColumns(std::uint32_t offset)
If the descriptor is constructed in pieces consisting of physical and alias columns (regular and proj...
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
std::unordered_map< DescriptorId_t, RColumnRange >::const_iterator Iter_t
Records the partition of data into pages for a particular column in a particular cluster.
RPageRange & operator=(RPageRange &&other)=default
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
RPageRange(const RPageRange &other)=delete
RPageRange & operator=(const RPageRange &other)=delete
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor & operator=(RClusterDescriptor &&other)=default
RClusterDescriptor(RClusterDescriptor &&other)=default
bool ContainsColumn(DescriptorId_t physicalId) const
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
RClusterDescriptor(const RClusterDescriptor &other)=delete
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
const RColumnRange & GetColumnRange(DescriptorId_t physicalId) const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
const RPageRange & GetPageRange(DescriptorId_t physicalId) const
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
std::uint64_t fPageListLength
Uncompressed size of the page list.
RClusterGroupDescriptor CloneSummary() const
const std::vector< DescriptorId_t > & GetClusterIds() const
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool HasClusterDetails() const
A cluster group is loaded in two stages.
bool operator==(const RClusterGroupDescriptor &other) const
std::vector< DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
Meta-data stored for every column of an ntuple.
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
RColumnDescriptor & operator=(RColumnDescriptor &&other)=default
RColumnDescriptor(const RColumnDescriptor &other)=delete
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
std::optional< RValueRange > GetValueRange() const
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
EColumnType fType
The on-disk column type.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Field specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor & operator=(RExtraTypeInfoDescriptor &&other)=default
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
RExtraTypeInfoDescriptor & operator=(const RExtraTypeInfoDescriptor &other)=delete
RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other)=default
RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other)=delete
std::string fContent
The content format depends on the content ID and may be binary.
A field translates read and write calls from/to underlying columns to/from tree values.
Meta-data stored for every field of an ntuple.
RFieldDescriptor & operator=(RFieldDescriptor &&other)=default
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, bool continueOnError=false) const
In general, we create a field simply from the C++ type name.
const std::string & GetFieldName() const
const std::string & GetTypeName() const
const std::string & GetFieldDescription() const
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
const std::vector< DescriptorId_t > & GetLogicalColumnIds() const
std::string fFieldDescription
Free text set by the user.
const std::string & GetTypeAlias() const
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::vector< DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
const std::vector< DescriptorId_t > & GetLinkIds() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::optional< std::uint32_t > GetTypeChecksum() const
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
Used to loop over all the clusters of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
Used to loop over all the cluster groups of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
Used to loop over all the extra type info records of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
Summarizes information about fields and the corresponding columns that were added after the header ha...
std::vector< DescriptorId_t > fFieldIdsOrder
All field IDs of late model extensions, in the order of field addition.
void AddExtendedColumn(const RColumnDescriptor &columnDesc)
std::uint32_t fNLogicalColumns
Number of logical and physical columns; updated by the descriptor builder when columns are added.
const std::vector< DescriptorId_t > & GetExtendedColumnRepresentations() const
std::unordered_set< DescriptorId_t > fFieldIdsLookup
All field IDs of late model extensions for efficient lookup.
std::vector< DescriptorId_t > fExtendedColumnRepresentations
All logical column IDs of columns that extend, with additional column representations,...
void AddExtendedField(const RFieldDescriptor &fieldDesc)
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
std::unique_ptr< RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored meta-data.
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
RColumnDescriptorIterable GetColumnIterable() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
static constexpr unsigned int kFeatureFlagTest
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
RResult< void > AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
RResult< void > DropClusterGroupDetails(DescriptorId_t clusterGroupId)
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
bool HasFeature(unsigned int flag) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
const std::string & GetDescription() const
RClusterDescriptorIterable GetClusterIterable() const
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
DescriptorId_t fFieldZeroId
Set by the descriptor builder.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::vector< std::uint64_t > GetFeatureFlags() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr int kUnknownCompressionSettings
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr ClusterSize_t kInvalidClusterIndex(std::uint64_t(-1))
EExtraTypeInfoIds
Used in RExtraTypeInfoDescriptor.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The window of element indexes of a particular column in a particular cluster.
NTupleSize_t fFirstElementIndex
The global index of the first column element in the cluster.
bool fIsSuppressed
Suppressed columns have an empty page range and unknown compression settings.
int fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
The number of column elements in the cluster.
ClusterSize_t::ValueType fFirstInPage
Index (in cluster) of the first element in page.
RPageInfoExtended(const RPageInfo &pi, ClusterSize_t::ValueType i, NTupleSize_t n)
NTupleSize_t fPageNo
Page number in the corresponding RPageRange.
We do not need to store the element size / uncompressed page size because we know to which column the...
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
bool fHasChecksum
If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data.
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.
bool fCreateBare
If true, the model will be created without a default entry (bare model).
bool fReconstructProjections
If set to true, projected fields will be reconstructed as such.
bool fForwardCompatible
Normally creating a model will fail if any of the reconstructed fields contains an unknown column typ...
Generic information about the physical location of data.
static void output()