Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-07-19
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#ifndef ROOT7_RNTupleDescriptor
18#define ROOT7_RNTupleDescriptor
19
20#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RSpan.hxx>
24
25#include <TError.h>
26
27#include <algorithm>
28#include <chrono>
29#include <cmath>
30#include <functional>
31#include <iterator>
32#include <map>
33#include <memory>
34#include <optional>
35#include <ostream>
36#include <vector>
37#include <set>
38#include <string>
39#include <string_view>
40#include <unordered_map>
41#include <unordered_set>
42
43namespace ROOT {
44namespace Experimental {
45
46class RFieldBase;
47class RNTupleDescriptor;
48class RNTupleModel;
49
50namespace Internal {
51class RColumnElementBase;
52} // namespace Internal
53
54namespace Internal {
55class RColumnDescriptorBuilder;
56class RClusterDescriptorBuilder;
57class RClusterGroupDescriptorBuilder;
58class RExtraTypeInfoDescriptorBuilder;
59class RFieldDescriptorBuilder;
60class RNTupleDescriptorBuilder;
61} // namespace Internal
62
63// clang-format off
64/**
65\class ROOT::Experimental::RFieldDescriptor
66\ingroup NTuple
67\brief Meta-data stored for every field of an ntuple
68*/
69// clang-format on
73
74private:
76 /// The version of the C++-type-to-column translation mechanics
77 std::uint32_t fFieldVersion = 0;
78 /// The version of the C++ type itself
79 std::uint32_t fTypeVersion = 0;
80 /// The leaf name, not including parent fields
81 std::string fFieldName;
82 /// Free text set by the user
83 std::string fFieldDescription;
84 /// The C++ type that was used when writing the field
85 std::string fTypeName;
86 /// A typedef or using directive that resolved to the type name during field creation
87 std::string fTypeAlias;
88 /// The number of elements per entry for fixed-size arrays
89 std::uint64_t fNRepetitions = 0;
90 /// The structural information carried by this field in the data model tree
92 /// Establishes sub field relationships, such as classes and collections
94 /// For projected fields, the source field ID
96 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
97 /// order of sub fields.
98 std::vector<DescriptorId_t> fLinkIds;
99 /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
100 /// list of logical column ids. For example, the second column of the third column representation is
101 /// fLogicalColumnIds[2 * fColumnCardinality + 1]
102 std::uint32_t fColumnCardinality = 0;
103 /// The ordered list of columns attached to this field: first by representation index then by column index.
104 std::vector<DescriptorId_t> fLogicalColumnIds;
105 /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
106 /// identify types by their checksum
107 std::optional<std::uint32_t> fTypeChecksum;
108
109public:
110 RFieldDescriptor() = default;
111 RFieldDescriptor(const RFieldDescriptor &other) = delete;
115
116 bool operator==(const RFieldDescriptor &other) const;
117 /// Get a copy of the descriptor
118 RFieldDescriptor Clone() const;
119 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
120 /// access to sub fields, which is provided by the ntuple descriptor argument.
121 std::unique_ptr<RFieldBase> CreateField(const RNTupleDescriptor &ntplDesc, bool continueOnError = false) const;
122
123 DescriptorId_t GetId() const { return fFieldId; }
124 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
125 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
126 const std::string &GetFieldName() const { return fFieldName; }
127 const std::string &GetFieldDescription() const { return fFieldDescription; }
128 const std::string &GetTypeName() const { return fTypeName; }
129 const std::string &GetTypeAlias() const { return fTypeAlias; }
130 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
134 const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
135 const std::vector<DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
136 std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
137 std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; }
139 /// Tells if the field describes a user-defined class rather than a fundamental type, a collection, or one of the
140 /// natively supported stdlib classes.
141 /// The dictionary does not need to be available for this method.
142 bool IsCustomClass() const;
143};
144
145// clang-format off
146/**
147\class ROOT::Experimental::RColumnDescriptor
148\ingroup NTuple
149\brief Meta-data stored for every column of an ntuple
150*/
151// clang-format on
155
156public:
/// A (minimum, maximum) pair of doubles describing a value range.
/// Both bounds are zero unless set through one of the constructors.
struct RValueRange {
   /// Lower bound of the range
   double fMin = 0;
   /// Upper bound of the range
   double fMax = 0;

   RValueRange() = default;
   /// Construct from explicit bounds
   RValueRange(double min, double max) : fMin(min), fMax(max) {}
   /// Implicit conversion from a (min, max) pair
   RValueRange(std::pair<double, double> range) : RValueRange(range.first, range.second) {}

   /// Two ranges are equal if both bounds compare equal (exact floating point comparison)
   bool operator==(RValueRange other) const
   {
      if (fMin != other.fMin)
         return false;
      return fMax == other.fMax;
   }
   bool operator!=(RValueRange other) const { return !operator==(other); }
};
167
168private:
169 /// The actual column identifier, which is the link to the corresponding field
171 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
173 /// Every column belongs to one and only one field
175 /// The absolute value specifies the index for the first stored element for this column.
176 /// For deferred columns the absolute value is larger than zero.
177 /// Negative values specify a suppressed and deferred column.
178 std::int64_t fFirstElementIndex = 0U;
179 /// A field can be serialized into several columns, which are numbered from zero to $n$
180 std::uint32_t fIndex = 0;
181 /// A field may use multiple column representations, which are numbered from zero to $m$.
182 /// Every representation has the same number of columns.
183 std::uint16_t fRepresentationIndex = 0;
184 /// The size in bits of elements of this column. Most columns have the size fixed by their type
185 /// but low-precision float columns have variable bit widths.
186 std::uint16_t fBitsOnStorage = 0;
187 /// The on-disk column type
189 /// Optional value range (used e.g. by quantized real fields)
190 std::optional<RValueRange> fValueRange;
191
192public:
193 RColumnDescriptor() = default;
194 RColumnDescriptor(const RColumnDescriptor &other) = delete;
198
199 bool operator==(const RColumnDescriptor &other) const;
200 /// Get a copy of the descriptor
201 RColumnDescriptor Clone() const;
202
206 std::uint32_t GetIndex() const { return fIndex; }
207 std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
208 std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); }
209 std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
210 EColumnType GetType() const { return fType; }
211 std::optional<RValueRange> GetValueRange() const { return fValueRange; }
213 bool IsDeferredColumn() const { return fFirstElementIndex != 0; }
215};
216
217// clang-format off
218/**
219\class ROOT::Experimental::RClusterDescriptor
220\ingroup NTuple
221\brief Meta-data for a set of ntuple clusters
222
223The cluster descriptor is built in two phases. In a first phase, the descriptor has only an ID.
224In a second phase, the event range, column group, page locations and column ranges are added.
225Both phases are populated by the RClusterDescriptorBuilder.
226Clusters usually span across all available columns but in some cases they can describe only a subset of the columns,
227for instance when describing friend ntuples.
228*/
229// clang-format on
232
233public:
234 /// The window of element indexes of a particular column in a particular cluster
237 /// The global index of the first column element in the cluster
239 /// The number of column elements in the cluster
241 /// The usual format for ROOT compression settings (see Compression.h).
242 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
244 /// Suppressed columns have an empty page range and unknown compression settings.
245 /// Their element index range, however, is aligned with the corresponding column of the
246 /// primary column representation (see Section "Suppressed Columns" in the specification)
247 bool fIsSuppressed = false;
248
249 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
250 // Should this be done on the field level?
251
252 bool operator==(const RColumnRange &other) const
253 {
257 }
258
260 {
262 }
263 };
264
265 // clang-format off
266 /**
267 \class ROOT::Experimental::RClusterDescriptor::RPageRange
268 \ingroup NTuple
269 \brief Records the partition of data into pages for a particular column in a particular cluster
270 */
271 // clang-format on
274 /// Extend this RPageRange to fit the given RColumnRange, i.e. prepend as many synthetic RPageInfos as needed to
275 /// cover the range in `columnRange`. `RPageInfo`s are constructed to contain as many elements of type `element`
276 /// given a page size limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
277 /// This function is used to make up `RPageRange`s for clusters that contain deferred columns.
278 /// \return The number of column elements covered by the synthesized RPageInfos
279 std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element,
280 std::size_t pageSize);
281
282 /// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
283 /// up to and including a given index. Used for binary search in Find().
284 std::vector<NTupleSize_t> fCumulativeNElements;
285
286 public:
287 /// We do not need to store the element size / uncompressed page size because we know to which column
288 /// the page belongs
289 struct RPageInfo {
290 /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
291 std::uint32_t fNElements = std::uint32_t(-1);
292 /// The meaning of fLocator depends on the storage backend.
294 /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
295 bool fHasChecksum = false;
296
297 bool operator==(const RPageInfo &other) const
298 {
299 return fNElements == other.fNElements && fLocator == other.fLocator;
300 }
301 };
303 /// Index (in cluster) of the first element in page.
305 /// Page number in the corresponding RPageRange.
307
308 RPageInfoExtended() = default;
310 : RPageInfo(pi), fFirstInPage(i), fPageNo(n)
311 {
312 }
313 };
314
315 RPageRange() = default;
316 RPageRange(const RPageRange &other) = delete;
317 RPageRange &operator=(const RPageRange &other) = delete;
318 RPageRange(RPageRange &&other) = default;
319 RPageRange &operator=(RPageRange &&other) = default;
320
322 {
323 RPageRange clone;
325 clone.fPageInfos = fPageInfos;
327 return clone;
328 }
329
330 /// Find the page in the RPageRange that contains the given element. The element must exist.
331 RPageInfoExtended Find(NTupleSize_t idxInCluster) const;
332
334 std::vector<RPageInfo> fPageInfos;
335
336 bool operator==(const RPageRange &other) const
337 {
338 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
339 }
340 };
341
342private:
344 /// Clusters can be swapped by adjusting the entry offsets
346 // TODO(jblomer): change to std::uint64_t
348
349 std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
350 std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
351
352public:
354
360
362
363 bool operator==(const RClusterDescriptor &other) const;
364
365 DescriptorId_t GetId() const { return fClusterId; }
368 const RColumnRange &GetColumnRange(DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
369 const RPageRange &GetPageRange(DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
370 /// Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
371 RColumnRangeIterable GetColumnRangeIterable() const;
372 bool ContainsColumn(DescriptorId_t physicalId) const
373 {
374 return fColumnRanges.find(physicalId) != fColumnRanges.end();
375 }
376 std::uint64_t GetNBytesOnStorage() const;
377};
378
380private:
382
383public:
384 class RIterator {
385 private:
386 using Iter_t = std::unordered_map<DescriptorId_t, RColumnRange>::const_iterator;
387 /// The wrapped map iterator
389
390 public:
391 using iterator_category = std::forward_iterator_tag;
394 using difference_type = std::ptrdiff_t;
395 using pointer = const RColumnRange *;
396 using reference = const RColumnRange &;
397
398 RIterator(Iter_t iter) : fIter(iter) {}
400 {
401 ++fIter;
402 return *this;
403 }
404 reference operator*() { return fIter->second; }
405 pointer operator->() { return &fIter->second; }
406 bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
407 bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
408 };
409
410 explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
411
413 RIterator end() { return fDesc.fColumnRanges.cend(); }
414 size_t size() { return fDesc.fColumnRanges.size(); }
415};
416
417// clang-format off
418/**
419\class ROOT::Experimental::RClusterGroupDescriptor
420\ingroup NTuple
421\brief Clusters are bundled in cluster groups.
422
423Very large ntuples or combined ntuples (chains, friends) contain multiple cluster groups. The cluster groups
424may contain sharded clusters.
425Every ntuple has at least one cluster group. The clusters in a cluster group are ordered corresponding to
426the order of page locations in the page list envelope that belongs to the cluster group (see format specification)
427*/
428// clang-format on
431
432private:
434 /// The cluster IDs can be empty if the corresponding page list is not loaded.
435 /// Otherwise, cluster ids are sorted by first entry number.
436 std::vector<DescriptorId_t> fClusterIds;
437 /// The page list that corresponds to the cluster group
439 /// Uncompressed size of the page list
440 std::uint64_t fPageListLength = 0;
441 /// The minimum first entry number of the clusters in the cluster group
442 std::uint64_t fMinEntry = 0;
443 /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
444 std::uint64_t fEntrySpan = 0;
445 /// Number of clusters is always known even if the cluster IDs are not (yet) populated
446 std::uint32_t fNClusters = 0;
447
448public:
454
456 // Creates a clone without the cluster IDs
458
459 bool operator==(const RClusterGroupDescriptor &other) const;
460
462 std::uint32_t GetNClusters() const { return fNClusters; }
464 std::uint64_t GetPageListLength() const { return fPageListLength; }
465 const std::vector<DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
466 std::uint64_t GetMinEntry() const { return fMinEntry; }
467 std::uint64_t GetEntrySpan() const { return fEntrySpan; }
468 /// A cluster group is loaded in two stages. Stage one loads only the summary information.
469 /// Stage two loads the list of cluster IDs.
470 bool HasClusterDetails() const { return !fClusterIds.empty(); }
471};
472
473/// Used in RExtraTypeInfoDescriptor
475
476// clang-format off
477/**
478\class ROOT::Experimental::RExtraTypeInfoDescriptor
479\ingroup NTuple
480\brief Field specific extra type information from the header / extension header
481
482Currently only used by streamer fields to store RNTuple-wide list of streamer info records.
483*/
484// clang-format on
487
488private:
489 /// Specifies the meaning of the extra information
491 /// Type version the extra type information is bound to
492 std::uint32_t fTypeVersion = 0;
493 /// The type name the extra information refers to; empty for RNTuple-wide extra information
494 std::string fTypeName;
495 /// The content format depends on the content ID and may be binary
496 std::string fContent;
497
498public:
504
505 bool operator==(const RExtraTypeInfoDescriptor &other) const;
506
508
510 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
511 const std::string &GetTypeName() const { return fTypeName; }
512 const std::string &GetContent() const { return fContent; }
513};
514
515// clang-format off
516/**
517\class ROOT::Experimental::RNTupleDescriptor
518\ingroup NTuple
519\brief The on-storage meta-data of an ntuple
520
521Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
522several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
523relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
524its location and size, and for every column the range of element indexes as well as a list of pages and page
525locations.
526
527The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
528for ntuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
529
530The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
531the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
532written struct. This allows for forward and backward compatibility when the meta-data evolves.
533*/
534// clang-format on
537
538public:
539 class RHeaderExtension;
540
541private:
542 /// The ntuple name needs to be unique in a given storage location (file)
543 std::string fName;
544 /// Free text from the user
545 std::string fDescription;
546
547 std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
548 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
549 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
550
551 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster groups are added
552 std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
553 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
554
555 DescriptorId_t fFieldZeroId = kInvalidDescriptorId; ///< Set by the descriptor builder
556
557 /**
558 * Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
559 * active page locations. During the lifetime of the descriptor, page location information for clusters
560 * can be added or removed. When this happens, the generation should be increased, so that users of the
561 * descriptor know that the information changed. The generation is increased, e.g., by the page source's
562 * exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
563 */
564 std::uint64_t fGeneration = 0;
565
566 std::set<unsigned int> fFeatureFlags;
567 std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
568 std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
569 std::unordered_map<DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
570 /// References cluster groups sorted by entry range and thus allows for binary search.
571 /// Note that this list is empty during the descriptor building process and will only be
572 /// created when the final descriptor is extracted from the builder.
573 std::vector<DescriptorId_t> fSortedClusterGroupIds;
574 /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
575 /// from a chain of files
576 std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
577 std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
578 std::unique_ptr<RHeaderExtension> fHeaderExtension;
579
580 // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
582
583public:
584 static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
585
591
592 /// Modifiers passed to `CreateModel`
594 RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
595 /// If set to true, projected fields will be reconstructed as such. This will prevent the model from being used
596 /// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
598 /// Normally creating a model will fail if any of the reconstructed fields contains an unknown column type.
599 /// If this option is enabled, the model will be created and all fields containing unknown data (directly
600 /// or indirectly) will be skipped instead.
601 bool fForwardCompatible = false;
602 /// If true, the model will be created without a default entry (bare model).
603 bool fCreateBare = false;
604 };
605
606 RNTupleDescriptor() = default;
607 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
611
612 std::unique_ptr<RNTupleDescriptor> Clone() const;
613
614 bool operator==(const RNTupleDescriptor &other) const;
615
616 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
617 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
618 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
619
620 const RFieldDescriptor &GetFieldDescriptor(DescriptorId_t fieldId) const { return fFieldDescriptors.at(fieldId); }
622 {
623 return fColumnDescriptors.at(columnId);
624 }
626 {
627 return fClusterGroupDescriptors.at(clusterGroupId);
628 }
630 {
631 return fClusterDescriptors.at(clusterId);
632 }
633
634 RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
635 RFieldDescriptorIterable
636 GetFieldIterable(const RFieldDescriptor &fieldDesc,
637 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const;
638 RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const;
639 RFieldDescriptorIterable
641 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const;
642
643 RFieldDescriptorIterable GetTopLevelFields() const;
644 RFieldDescriptorIterable
645 GetTopLevelFields(const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator) const;
646
647 RColumnDescriptorIterable GetColumnIterable() const;
648 RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
649 RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const;
650
651 RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
652
653 RClusterDescriptorIterable GetClusterIterable() const;
654
655 RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
656
657 const std::string &GetName() const { return fName; }
658 const std::string &GetDescription() const { return fDescription; }
659
660 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
661 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
662 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
663 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
664 std::size_t GetNClusters() const { return fNClusters; }
665 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
666 std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
667
668 /// We know the number of entries from adding the cluster summaries
670 NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const;
671
672 /// Returns the logical parent of all top-level NTuple data fields.
675 DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const;
676 /// Searches for a top-level field
677 DescriptorId_t FindFieldId(std::string_view fieldName) const;
679 FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const;
681 FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const;
685
686 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
687 /// In case of invalid field ID, an empty string is returned.
688 std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
689
690 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
691 std::vector<std::uint64_t> GetFeatureFlags() const;
692
693 /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
694 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
695
696 /// Methods to load and drop cluster group details (cluster IDs and page locations)
697 RResult<void> AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
699
700 std::uint64_t GetGeneration() const { return fGeneration; }
702
703 /// Re-create the C++ model from the stored meta-data
704 std::unique_ptr<RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
705 void PrintInfo(std::ostream &output) const;
706};
707
708// clang-format off
709/**
710\class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorIterable
711\ingroup NTuple
712\brief Used to loop over a field's associated columns
713*/
714// clang-format on
716private:
717 /// The associated NTuple for this range.
719 /// The descriptor ids of the columns ordered by field, representation, and column index
720 std::vector<DescriptorId_t> fColumns = {};
721
722public:
723 class RIterator {
724 private:
725 /// The enclosing range's NTuple.
727 /// The enclosing range's descriptor id list.
728 const std::vector<DescriptorId_t> &fColumns;
729 std::size_t fIndex = 0;
730
731 public:
732 using iterator_category = std::forward_iterator_tag;
735 using difference_type = std::ptrdiff_t;
736 using pointer = const RColumnDescriptor *;
738
739 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
740 : fNTuple(ntuple), fColumns(columns), fIndex(index)
741 {
742 }
744 {
745 ++fIndex;
746 return *this;
747 }
750 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
751 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
752 };
753
754 RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc);
756
759 size_t size() { return fColumns.size(); }
760};
761
762// clang-format off
763/**
764\class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorIterable
765\ingroup NTuple
766\brief Used to loop over a field's child fields
767*/
768// clang-format on
770private:
771 /// The associated NTuple for this range.
773 /// The descriptor ids of the child fields. These may be sorted using
774 /// a comparison function.
775 std::vector<DescriptorId_t> fFieldChildren = {};
776
777public:
778 class RIterator {
779 private:
780 /// The enclosing range's NTuple.
782 /// The enclosing range's descriptor id list.
783 const std::vector<DescriptorId_t> &fFieldChildren;
784 std::size_t fIndex = 0;
785
786 public:
787 using iterator_category = std::forward_iterator_tag;
790 using difference_type = std::ptrdiff_t;
793
794 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &fieldChildren, std::size_t index)
795 : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index)
796 {
797 }
799 {
800 ++fIndex;
801 return *this;
802 }
804 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
805 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
806 };
808 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
809 {
810 }
811 /// Sort the range using an arbitrary comparison function.
813 const std::function<bool(DescriptorId_t, DescriptorId_t)> &comparator)
814 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
815 {
816 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
817 }
820};
821
822// clang-format off
823/**
824\class ROOT::Experimental::RNTupleDescriptor::RClusterGroupDescriptorIterable
825\ingroup NTuple
826\brief Used to loop over all the cluster groups of an ntuple (in unspecified order)
827
828Enumerate all cluster group IDs from the cluster group descriptor. No specific order can be assumed, use
829FindNextClusterGroupId and FindPrevClusterGroupId to traverse cluster groups by entry number.
830*/
831// clang-format on
833private:
834 /// The associated NTuple for this range.
836
837public:
838 class RIterator {
839 private:
840 /// The enclosing range's NTuple.
842 std::size_t fIndex = 0;
843
844 public:
845 using iterator_category = std::forward_iterator_tag;
848 using difference_type = std::ptrdiff_t;
851
852 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
854 {
855 ++fIndex;
856 return *this;
857 }
859 {
860 auto it = fNTuple.fClusterGroupDescriptors.begin();
861 std::advance(it, fIndex);
862 return it->second;
863 }
864 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
865 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
866 };
867
871};
872
873// clang-format off
874/**
875\class ROOT::Experimental::RNTupleDescriptor::RClusterDescriptorIterable
876\ingroup NTuple
877\brief Used to loop over all the clusters of an ntuple (in unspecified order)
878
879Enumerate all cluster IDs from the cluster descriptor. No specific order can be assumed, use
880FindNextClusterId and FindPrevClusterId to traverse clusters by entry number.
881*/
882// clang-format on
884private:
885 /// The associated NTuple for this range.
887
888public:
889 class RIterator {
890 private:
891 /// The enclosing range's NTuple.
893 std::size_t fIndex = 0;
894
895 public:
896 using iterator_category = std::forward_iterator_tag;
899 using difference_type = std::ptrdiff_t;
902
903 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
905 {
906 ++fIndex;
907 return *this;
908 }
910 {
911 auto it = fNTuple.fClusterDescriptors.begin();
912 std::advance(it, fIndex);
913 return it->second;
914 }
915 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
916 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
917 };
918
922};
923
924// clang-format off
925/**
926\class ROOT::Experimental::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
927\ingroup NTuple
928\brief Used to loop over all the extra type info records of an ntuple (in unspecified order)
929*/
930// clang-format on
932private:
933 /// The associated NTuple for this range.
935
936public:
/// Forward iterator that addresses one extra type info descriptor of the referenced ntuple by its position
/// in fNTuple.fExtraTypeInfoDescriptors.
/// NOTE(review): the inner listing numbers skip 940, 945-946, 948-949, 952 and 957, so some member and
/// signature lines of this class appear to be missing from this listing.
937 class RIterator {
938 private:
939 /// The enclosing range's NTuple.
/// Zero-based position within the container; the only state used by the comparison operators below.
941 std::size_t fIndex = 0;
942
943 public:
944 using iterator_category = std::forward_iterator_tag;
947 using difference_type = std::ptrdiff_t;
950
/// Binds the iterator to `ntuple` and starts it at position `index`.
951 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
/// Body of a member whose signature line is absent from this listing: moves to the next position and
/// returns this iterator.
953 {
954 ++fIndex;
955 return *this;
956 }
/// Body of a member whose signature line is absent from this listing: yields the fIndex-th element of
/// fNTuple.fExtraTypeInfoDescriptors.
958 {
// The container iterator is re-derived from begin() on every call rather than cached.
959 auto it = fNTuple.fExtraTypeInfoDescriptors.begin();
960 std::advance(it, fIndex);
961 return *it;
962 }
/// Iterators compare by position only; the ntuple they refer to is not taken into account.
963 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
964 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
965 };
966
970};
971
972// clang-format off
973/**
974\class ROOT::Experimental::RNTupleDescriptor::RHeaderExtension
975\ingroup NTuple
976\brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
977*/
978// clang-format on
981
982private:
983 /// All field IDs of late model extensions, in the order of field addition. This is necessary to serialize the
984 /// fields in that order.
985 std::vector<DescriptorId_t> fFieldIdsOrder;
986 /// All field IDs of late model extensions for efficient lookup. When a column gets added to the extension
987 /// header, this enables us to determine if the column belongs to a field of the header extension or if it
988 /// belongs to a field of the regular header that gets extended by additional column representations.
989 std::unordered_set<DescriptorId_t> fFieldIdsLookup;
990 /// All logical column IDs of columns that extend, with additional column representations, fields of the regular
991 /// header. During serialization, these columns are not picked up as columns of `fFieldIdsOrder`. But instead
992 /// these columns need to be serialized in the extension header without re-serializing the field.
993 std::vector<DescriptorId_t> fExtendedColumnRepresentations;
994 /// Number of logical and physical columns; updated by the descriptor builder when columns are added
995 std::uint32_t fNLogicalColumns = 0;
996 std::uint32_t fNPhysicalColumns = 0;
997
/// Registers the ID of `fieldDesc` as belonging to the header extension: it is appended to the ordered list
/// (fFieldIdsOrder) and inserted into the lookup set (fFieldIdsLookup).
/// No duplicate check is made here; registering the same ID twice appends it to the ordered list twice.
998 void AddExtendedField(const RFieldDescriptor &fieldDesc)
999 {
1000 fFieldIdsOrder.emplace_back(fieldDesc.GetId());
1001 fFieldIdsLookup.insert(fieldDesc.GetId());
1002 }
1003
1004 void AddExtendedColumn(const RColumnDescriptor &columnDesc)
1005 {
1007 if (!columnDesc.IsAliasColumn())
1009 if (fFieldIdsLookup.count(columnDesc.GetFieldId()) == 0) {
1010 fExtendedColumnRepresentations.emplace_back(columnDesc.GetLogicalId());
1011 }
1012 }
1013
1014public:
/// Number of field IDs registered in the header extension (size of fFieldIdsOrder).
1015 std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
/// Current value of the logical column counter (fNLogicalColumns, a std::uint32_t widened to std::size_t).
1016 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
/// Current value of the physical column counter (fNPhysicalColumns, a std::uint32_t widened to std::size_t).
1017 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
1018 const std::vector<DescriptorId_t> &GetExtendedColumnRepresentations() const
1019 {
1021 }
1022 /// Return a vector containing the IDs of the top-level fields defined in the extension header, in the order
1023 /// of their addition.
1024 /// We cannot create this vector when building the fFields because at the time when AddExtendedField is called,
1025 /// the field is not yet linked into the schema tree.
1026 std::vector<DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
1027};
1028
1029namespace Internal {
1030
1031// clang-format off
1032/**
1033\class ROOT::Experimental::Internal::RColumnDescriptorBuilder
1034\ingroup NTuple
1035\brief A helper class for piece-wise construction of an RColumnDescriptor
1036
1037Dangling column descriptors can become actual descriptors when added to an
1038RNTupleDescriptorBuilder instance and then linked to their fields.
1039*/
1040// clang-format on
1042private:
1044
1045public:
1046 /// Make an empty column descriptor builder.
1048
1050 {
1051 fColumn.fLogicalColumnId = logicalColumnId;
1052 return *this;
1053 }
1055 {
1056 fColumn.fPhysicalColumnId = physicalColumnId;
1057 return *this;
1058 }
/// Sets the size in bits of the elements of the column under construction.
/// \param bitsOnStorage Value stored in fColumn.fBitsOnStorage.
/// \return This builder, so that calls can be chained.
1059 RColumnDescriptorBuilder &BitsOnStorage(std::uint16_t bitsOnStorage)
1060 {
1061 fColumn.fBitsOnStorage = bitsOnStorage;
1062 return *this;
1063 }
1065 {
1066 fColumn.fType = type;
1067 return *this;
1068 }
1070 {
1071 fColumn.fFieldId = fieldId;
1072 return *this;
1073 }
1075 {
1077 return *this;
1078 }
/// Sets the index of the first stored element for the column under construction.
/// \param firstElementIdx Value stored in fColumn.fFirstElementIndex.
/// \return This builder, so that calls can be chained.
/// NOTE(review): the reference listing shows fFirstElementIndex as std::int64_t while this parameter is
/// unsigned, so the value is converted on assignment -- confirm that no caller passes values above INT64_MAX.
1079 RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx)
1080 {
1081 fColumn.fFirstElementIndex = firstElementIdx;
1082 return *this;
1083 }
1085 {
1089 return *this;
1090 }
/// Sets which of the field's column representations (numbered from zero) the column under construction
/// belongs to.
/// \param representationIndex Value stored in fColumn.fRepresentationIndex.
/// \return This builder, so that calls can be chained.
1091 RColumnDescriptorBuilder &RepresentationIndex(std::uint16_t representationIndex)
1092 {
1093 fColumn.fRepresentationIndex = representationIndex;
1094 return *this;
1095 }
/// Sets the optional value range of the column under construction from explicit bounds.
/// \param min First bound stored in the range.
/// \param max Second bound stored in the range.
/// \return This builder, so that calls can be chained.
/// The two values are stored as given; no ordering check between them is performed here.
1096 RColumnDescriptorBuilder &ValueRange(double min, double max)
1097 {
1098 fColumn.fValueRange = {min, max};
1099 return *this;
1100 }
/// Sets the optional value range of the column under construction from an already-built optional.
/// \param valueRange Copied into fColumn.fValueRange; an empty optional leaves the column without a range.
/// \return This builder, so that calls can be chained.
1101 RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange)
1102 {
1103 fColumn.fValueRange = valueRange;
1104 return *this;
1105 }
1108 /// Attempt to make a column descriptor. This may fail if the column
1109 /// was not given enough information to make a proper descriptor.
1111};
1112
1113// clang-format off
1114/**
1115\class ROOT::Experimental::Internal::RFieldDescriptorBuilder
1116\ingroup NTuple
1117\brief A helper class for piece-wise construction of an RFieldDescriptor
1118
1119Dangling field descriptors describe a single field in isolation. They are
1120missing the necessary relationship information (parent field, any child fields)
1121required to describe a real NTuple field.
1122
1123Dangling field descriptors can only become actual descriptors when added to an
1124RNTupleDescriptorBuilder instance and then linked to other fields.
1125*/
1126// clang-format on
1128private:
1130
1131public:
1132 /// Make an empty dangling field descriptor.
1134 /// Make a new RFieldDescriptorBuilder based off an existing descriptor.
1135 /// Relationship information is lost during the conversion to a
1136 /// dangling descriptor:
1137 /// * Parent id is reset to an invalid id.
1138 /// * Field children ids are forgotten.
1139 ///
1140 /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
1141 explicit RFieldDescriptorBuilder(const RFieldDescriptor &fieldDesc);
1142
1143 /// Make a new RFieldDescriptorBuilder based off a live NTuple field.
1144 static RFieldDescriptorBuilder FromField(const RFieldBase &field);
1145
1147 {
1148 fField.fFieldId = fieldId;
1149 return *this;
1150 }
/// Sets the field version (the version of the C++-type-to-column translation mechanics) of the field under
/// construction.
/// \param fieldVersion Value stored in fField.fFieldVersion.
/// \return This builder, so that calls can be chained.
1151 RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
1152 {
1153 fField.fFieldVersion = fieldVersion;
1154 return *this;
1155 }
/// Sets the version of the C++ type of the field under construction.
/// \param typeVersion Value stored in fField.fTypeVersion.
/// \return This builder, so that calls can be chained.
1156 RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
1157 {
1158 fField.fTypeVersion = typeVersion;
1159 return *this;
1160 }
1162 {
1164 return *this;
1165 }
1167 {
1169 return *this;
1170 }
/// Sets the name of the field under construction (the leaf name, not including parent fields).
/// \param fieldName Copied into fField.fFieldName; not validated here.
/// \return This builder, so that calls can be chained.
1171 RFieldDescriptorBuilder &FieldName(const std::string &fieldName)
1172 {
1173 fField.fFieldName = fieldName;
1174 return *this;
1175 }
/// Sets the free-text description of the field under construction.
/// \param fieldDescription Copied into fField.fFieldDescription.
/// \return This builder, so that calls can be chained.
1176 RFieldDescriptorBuilder &FieldDescription(const std::string &fieldDescription)
1177 {
1178 fField.fFieldDescription = fieldDescription;
1179 return *this;
1180 }
/// Sets the C++ type name of the field under construction.
/// \param typeName Copied into fField.fTypeName; not validated here.
/// \return This builder, so that calls can be chained.
1181 RFieldDescriptorBuilder &TypeName(const std::string &typeName)
1182 {
1183 fField.fTypeName = typeName;
1184 return *this;
1185 }
/// Sets the type alias (a typedef or using directive that resolved to the type name) of the field under
/// construction.
/// \param typeAlias Copied into fField.fTypeAlias.
/// \return This builder, so that calls can be chained.
1186 RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias)
1187 {
1188 fField.fTypeAlias = typeAlias;
1189 return *this;
1190 }
/// Sets the number of elements per entry (used for fixed-size arrays) of the field under construction.
/// \param nRepetitions Value stored in fField.fNRepetitions.
/// \return This builder, so that calls can be chained.
1191 RFieldDescriptorBuilder &NRepetitions(std::uint64_t nRepetitions)
1192 {
1193 fField.fNRepetitions = nRepetitions;
1194 return *this;
1195 }
1197 {
1198 fField.fStructure = structure;
1199 return *this;
1200 }
/// Sets the optional type checksum of the field under construction.
/// \param typeChecksum Copied into fField.fTypeChecksum; an empty optional leaves the field without a checksum.
/// \return This builder, so that calls can be chained.
1201 RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum)
1202 {
1203 fField.fTypeChecksum = typeChecksum;
1204 return *this;
1205 }
1207 /// Attempt to make a field descriptor. This may fail if the dangling field
1208 /// was not given enough information to make a proper descriptor.
1210};
1211
1212// clang-format off
1213/**
1214\class ROOT::Experimental::Internal::RClusterDescriptorBuilder
1215\ingroup NTuple
1216\brief A helper class for piece-wise construction of an RClusterDescriptor
1217
1218The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1219piecewise addition of page locations.
1220*/
1221// clang-format on
1223private:
1225
1226public:
1228 {
1229 fCluster.fClusterId = clusterId;
1230 return *this;
1231 }
1232
/// Sets the index of the first entry of the cluster under construction.
/// \param firstEntryIndex Value stored in fCluster.fFirstEntryIndex.
/// \return This builder, so that calls can be chained.
1233 RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
1234 {
1235 fCluster.fFirstEntryIndex = firstEntryIndex;
1236 return *this;
1237 }
1238
/// Sets the number of entries of the cluster under construction.
/// \param nEntries Value stored in fCluster.fNEntries.
/// \return This builder, so that calls can be chained.
1239 RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
1240 {
1241 fCluster.fNEntries = nEntries;
1242 return *this;
1243 }
1244
1245 RResult<void> CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex,
1246 std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
1247
1248 /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1249 /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1250 /// MarkSuppressedColumnRange() took place.
1252
1253 /// Sets the first element index and number of elements for all the suppressed column ranges.
1254 /// The information is taken from the corresponding columns from the primary representation.
1255 /// Needs to be called when all the columns (suppressed and regular) were added.
1257
1258 /// Add column and page ranges for columns created during late model extension missing in this cluster. The locator
1259 /// type for the synthesized page ranges is `kTypePageZero`. All the page sources must be able to populate the
1260 /// 'zero' page from such locator. Any call to `CommitColumnRange()` and `CommitSuppressedColumnRanges()`
1261 /// should happen before calling this function.
1263
1265 {
1266 return fCluster.GetColumnRange(physicalId);
1267 }
1268
1269 /// Move out the full cluster descriptor including page locations
1271};
1272
1273// clang-format off
1274/**
1275\class ROOT::Experimental::Internal::RClusterGroupDescriptorBuilder
1276\ingroup NTuple
1277\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1278*/
1279// clang-format on
1281private:
1283
1284public:
1287
1289 {
1290 fClusterGroup.fClusterGroupId = clusterGroupId;
1291 return *this;
1292 }
1294 {
1295 fClusterGroup.fPageListLocator = pageListLocator;
1296 return *this;
1297 }
/// Sets the uncompressed size of the page list of the cluster group under construction.
/// \param pageListLength Value stored in fClusterGroup.fPageListLength.
/// \return This builder, so that calls can be chained.
1298 RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1299 {
1300 fClusterGroup.fPageListLength = pageListLength;
1301 return *this;
1302 }
1304 {
1305 fClusterGroup.fMinEntry = minEntry;
1306 return *this;
1307 }
1309 {
1310 fClusterGroup.fEntrySpan = entrySpan;
1311 return *this;
1312 }
1314 {
1315 fClusterGroup.fNClusters = nClusters;
1316 return *this;
1317 }
/// Stores the list of cluster IDs of the cluster group under construction.
/// \param clusterIds Copied into fClusterGroup.fClusterIds, replacing any previously stored list.
/// \throws RException if the number of IDs differs from fClusterGroup.GetNClusters(); the number of clusters
/// therefore has to be set on the builder before this call.
/// The IDs are taken in the order given; this function neither sorts them nor verifies their ordering.
1318 void AddSortedClusters(const std::vector<DescriptorId_t> &clusterIds)
1319 {
1320 if (clusterIds.size() != fClusterGroup.GetNClusters())
1321 throw RException(R__FAIL("mismatch of number of clusters"));
1322 fClusterGroup.fClusterIds = clusterIds;
1323 }
1324
1326};
1327
1328// clang-format off
1329/**
1330\class ROOT::Experimental::Internal::RExtraTypeInfoDescriptorBuilder
1331\ingroup NTuple
1332\brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1333*/
1334// clang-format on
1336private:
1338
1339public:
1341
1343 {
1344 fExtraTypeInfo.fContentId = contentId;
1345 return *this;
1346 }
1348 {
1349 fExtraTypeInfo.fTypeVersion = typeVersion;
1350 return *this;
1351 }
/// Sets the type name the extra type information refers to (empty for RNTuple-wide extra information).
/// \param typeName Copied into fExtraTypeInfo.fTypeName.
/// \return This builder, so that calls can be chained.
1352 RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1353 {
1354 fExtraTypeInfo.fTypeName = typeName;
1355 return *this;
1356 }
/// Sets the payload of the extra type information; its format depends on the content ID and may be binary.
/// \param content Copied into fExtraTypeInfo.fContent.
/// \return This builder, so that calls can be chained.
1357 RExtraTypeInfoDescriptorBuilder &Content(const std::string &content)
1358 {
1359 fExtraTypeInfo.fContent = content;
1360 return *this;
1361 }
1362
1364};
1365
1366// clang-format off
1367/**
1368\class ROOT::Experimental::Internal::RNTupleDescriptorBuilder
1369\ingroup NTuple
1370\brief A helper class for piece-wise construction of an RNTupleDescriptor
1371
1372Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1373*/
1374// clang-format on
1376private:
1379
1380public:
1381 /// Checks whether invariants hold:
1382 /// * NTuple name is valid
1383 /// * Fields have valid parents
1384 /// * Number of columns is constant across column representations
1388
1389 void SetNTuple(const std::string_view name, const std::string_view description);
1390 void SetFeature(unsigned int flag);
1391
/// Stores `xxhash3` in fDescriptor.fOnDiskHeaderXxHash3.
/// NOTE(review): presumably the XXH3 checksum of the serialized header -- confirm against the serializer.
1392 void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1394 /// The real footer size also includes the page list envelopes
1396
1397 void AddField(const RFieldDescriptor &fieldDesc);
1400
1401 // The field that the column belongs to has to be already available. For fields with multiple columns,
1402 // the columns need to be added in order of the column index
1404
1407
1409
1410 /// Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor
1411 void Reset();
1412
1413 /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1414 /// annotated as being part of the header extension.
1415 void BeginHeaderExtension();
1416
1417 /// If the descriptor is constructed in pieces consisting of physical and alias columns
1418 /// (regular and projected fields), the natural column order would be
1419 /// - Physical and alias columns of piece one
1420 /// - Physical and alias columns of piece two
1421 /// - etc.
1422 /// What we want, however, are first all physical column IDs and then all alias column IDs.
1423 /// This method adds `offset` to the logical column IDs of all alias columns and fixes up the corresponding
1424 /// column IDs in the projected field descriptors. In this way, a new piece of physical and alias columns can
1425 /// first shift the existing alias columns by the number of new physical columns, resulting in the following order
1426 /// - Physical columns of piece one
1427 /// - Physical columns of piece two
1428 /// - ...
1429 /// - Alias columns of piece one
1430 /// - Alias columns of piece two
1431 /// - ...
1432 void ShiftAliasColumns(std::uint32_t offset);
1433
1434 /// Get the streamer info records for custom classes. Currently requires the corresponding dictionaries to be loaded.
1436};
1437
1438} // namespace Internal
1439} // namespace Experimental
1440} // namespace ROOT
1441
1442#endif // ROOT7_RNTupleDescriptor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > MarkSuppressedColumnRange(DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RClusterDescriptorBuilder & ClusterId(DescriptorId_t clusterId)
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
const RClusterDescriptor::RColumnRange & GetColumnRange(DescriptorId_t physicalId)
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
void AddSortedClusters(const std::vector< DescriptorId_t > &clusterIds)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & PhysicalColumnId(DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder & Type(EColumnType type)
RColumnDescriptorBuilder & BitsOnStorage(std::uint16_t bitsOnStorage)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & RepresentationIndex(std::uint16_t representationIndex)
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RColumnDescriptorBuilder & ValueRange(std::optional< RColumnDescriptor::RValueRange > valueRange)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & LogicalColumnId(DescriptorId_t logicalColumnId)
RColumnDescriptorBuilder & ValueRange(double min, double max)
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RExtraTypeInfoDescriptor.
RExtraTypeInfoDescriptorBuilder & Content(const std::string &content)
RExtraTypeInfoDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RExtraTypeInfoDescriptorBuilder & TypeName(const std::string &typeName)
RExtraTypeInfoDescriptorBuilder & ContentId(EExtraTypeInfoIds contentId)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & ProjectionSourceId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeChecksum(const std::optional< std::uint32_t > typeChecksum)
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
A helper class for piece-wise construction of an RNTupleDescriptor.
RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const
Get the streamer info records for custom classes. Currently requires the corresponding dictionaries t...
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
RResult< void > AddFieldProjection(DescriptorId_t sourceId, DescriptorId_t targetId)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
void ShiftAliasColumns(std::uint32_t offset)
If the descriptor is constructed in pieces consisting of physical and alias columns (regular and proj...
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also include the page list envelopes.
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void Reset()
Clears so-far stored clusters, fields, and columns and return to a pristine ntuple descriptor.
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
std::unordered_map< DescriptorId_t, RColumnRange >::const_iterator Iter_t
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(NTupleSize_t idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
RPageRange & operator=(RPageRange &&other)=default
std::vector< NTupleSize_t > fCumulativeNElements
Has the same length as fPageInfos and stores the sum of the number of elements of all the pages up ...
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
RPageRange(const RPageRange &other)=delete
RPageRange & operator=(const RPageRange &other)=delete
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor & operator=(RClusterDescriptor &&other)=default
RClusterDescriptor(RClusterDescriptor &&other)=default
bool ContainsColumn(DescriptorId_t physicalId) const
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
RClusterDescriptor(const RClusterDescriptor &other)=delete
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
const RColumnRange & GetColumnRange(DescriptorId_t physicalId) const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
const RPageRange & GetPageRange(DescriptorId_t physicalId) const
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
std::uint64_t fPageListLength
Uncompressed size of the page list.
RClusterGroupDescriptor CloneSummary() const
const std::vector< DescriptorId_t > & GetClusterIds() const
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool HasClusterDetails() const
A cluster group is loaded in two stages.
bool operator==(const RClusterGroupDescriptor &other) const
std::vector< DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
Meta-data stored for every column of an ntuple.
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
RColumnDescriptor & operator=(RColumnDescriptor &&other)=default
RColumnDescriptor(const RColumnDescriptor &other)=delete
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
std::optional< RValueRange > GetValueRange() const
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
EColumnType fType
The on-disk column type.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Field-specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor & operator=(RExtraTypeInfoDescriptor &&other)=default
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
RExtraTypeInfoDescriptor & operator=(const RExtraTypeInfoDescriptor &other)=delete
RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other)=default
RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other)=delete
std::string fContent
The content format depends on the content ID and may be binary.
A field translates read and write calls from/to underlying columns to/from tree values.
Meta-data stored for every field of an ntuple.
RFieldDescriptor & operator=(RFieldDescriptor &&other)=default
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, bool continueOnError=false) const
In general, we create a field simply from the C++ type name.
const std::string & GetFieldName() const
const std::string & GetTypeName() const
const std::string & GetFieldDescription() const
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
const std::vector< DescriptorId_t > & GetLogicalColumnIds() const
std::string fFieldDescription
Free text set by the user.
const std::string & GetTypeAlias() const
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
std::vector< DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
const std::vector< DescriptorId_t > & GetLinkIds() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::optional< std::uint32_t > GetTypeChecksum() const
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
Used to loop over all the clusters of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
Used to loop over all the cluster groups of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
Used to loop over all the extra type info record of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
Summarizes information about fields and the corresponding columns that were added after the header ha...
std::vector< DescriptorId_t > fFieldIdsOrder
All field IDs of late model extensions, in the order of field addition.
void AddExtendedColumn(const RColumnDescriptor &columnDesc)
std::uint32_t fNLogicalColumns
Number of logical and physical columns; updated by the descriptor builder when columns are added.
const std::vector< DescriptorId_t > & GetExtendedColumnRepresentations() const
std::unordered_set< DescriptorId_t > fFieldIdsLookup
All field IDs of late model extensions for efficient lookup.
std::vector< DescriptorId_t > fExtendedColumnRepresentations
All logical column IDs of columns that extend, with additional column representations,...
void AddExtendedField(const RFieldDescriptor &fieldDesc)
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
std::unique_ptr< RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored meta-data.
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
RColumnDescriptorIterable GetColumnIterable() const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
static constexpr unsigned int kFeatureFlagTest
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
RResult< void > AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
bool operator==(const RNTupleDescriptor &other) const
std::vector< DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
RResult< void > DropClusterGroupDetails(DescriptorId_t clusterGroupId)
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
bool HasFeature(unsigned int flag) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
const std::string & GetDescription() const
RClusterDescriptorIterable GetClusterIterable() const
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
DescriptorId_t FindClusterId(NTupleSize_t entryIdx) const
DescriptorId_t fFieldZeroId
Set by the descriptor builder.
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::vector< std::uint64_t > GetFeatureFlags() const
Generic information about the physical location of data.
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
const Int_t n
Definition legend1.C:16
constexpr int kNTupleUnknownCompression
Regular, known compression settings have the form algorithm * 100 + level, e.g. 101,...
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
EExtraTypeInfoIds
Used in RExtraTypeInfoDescriptor.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The window of element indexes of a particular column in a particular cluster.
NTupleSize_t fFirstElementIndex
The global index of the first column element in the cluster.
bool fIsSuppressed
Suppressed columns have an empty page range and unknown compression settings.
int fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
NTupleSize_t fNElements
The number of column elements in the cluster.
RPageInfoExtended(const RPageInfo &pi, NTupleSize_t i, NTupleSize_t n)
NTupleSize_t fFirstInPage
Index (in cluster) of the first element in page.
NTupleSize_t fPageNo
Page number in the corresponding RPageRange.
We do not need to store the element size / uncompressed page size because we know to which column the...
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
bool fHasChecksum
If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data.
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
bool fCreateBare
If true, the model will be created without a default entry (bare model).
bool fReconstructProjections
If set to true, projected fields will be reconstructed as such.
bool fForwardCompatible
Normally creating a model will fail if any of the reconstructed fields contains an unknown column typ...
static void output()