Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-07-19
6/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
7/// is welcome!
8
9/*************************************************************************
10 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
11 * All rights reserved. *
12 * *
13 * For the licensing terms see $ROOTSYS/LICENSE. *
14 * For the list of contributors see $ROOTSYS/README/CREDITS. *
15 *************************************************************************/
16
17#ifndef ROOT7_RNTupleDescriptor
18#define ROOT7_RNTupleDescriptor
19
20#include <ROOT/RColumnModel.hxx>
21#include <ROOT/RError.hxx>
23#include <ROOT/RNTupleUtil.hxx>
24#include <ROOT/RSpan.hxx>
25#include <string_view>
26
27#include <algorithm>
28#include <chrono>
29#include <functional>
30#include <iterator>
31#include <map>
32#include <memory>
33#include <ostream>
34#include <vector>
35#include <set>
36#include <string>
37#include <unordered_map>
38#include <unordered_set>
39
40namespace ROOT {
41namespace Experimental {
42
43class RFieldBase;
44class RNTupleDescriptor;
45class RNTupleModel;
46
47namespace Internal {
48class RColumnElementBase;
49} // namespace Internal
50
51namespace Internal {
52class RColumnDescriptorBuilder;
53class RColumnGroupDescriptorBuilder;
54class RClusterDescriptorBuilder;
55class RClusterGroupDescriptorBuilder;
56class RFieldDescriptorBuilder;
57class RNTupleDescriptorBuilder;
58} // namespace Internal
59
60// clang-format off
61/**
62\class ROOT::Experimental::RFieldDescriptor
63\ingroup NTuple
64\brief Meta-data stored for every field of an ntuple
65*/
66// clang-format on
70
71private:
73 /// The version of the C++-type-to-column translation mechanics
74 std::uint32_t fFieldVersion = 0;
75 /// The version of the C++ type itself
76 std::uint32_t fTypeVersion = 0;
77 /// The leaf name, not including parent fields
78 std::string fFieldName;
79 /// Free text set by the user
80 std::string fFieldDescription;
81 /// The C++ type that was used when writing the field
82 std::string fTypeName;
83 /// A typedef or using directive that resolved to the type name during field creation
84 std::string fTypeAlias;
85 /// The number of elements per entry for fixed-size arrays
86 std::uint64_t fNRepetitions = 0;
87 /// The structural information carried by this field in the data model tree
89 /// Establishes sub field relationships, such as classes and collections
91 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
92 /// order of sub fields.
93 std::vector<DescriptorId_t> fLinkIds;
94
95public:
96 RFieldDescriptor() = default;
97 RFieldDescriptor(const RFieldDescriptor &other) = delete;
101
102 bool operator==(const RFieldDescriptor &other) const;
103 /// Get a copy of the descriptor
104 RFieldDescriptor Clone() const;
105 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
106 /// access to sub fields, which is provided by the ntuple descriptor argument.
107 std::unique_ptr<RFieldBase> CreateField(const RNTupleDescriptor &ntplDesc) const;
108
109 DescriptorId_t GetId() const { return fFieldId; }
110 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
111 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
112 std::string GetFieldName() const { return fFieldName; }
113 std::string GetFieldDescription() const { return fFieldDescription; }
114 std::string GetTypeName() const { return fTypeName; }
115 std::string GetTypeAlias() const { return fTypeAlias; }
116 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
119 const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
120};
121
122
123// clang-format off
124/**
125\class ROOT::Experimental::RColumnDescriptor
126\ingroup NTuple
127\brief Meta-data stored for every column of an ntuple
128*/
129// clang-format on
133
134private:
135 /// The actual column identifier, which is the link to the corresponding field
137 /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
139 /// Contains the column type and whether it is sorted
141 /// Every column belongs to one and only one field
143 /// A field can be serialized into several columns, which are numbered from zero to $n$
144 std::uint32_t fIndex;
145 /// Specifies the index for the first stored element for this column. For deferred columns the value is greater
146 /// than 0
147 std::uint64_t fFirstElementIndex = 0U;
148
149public:
150 RColumnDescriptor() = default;
151 RColumnDescriptor(const RColumnDescriptor &other) = delete;
155
156 bool operator==(const RColumnDescriptor &other) const;
157 /// Get a copy of the descriptor
158 RColumnDescriptor Clone() const;
159
162 RColumnModel GetModel() const { return fModel; }
163 std::uint32_t GetIndex() const { return fIndex; }
166 std::uint64_t GetFirstElementIndex() const { return fFirstElementIndex; }
167 bool IsDeferredColumn() const { return fFirstElementIndex > 0; }
168};
169
170// clang-format off
171/**
172\class ROOT::Experimental::RColumnGroupDescriptor
173\ingroup NTuple
174\brief Meta-data for a set of columns; non-trivial column groups are used for sharded clusters
175
176Clusters can span a subset of columns. Such subsets are described as a column group. An empty column group
177is used to denote the column group of all the columns. Every ntuple has at least one column group.
178*/
179// clang-format on
182
183private:
185 std::unordered_set<DescriptorId_t> fPhysicalColumnIds;
186
187public:
193
194 bool operator==(const RColumnGroupDescriptor &other) const;
195
197 const std::unordered_set<DescriptorId_t> &GetPhysicalColumnIds() const { return fPhysicalColumnIds; }
/// Returns true if the given physical column ID belongs to this column group. An empty set of IDs denotes
/// the group of all columns, so an empty group reports every ID as contained.
198 bool Contains(DescriptorId_t physicalId) const
199 {
200 return fPhysicalColumnIds.empty() || fPhysicalColumnIds.count(physicalId) > 0;
201 }
202 bool HasAllColumns() const { return fPhysicalColumnIds.empty(); }
203};
204
205// clang-format off
206/**
207\class ROOT::Experimental::RClusterDescriptor
208\ingroup NTuple
209\brief Meta-data for a set of ntuple clusters
210
211The cluster descriptor is built in two phases. In a first phase, the descriptor has only an ID.
212In a second phase, the event range, column group, page locations and column ranges are added.
213Both phases are populated by the RClusterDescriptorBuilder.
214Clusters usually span across all available columns but in some cases they can describe only a subset of the columns,
215for instance when describing friend ntuples.
216*/
217// clang-format on
220
221public:
222 /// The window of element indexes of a particular column in a particular cluster
225 /// A 64bit element index
227 /// The number of column elements in the cluster
229 /// The usual format for ROOT compression settings (see Compression.h).
230 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
232
233 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
234 // Should this be done on the field level?
235
236 bool operator==(const RColumnRange &other) const {
239 }
240
243 }
244 };
245
246 /// Records the partition of data into pages for a particular column in a particular cluster
249 /// Extend this RPageRange to fit the given RColumnRange, i.e. prepend as many synthetic RPageInfos as needed to
250 /// cover the range in `columnRange`. `RPageInfo`s are constructed to contain as many elements of type `element`
251 /// as fit within a page size limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
252 /// This function is used to make up `RPageRange`s for clusters that contain deferred columns.
253 /// \return The number of column elements covered by the synthesized RPageInfos
254 std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element,
255 std::size_t pageSize);
256
257 public:
258 /// We do not need to store the element size / uncompressed page size because we know to which column
259 /// the page belongs
260 struct RPageInfo {
261 /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
262 std::uint32_t fNElements = std::uint32_t(-1);
263 /// The meaning of fLocator depends on the storage backend.
265
266 bool operator==(const RPageInfo &other) const {
267 return fNElements == other.fNElements && fLocator == other.fLocator;
268 }
269 };
271 /// Index (in cluster) of the first element in page.
273 /// Page number in the corresponding RPageRange.
275
276 RPageInfoExtended() = default;
278 : RPageInfo(pi), fFirstInPage(i), fPageNo(n)
279 {
280 }
281 };
282
283 RPageRange() = default;
284 RPageRange(const RPageRange &other) = delete;
285 RPageRange &operator =(const RPageRange &other) = delete;
286 RPageRange(RPageRange &&other) = default;
287 RPageRange &operator =(RPageRange &&other) = default;
288
291 clone.fPhysicalColumnId = fPhysicalColumnId;
292 clone.fPageInfos = fPageInfos;
293 return clone;
294 }
295
296 /// Find the page in the RPageRange that contains the given element. The element must exist.
297 RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const;
298
300 std::vector<RPageInfo> fPageInfos;
301
302 bool operator==(const RPageRange &other) const {
303 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
304 }
305 };
306
307private:
309 /// Clusters can be swapped by adjusting the entry offsets
311 // TODO(jblomer): change to std::uint64_t
313
314 std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
315 std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
316
317public:
323
325
326 bool operator==(const RClusterDescriptor &other) const;
327
328 DescriptorId_t GetId() const { return fClusterId; }
331 const RColumnRange &GetColumnRange(DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
332 const RPageRange &GetPageRange(DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
/// Returns true if this cluster has a column range recorded for the given physical column ID.
333 bool ContainsColumn(DescriptorId_t physicalId) const
334 {
335 return fColumnRanges.find(physicalId) != fColumnRanges.end();
336 }
337 std::unordered_set<DescriptorId_t> GetColumnIds() const;
338 std::uint64_t GetBytesOnStorage() const;
339};
340
341// clang-format off
342/**
343\class ROOT::Experimental::RClusterGroupDescriptor
344\ingroup NTuple
345\brief Clusters are bundled in cluster groups.
346
347Very large ntuples or combined ntuples (chains, friends) contain multiple cluster groups. The cluster groups
348may contain sharded clusters.
349Every ntuple has at least one cluster group. The clusters in a cluster group are ordered corresponding to
350the order of page locations in the page list envelope that belongs to the cluster group (see format specification)
351*/
352// clang-format on
355
356private:
358 /// The cluster IDs can be empty if the corresponding page list is not loaded.
359 std::vector<DescriptorId_t> fClusterIds;
360 /// The page list that corresponds to the cluster group
362 /// Uncompressed size of the page list
363 std::uint64_t fPageListLength = 0;
364 /// The minimum first entry number of the clusters in the cluster group
365 std::uint64_t fMinEntry = 0;
366 /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
367 std::uint64_t fEntrySpan = 0;
368 /// Number of clusters is always known even if the cluster IDs are not (yet) populated
369 std::uint32_t fNClusters = 0;
370
371public:
377
379 // Creates a clone without the cluster IDs
381
382 bool operator==(const RClusterGroupDescriptor &other) const;
383
385 std::uint32_t GetNClusters() const { return fNClusters; }
387 std::uint64_t GetPageListLength() const { return fPageListLength; }
388 const std::vector<DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
389 std::uint64_t GetMinEntry() const { return fMinEntry; }
390 std::uint64_t GetEntrySpan() const { return fEntrySpan; }
391 /// A cluster group is loaded in two stages. Stage one loads only the summary information.
392 /// Stage two loads the list of cluster IDs.
393 bool HasClusterDetails() const { return !fClusterIds.empty(); }
394};
395
396// clang-format off
397/**
398\class ROOT::Experimental::RNTupleDescriptor
399\ingroup NTuple
400\brief The on-storage meta-data of an ntuple
401
402Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
403several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
404relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
405its location and size, and for every column the range of element indexes as well as a list of pages and page
406locations.
407
408The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
409for ntuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
410
411The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
412the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
413written struct. This allows for forward and backward compatibility when the meta-data evolves.
414*/
415// clang-format on
418
419public:
420 class RHeaderExtension;
421
422private:
423 /// The ntuple name needs to be unique in a given storage location (file)
424 std::string fName;
425 /// Free text from the user
426 std::string fDescription;
427
428 std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
429 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
430 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
431
432 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster groups are added
433 std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
434 std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
435
436 /**
437 * Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
438 * active page locations. During the lifetime of the descriptor, page location information for clusters
439 * can be added or removed. When this happens, the generation should be increased, so that users of the
440 * descriptor know that the information changed. The generation is increased, e.g., by the page source's
441 * exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
442 */
443 std::uint64_t fGeneration = 0;
444
445 std::set<unsigned int> fFeatureFlags;
446 std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
447 std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
448 std::unordered_map<DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
449 /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
450 /// from a chain of files
451 std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
452 std::unique_ptr<RHeaderExtension> fHeaderExtension;
453
454public:
455 static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
456
457 // clang-format off
458 /**
459 \class ROOT::Experimental::RNTupleDescriptor::RHeaderExtension
460 \ingroup NTuple
461 \brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
462 */
463 // clang-format on
466
467 private:
468 /// Contains the list of field IDs that are part of the header extension; the corresponding columns are
469 /// available via `GetColumnIterable()`.
470 std::vector<DescriptorId_t> fFields;
471 /// Number of logical and physical columns; updated by the descriptor builder when columns are added
472 std::uint64_t fNLogicalColumns = 0;
473 std::uint64_t fNPhysicalColumns = 0;
474
475 void AddFieldId(DescriptorId_t id) { fFields.push_back(id); }
476 void AddColumn(bool isAliasColumn)
477 {
479 if (!isAliasColumn)
481 }
482
483 public:
484 std::size_t GetNFields() const { return fFields.size(); }
485 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
486 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
487 /// Return a vector containing the IDs of the top-level fields defined in the extension header
488 std::vector<DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
489 };
490
491 // clang-format off
492 /**
493 \class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorIterable
494 \ingroup NTuple
495 \brief Used to loop over a field's associated columns
496 */
497 // clang-format on
499 private:
500 /// The associated NTuple for this range.
502 /// The descriptor ids of the columns ordered by index id
503 std::vector<DescriptorId_t> fColumns = {};
504
505 void CollectColumnIds(DescriptorId_t fieldId);
506 public:
507 class RIterator {
508 private:
509 /// The enclosing range's NTuple.
511 /// The enclosing range's descriptor id list.
512 const std::vector<DescriptorId_t> &fColumns;
513 std::size_t fIndex = 0;
514 public:
515 using iterator_category = std::forward_iterator_tag;
518 using difference_type = std::ptrdiff_t;
521
522 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
523 : fNTuple(ntuple), fColumns(columns), fIndex(index) {}
524 iterator operator++() { ++fIndex; return *this; }
526 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
527 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
528 };
529
532
535 };
536
537 // clang-format off
538 /**
539 \class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorIterable
540 \ingroup NTuple
541 \brief Used to loop over a field's child fields
542 */
543 // clang-format on
545 private:
546 /// The associated NTuple for this range.
548 /// The descriptor ids of the child fields. These may be sorted using
549 /// a comparison function.
550 std::vector<DescriptorId_t> fFieldChildren = {};
551
552 public:
553 class RIterator {
554 private:
555 /// The enclosing range's NTuple.
557 /// The enclosing range's descriptor id list.
558 const std::vector<DescriptorId_t>& fFieldChildren;
559 std::size_t fIndex = 0;
560 public:
561 using iterator_category = std::forward_iterator_tag;
564 using difference_type = std::ptrdiff_t;
567
568 RIterator(const RNTupleDescriptor& ntuple, const std::vector<DescriptorId_t>& fieldChildren,
569 std::size_t index) : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index) {}
570 iterator operator++() { ++fIndex; return *this; }
574 );
575 }
576 bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
577 bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
578 };
580 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds()) {}
581 /// Sort the range using an arbitrary comparison function.
583 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator)
584 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
585 {
586 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
587 }
589 return RIterator(fNTuple, fFieldChildren, 0);
590 }
593 }
594 };
595
596 // clang-format off
597 /**
598 \class ROOT::Experimental::RNTupleDescriptor::RClusterGroupDescriptorIterable
599 \ingroup NTuple
600 \brief Used to loop over all the cluster groups of an ntuple (in unspecified order)
601
602 Enumerate all cluster group IDs from the cluster group descriptor. No specific order can be assumed, use
603 FindNextClusterGroupId and FindPrevClusterGroupId to traverse cluster groups by entry number.
604 */
605 // clang-format on
607 private:
608 /// The associated NTuple for this range.
610
611 public:
612 class RIterator {
613 private:
614 /// The enclosing range's NTuple.
616 std::size_t fIndex = 0;
617
618 public:
619 using iterator_category = std::forward_iterator_tag;
622 using difference_type = std::ptrdiff_t;
625
626 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
628 {
629 ++fIndex;
630 return *this;
631 }
633 {
634 auto it = fNTuple.fClusterGroupDescriptors.begin();
635 std::advance(it, fIndex);
636 return it->second;
637 }
638 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
639 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
640 };
641
645 };
646
647 // clang-format off
648 /**
649 \class ROOT::Experimental::RNTupleDescriptor::RClusterDescriptorIterable
650 \ingroup NTuple
651 \brief Used to loop over all the clusters of an ntuple (in unspecified order)
652
653 Enumerate all cluster IDs from the cluster descriptor. No specific order can be assumed, use
654 FindNextClusterId and FindPrevClusterId to traverse clusters by entry number.
655 */
656 // clang-format on
658 private:
659 /// The associated NTuple for this range.
661 public:
662 class RIterator {
663 private:
664 /// The enclosing range's NTuple.
666 std::size_t fIndex = 0;
667 public:
668 using iterator_category = std::forward_iterator_tag;
671 using difference_type = std::ptrdiff_t;
674
675 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
676 iterator operator++() { ++fIndex; return *this; }
678 auto it = fNTuple.fClusterDescriptors.begin();
679 std::advance(it, fIndex);
680 return it->second;
681 }
682 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
683 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
684 };
685
689 };
690
691 RNTupleDescriptor() = default;
692 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
696
697 std::unique_ptr<RNTupleDescriptor> Clone() const;
698
699 bool operator ==(const RNTupleDescriptor &other) const;
700
701 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
702 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
703 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
704
706 return fFieldDescriptors.at(fieldId);
707 }
709 return fColumnDescriptors.at(columnId);
710 }
712 {
713 return fClusterGroupDescriptors.at(clusterGroupId);
714 }
716 return fClusterDescriptors.at(clusterId);
717 }
718
720 return RFieldDescriptorIterable(*this, fieldDesc);
721 }
723 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
724 {
725 return RFieldDescriptorIterable(*this, fieldDesc, comparator);
726 }
728 return GetFieldIterable(GetFieldDescriptor(fieldId));
729 }
731 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
732 {
733 return GetFieldIterable(GetFieldDescriptor(fieldId), comparator);
734 }
737 }
739 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
740 {
741 return GetFieldIterable(GetFieldZeroId(), comparator);
742 }
743
745 {
746 return RColumnDescriptorIterable(*this);
747 }
749 {
750 return RColumnDescriptorIterable(*this, fieldDesc);
751 }
753 {
754 return RColumnDescriptorIterable(*this, GetFieldDescriptor(fieldId));
755 }
756
758
760 {
761 return RClusterDescriptorIterable(*this);
762 }
763
764 std::string GetName() const { return fName; }
765 std::string GetDescription() const { return fDescription; }
766
767 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
768 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
769 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
770 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
771 std::size_t GetNClusters() const { return fNClusters; }
772 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
773
774 /// We know the number of entries from adding the cluster summaries
776 NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const;
777
778 /// Returns the logical parent of all top-level NTuple data fields.
781 DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const;
782 /// Searches for a top-level field
783 DescriptorId_t FindFieldId(std::string_view fieldName) const;
784 DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
785 DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
789
790 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
791 /// In case of invalid field ID, an empty string is returned.
792 std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
793
794 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
795 std::vector<std::uint64_t> GetFeatureFlags() const;
796
797 /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
798 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
799
800 /// Methods to load and drop cluster group details (cluster IDs and page locations)
801 RResult<void> AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
803
804 std::uint64_t GetGeneration() const { return fGeneration; }
806
807 /// Re-create the C++ model from the stored meta-data
808 std::unique_ptr<RNTupleModel> CreateModel() const;
809 void PrintInfo(std::ostream &output) const;
810};
811
812namespace Internal {
813
814// clang-format off
815/**
816\class ROOT::Experimental::Internal::RColumnDescriptorBuilder
817\ingroup NTuple
818\brief A helper class for piece-wise construction of an RColumnDescriptor
819
820Dangling column descriptors can become actual descriptors when added to an
821RNTupleDescriptorBuilder instance and then linked to their fields.
822*/
823// clang-format on
825private:
827public:
828 /// Make an empty column descriptor builder.
830
832 {
833 fColumn.fLogicalColumnId = logicalColumnId;
834 return *this;
835 }
837 {
838 fColumn.fPhysicalColumnId = physicalColumnId;
839 return *this;
840 }
842 fColumn.fModel = model;
843 return *this;
844 }
846 fColumn.fFieldId = fieldId;
847 return *this;
848 }
851 return *this;
852 }
853 RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx)
854 {
855 fColumn.fFirstElementIndex = firstElementIdx;
856 return *this;
857 }
859 /// Attempt to make a column descriptor. This may fail if the column
860 /// was not given enough information to make a proper descriptor.
862};
863
864
865// clang-format off
866/**
867\class ROOT::Experimental::Internal::RFieldDescriptorBuilder
868\ingroup NTuple
869\brief A helper class for piece-wise construction of an RFieldDescriptor
870
871Dangling field descriptors describe a single field in isolation. They are
872missing the necessary relationship information (parent field, any child fields)
873required to describe a real NTuple field.
874
875Dangling field descriptors can only become actual descriptors when added to an
876RNTupleDescriptorBuilder instance and then linked to other fields.
877*/
878// clang-format on
880private:
882public:
883 /// Make an empty dangling field descriptor.
885 /// Make a new RFieldDescriptorBuilder based off an existing descriptor.
886 /// Relationship information is lost during the conversion to a
887 /// dangling descriptor:
888 /// * Parent id is reset to an invalid id.
889 /// * Field children ids are forgotten.
890 ///
891 /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
892 explicit RFieldDescriptorBuilder(const RFieldDescriptor& fieldDesc);
893
894 /// Make a new RFieldDescriptorBuilder based off a live NTuple field.
895 static RFieldDescriptorBuilder FromField(const RFieldBase &field);
896
898 fField.fFieldId = fieldId;
899 return *this;
900 }
901 RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
902 {
903 fField.fFieldVersion = fieldVersion;
904 return *this;
905 }
906 RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
907 {
908 fField.fTypeVersion = typeVersion;
909 return *this;
910 }
913 return *this;
914 }
915 RFieldDescriptorBuilder& FieldName(const std::string& fieldName) {
916 fField.fFieldName = fieldName;
917 return *this;
918 }
919 RFieldDescriptorBuilder& FieldDescription(const std::string& fieldDescription) {
920 fField.fFieldDescription = fieldDescription;
921 return *this;
922 }
923 RFieldDescriptorBuilder& TypeName(const std::string& typeName) {
924 fField.fTypeName = typeName;
925 return *this;
926 }
927 RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias)
928 {
929 fField.fTypeAlias = typeAlias;
930 return *this;
931 }
932 RFieldDescriptorBuilder& NRepetitions(std::uint64_t nRepetitions) {
933 fField.fNRepetitions = nRepetitions;
934 return *this;
935 }
937 fField.fStructure = structure;
938 return *this;
939 }
941 /// Attempt to make a field descriptor. This may fail if the dangling field
942 /// was not given enough information to make a proper descriptor.
944};
945
946
947// clang-format off
948/**
949\class ROOT::Experimental::Internal::RClusterDescriptorBuilder
950\ingroup NTuple
951\brief A helper class for piece-wise construction of an RClusterDescriptor
952
953The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
954piecewise addition of page locations.
955*/
956// clang-format on
958private:
960
961public:
963 {
964 fCluster.fClusterId = clusterId;
965 return *this;
966 }
967
968 RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
969 {
970 fCluster.fFirstEntryIndex = firstEntryIndex;
971 return *this;
972 }
973
974 RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
975 {
976 fCluster.fNEntries = nEntries;
977 return *this;
978 }
979
980 RResult<void> CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex,
981 std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
982
983 /// Add column and page ranges for deferred columns missing in this cluster. The locator type for the synthesized
984 /// page ranges is `kTypePageZero`. All the page sources must be able to populate the 'zero' page from such locator.
985 /// Any call to `CommitColumnRange()` should happen before calling this function.
987
988 /// Move out the full cluster descriptor including page locations
990};
991
992// clang-format off
993/**
994\class ROOT::Experimental::Internal::RClusterGroupDescriptorBuilder
995\ingroup NTuple
996\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
997*/
998// clang-format on
1000private:
1002
1003public:
1006
1008 {
1009 fClusterGroup.fClusterGroupId = clusterGroupId;
1010 return *this;
1011 }
1013 {
1014 fClusterGroup.fPageListLocator = pageListLocator;
1015 return *this;
1016 }
1017 RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1018 {
1019 fClusterGroup.fPageListLength = pageListLength;
1020 return *this;
1021 }
1023 {
1024 fClusterGroup.fMinEntry = minEntry;
1025 return *this;
1026 }
1028 {
1029 fClusterGroup.fEntrySpan = entrySpan;
1030 return *this;
1031 }
1033 {
1034 fClusterGroup.fNClusters = nClusters;
1035 return *this;
1036 }
/// Sets the cluster IDs of the cluster group being built. The number of IDs passed in must equal the
/// cluster count already stored in the group; otherwise an RException is thrown before the IDs are touched.
1037 void AddClusters(const std::vector<DescriptorId_t> &clusterIds)
1038 {
1039 if (clusterIds.size() != fClusterGroup.GetNClusters())
1040 throw RException(R__FAIL("mismatch of number of clusters"));
1041 fClusterGroup.fClusterIds = clusterIds;
1042 }
1043
1045};
1046
1047// clang-format off
1048/**
1049\class ROOT::Experimental::Internal::RColumnGroupDescriptorBuilder
1050\ingroup NTuple
1051\brief A helper class for piece-wise construction of an RColumnGroupDescriptor
1052*/
1053// clang-format on
1055private:
1057
1058public:
1060
1062 {
1063 fColumnGroup.fColumnGroupId = columnGroupId;
1064 return *this;
1065 }
1066 void AddColumn(DescriptorId_t physicalId) { fColumnGroup.fPhysicalColumnIds.insert(physicalId); }
1067
1069};
1070
1071// clang-format off
1072/**
1073\class ROOT::Experimental::Internal::RNTupleDescriptorBuilder
1074\ingroup NTuple
1075\brief A helper class for piece-wise construction of an RNTupleDescriptor
1076
1077Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1078*/
1079// clang-format on
1081private:
1084public:
1085 /// Checks whether invariants hold:
1086 /// * NTuple name is valid
1087 /// * Fields have valid parent and child ids
1091
1092 void SetNTuple(const std::string_view name, const std::string_view description);
1093 void SetFeature(unsigned int flag);
1094
1095 void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1097 /// The real footer size also includes the page list envelopes
1099
1100 void AddField(const RFieldDescriptor& fieldDesc);
1102
1103 void AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId,
1104 const RColumnModel &model, std::uint32_t index, std::uint64_t firstElementIdx = 0U);
1106
1109
1110 /// Clears so-far stored clusters, fields, and columns and return to a pristine ntuple descriptor
1111 void Reset();
1112
1113 /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1114 /// annotated as being part of the header extension.
1115 void BeginHeaderExtension();
1116};
1117
1118} // namespace Internal
1119} // namespace Experimental
1120} // namespace ROOT
1121
1122#endif // ROOT7_RNTupleDescriptor
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
TObject * clone(const char *newname) const override
Definition RooChi2Var.h:9
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
char name[80]
Definition TGX11.cxx:110
A helper class for piece-wise construction of an RClusterDescriptor.
RClusterDescriptorBuilder & AddDeferredColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for deferred columns missing in this cluster.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
RClusterDescriptorBuilder & ClusterId(DescriptorId_t clusterId)
RClusterDescriptorBuilder & NEntries(std::uint64_t nEntries)
RClusterDescriptorBuilder & FirstEntryIndex(std::uint64_t firstEntryIndex)
RResult< void > CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
void AddClusters(const std::vector< DescriptorId_t > &clusterIds)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & PhysicalColumnId(DescriptorId_t physicalColumnId)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & FirstElementIndex(std::uint64_t firstElementIdx)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & LogicalColumnId(DescriptorId_t logicalColumnId)
A column element encapsulates the translation between basic C++ types and their column representation...
A helper class for piece-wise construction of an RColumnGroupDescriptor.
RColumnGroupDescriptorBuilder & ColumnGroupId(DescriptorId_t columnGroupId)
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
static RFieldDescriptorBuilder FromField(const RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & TypeAlias(const std::string &typeAlias)
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
A helper class for piece-wise construction of an RNTupleDescriptor.
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
void SetNTuple(const std::string_view name, const std::string_view description)
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
void AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index, std::uint64_t firstElementIdx=0U)
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
Records the partition of data into pages for a particular column in a particular cluster.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange, i.e.
RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
RPageRange(const RPageRange &other)=delete
RPageRange & operator=(const RPageRange &other)=delete
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
RClusterDescriptor(RClusterDescriptor &&other)=default
bool ContainsColumn(DescriptorId_t physicalId) const
RClusterDescriptor(const RClusterDescriptor &other)=delete
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
const RColumnRange & GetColumnRange(DescriptorId_t physicalId) const
std::unordered_set< DescriptorId_t > GetColumnIds() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
const RPageRange & GetPageRange(DescriptorId_t physicalId) const
Clusters are bundled in cluster groups.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
std::uint64_t fPageListLength
Uncompressed size of the page list.
RClusterGroupDescriptor CloneSummary() const
const std::vector< DescriptorId_t > & GetClusterIds() const
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool HasClusterDetails() const
A cluster group is loaded in two stages.
bool operator==(const RClusterGroupDescriptor &other) const
std::vector< DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
Meta-data stored for every column of an ntuple.
DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
std::uint64_t fFirstElementIndex
Specifies the index for the first stored element for this column.
RColumnDescriptor(const RColumnDescriptor &other)=delete
DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
RColumnModel fModel
Contains the column type and whether it is sorted.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
RColumnGroupDescriptor(const RColumnGroupDescriptor &other)=delete
RColumnGroupDescriptor & operator=(const RColumnGroupDescriptor &other)=delete
std::unordered_set< DescriptorId_t > fPhysicalColumnIds
RColumnGroupDescriptor & operator=(RColumnGroupDescriptor &&other)=default
bool operator==(const RColumnGroupDescriptor &other) const
bool Contains(DescriptorId_t physicalId) const
const std::unordered_set< DescriptorId_t > & GetPhysicalColumnIds() const
RColumnGroupDescriptor(RColumnGroupDescriptor &&other)=default
Holds the static meta-data of an RNTuple column.
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
A field translates read and write calls from/to underlying columns to/from tree values.
Definition RField.hxx:94
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::unique_ptr< RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
const std::vector< DescriptorId_t > & GetLinkIds() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
Used to loop over all the clusters of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
Used to loop over all the cluster groups of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by index id.
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
Summarizes information about fields and the corresponding columns that were added after the header ha...
std::vector< DescriptorId_t > fFields
Contains the list of field IDs that are part of the header extension; the corresponding columns are a...
std::uint64_t fNLogicalColumns
Number of logical and physical columns; updated by the descriptor builder when columns are added.
The on-storage meta-data of an ntuple.
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const
DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::string fName
The ntuple name needs to be unique in a given storage location (file)
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
static constexpr unsigned int kFeatureFlagTest
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
RFieldDescriptorIterable GetTopLevelFields(const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
RResult< void > AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RClusterDescriptorIterable GetClusterIterable() const
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
std::unique_ptr< RNTupleModel > CreateModel() const
Re-create the C++ model from the stored meta-data.
RResult< void > DropClusterGroupDetails(DescriptorId_t clusterGroupId)
std::unique_ptr< RHeaderExtension > fHeaderExtension
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RColumnDescriptorIterable GetColumnIterable() const
bool HasFeature(unsigned int flag) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::string fDescription
Free text from the user.
RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
std::vector< std::uint64_t > GetFeatureFlags() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
constexpr int kUnknownCompressionSettings
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr ClusterSize_t kInvalidClusterIndex(std::uint64_t(-1))
constexpr DescriptorId_t kInvalidDescriptorId
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
The window of element indexes of a particular column in a particular cluster.
int fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
The number of column elements in the cluster.
ClusterSize_t::ValueType fFirstInPage
Index (in cluster) of the first element in page.
RPageInfoExtended(const RPageInfo &pi, ClusterSize_t::ValueType i, NTupleSize_t n)
NTupleSize_t fPageNo
Page number in the corresponding RPageRange.
We do not need to store the element size / uncompressed page size because we know to which column the...
std::uint32_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
Wrap the integer in a struct in order to avoid template specialization clash with std::uint64_t.
Generic information about the physical location of data.
static void output()