Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleDescriptor.hxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2018-07-19
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT7_RNTupleDescriptor
17#define ROOT7_RNTupleDescriptor
18
19#include <ROOT/RColumnModel.hxx>
20#include <ROOT/RError.hxx>
22#include <ROOT/RNTupleUtil.hxx>
23#include <ROOT/RSpan.hxx>
24#include <ROOT/RStringView.hxx>
25
26#include <algorithm>
27#include <chrono>
28#include <functional>
29#include <iterator>
30#include <map>
31#include <memory>
32#include <ostream>
33#include <vector>
34#include <string>
35#include <unordered_map>
36#include <unordered_set>
37
38namespace ROOT {
39namespace Experimental {
40
41class RFieldDescriptorBuilder;
42class RNTupleDescriptor;
43class RNTupleDescriptorBuilder;
44class RNTupleModel;
45
46namespace Detail {
47 class RFieldBase;
48}
49
50
51// clang-format off
52/**
53\class ROOT::Experimental::RFieldDescriptor
54\ingroup NTuple
55\brief Meta-data stored for every field of an ntuple
56*/
57// clang-format on
61
62private:
64 /// The version of the C++-type-to-column translation mechanics
65 std::uint32_t fFieldVersion = 0;
66 /// The version of the C++ type itself
67 std::uint32_t fTypeVersion = 0;
68 /// The leaf name, not including parent fields
69 std::string fFieldName;
70 /// Free text set by the user
71 std::string fFieldDescription;
72 /// The C++ type that was used when writing the field
73 std::string fTypeName;
74 /// The number of elements per entry for fixed-size arrays
75 std::uint64_t fNRepetitions = 0;
76 /// The structural information carried by this field in the data model tree
78 /// Establishes sub field relationships, such as classes and collections
80 /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
81 /// order of sub fields.
82 std::vector<DescriptorId_t> fLinkIds;
83
84public:
85 RFieldDescriptor() = default;
86 RFieldDescriptor(const RFieldDescriptor &other) = delete;
90
91 bool operator==(const RFieldDescriptor &other) const;
92 /// Get a copy of the descriptor
93 RFieldDescriptor Clone() const;
94 /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
95 /// access to sub fields, which is provided by the ntuple descriptor argument.
96 std::unique_ptr<Detail::RFieldBase> CreateField(const RNTupleDescriptor &ntplDesc) const;
97
98 DescriptorId_t GetId() const { return fFieldId; }
99 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
100 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
101 std::string GetFieldName() const { return fFieldName; }
102 std::string GetFieldDescription() const { return fFieldDescription; }
103 std::string GetTypeName() const { return fTypeName; }
104 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
107 const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
108};
109
110
111// clang-format off
112/**
113\class ROOT::Experimental::RColumnDescriptor
114\ingroup NTuple
115\brief Meta-data stored for every column of an ntuple
116*/
117// clang-format on
121
122private:
124 /// Contains the column type and whether it is sorted
126 /// Every column belongs to one and only one field
128 /// A field can be serialized into several columns, which are numbered from zero to $n$
129 std::uint32_t fIndex;
130
131public:
132 RColumnDescriptor() = default;
133 RColumnDescriptor(const RColumnDescriptor &other) = delete;
137
138 bool operator==(const RColumnDescriptor &other) const;
139 /// Get a copy of the descriptor
140 RColumnDescriptor Clone() const;
141
142 DescriptorId_t GetId() const { return fColumnId; }
143 RColumnModel GetModel() const { return fModel; }
144 std::uint32_t GetIndex() const { return fIndex; }
146};
147
148// clang-format off
149/**
150\class ROOT::Experimental::RColumnGroupDescriptor
151\ingroup NTuple
152\brief Meta-data for a set of columns; non-trivial column groups are used for sharded clusters
153
154Clusters can span a subset of columns. Such subsets are described as a column group. An empty column group
155is used to denote the column group of all the columns. Every ntuple has at least one column group.
156*/
157// clang-format on
160
161private:
163 std::unordered_set<DescriptorId_t> fColumnIds;
164
165public:
171
172 bool operator==(const RColumnGroupDescriptor &other) const;
173
175 const std::unordered_set<DescriptorId_t> &GetColumnIds() const { return fColumnIds; }
176 bool Contains(DescriptorId_t columnId) const { return fColumnIds.empty() || fColumnIds.count(columnId) > 0; }
177 bool HasAllColumns() const { return fColumnIds.empty(); }
178};
179
180// clang-format off
181/**
182\class ROOT::Experimental::RClusterDescriptor
183\ingroup NTuple
184\brief Meta-data for a set of ntuple clusters
185
186The cluster descriptor is built in two phases. In a first phase, the descriptor has only summary data,
187i.e. the ID and the event range. In a second phase, page locations and column ranges are added.
188Both phases are populated by the RClusterDescriptorBuilder.
189Clusters usually span across all available columns but in some cases they can describe only a subset of the columns,
190for instance when describing friend ntuples.
191*/
192// clang-format on
195
196public:
197 /// The window of element indexes of a particular column in a particular cluster
200 /// A 64bit element index
202 /// A 32bit value for the number of column elements in the cluster
204 /// The usual format for ROOT compression settings (see Compression.h).
205 /// The pages of a particular column in a particular cluster are all compressed with the same settings.
206 std::int64_t fCompressionSettings = 0;
207
208 // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
209 // Should this be done on the field level?
210
211 bool operator==(const RColumnRange &other) const {
212 return fColumnId == other.fColumnId && fFirstElementIndex == other.fFirstElementIndex &&
214 }
215
218 }
219 };
220
221 /// Records the partition of data into pages for a particular column in a particular cluster
222 struct RPageRange {
223 /// We do not need to store the element size / uncompressed page size because we know to which column
224 /// the page belongs
225 struct RPageInfo {
226 /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
228 /// The meaning of fLocator depends on the storage backend.
230
231 bool operator==(const RPageInfo &other) const {
232 return fNElements == other.fNElements && fLocator == other.fLocator;
233 }
234 };
236 /// Index (in cluster) of the first element in page.
238 /// Page number in the corresponding RPageRange.
240
241 RPageInfoExtended() = default;
243 : RPageInfo(pi), fFirstInPage(i), fPageNo(n) {}
244 };
245
246 RPageRange() = default;
247 RPageRange(const RPageRange &other) = delete;
248 RPageRange &operator =(const RPageRange &other) = delete;
249 RPageRange(RPageRange &&other) = default;
250 RPageRange &operator =(RPageRange &&other) = default;
251
253 RPageRange clone;
254 clone.fColumnId = fColumnId;
255 clone.fPageInfos = fPageInfos;
256 return clone;
257 }
258
259 /// Find the page in the RPageRange that contains the given element. The element must exist.
260 RPageInfoExtended Find(RClusterSize::ValueType idxInCluster) const;
261
263 std::vector<RPageInfo> fPageInfos;
264
265 bool operator==(const RPageRange &other) const {
266 return fColumnId == other.fColumnId && fPageInfos == other.fPageInfos;
267 }
268 };
269
270private:
272 /// Clusters can be swapped by adjusting the entry offsets
274 // TODO(jblomer): change to std::uint64_t
276 bool fHasPageLocations = false;
277
278 std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
279 std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
280
281 void EnsureHasPageLocations() const;
282
283public:
285 // Constructor for a summary-only cluster descriptor without page locations
286 RClusterDescriptor(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
287 : fClusterId(clusterId), fFirstEntryIndex(firstEntryIndex), fNEntries(ClusterSize_t(nEntries))
288 {
289 }
294
296
297 bool operator==(const RClusterDescriptor &other) const;
298
299 DescriptorId_t GetId() const { return fClusterId; }
303 {
305 return fColumnRanges.at(columnId);
306 }
308 {
310 return fPageRanges.at(columnId);
311 }
312 bool ContainsColumn(DescriptorId_t columnId) const;
313 std::unordered_set<DescriptorId_t> GetColumnIds() const;
314 std::uint64_t GetBytesOnStorage() const;
315 bool HasPageLocations() const { return fHasPageLocations; }
316};
317
318// clang-format off
319/**
320\class ROOT::Experimental::RClusterGroupDescriptor
321\ingroup NTuple
322\brief Clusters are stored in cluster groups. Cluster groups span all the columns of a certain event range.
323
324Very large ntuples or combined ntuples (chains, friends) contain multiple cluster groups. The cluster groups
325may contain sharded clusters. However, a cluster group must contain the clusters spanning all the columns for the
326given event range. Cluster groups must partition the entry range of an ntuple.
327Every ntuple has at least one cluster group. The clusters in a cluster group are ordered corresponding to
328the order of page locations in the page list envelope that belongs to the cluster group (see format specification)
329*/
330// clang-format on
333
334private:
336 std::vector<DescriptorId_t> fClusterIds;
337 /// The page list that corresponds to the cluster group
339 /// Uncompressed size of the page list
340 std::uint32_t fPageListLength = 0;
341
342public:
348
350
351 bool operator==(const RClusterGroupDescriptor &other) const;
352
354 std::uint64_t GetNClusters() const { return fClusterIds.size(); }
356 std::uint32_t GetPageListLength() const { return fPageListLength; }
357 bool Contains(DescriptorId_t clusterId) const
358 {
359 return std::find(fClusterIds.begin(), fClusterIds.end(), clusterId) != fClusterIds.end();
360 }
361 const std::vector<DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
362};
363
364// clang-format off
365/**
366\class ROOT::Experimental::RNTupleDescriptor
367\ingroup NTuple
368\brief The on-storage meta-data of an ntuple
369
370Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
371several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
372relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
373its location and size, and for every column the range of element indexes as well as a list of pages and page
374locations.
375
376The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
377for ntuple objects (pages, clusters, ...). It is supposed to be usable by all RPageStorage implementations.
378
379The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
380the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
381written struct. This allows for forward and backward compatibility when the meta-data evolves.
382*/
383// clang-format on
386
387private:
388 /// The ntuple name needs to be unique in a given storage location (file)
389 std::string fName;
390 /// Free text from the user
391 std::string fDescription;
392
393 std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
394 std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
395
396 std::uint64_t fNEntries = 0; ///< Updated by the descriptor builder when the cluster summaries are added
397
398 /**
399 * Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
400 * active page locations. During the lifetime of the descriptor, page location information for clusters
401 * can be added or removed. When this happens, the generation should be increased, so that users of the
402 * descriptor know that the information changed. The generation is increased, e.g., by the page source's
403 * exclusive lock guard around the descriptor. It is used, e.g., by the descriptor cache in RNTupleReader.
404 */
405 std::uint64_t fGeneration = 0;
406
407 std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
408 std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
409 std::unordered_map<DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
410 /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
411 /// from a chain of files
412 std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
413
414public:
415 // clang-format off
416 /**
417 \class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorIterable
418 \ingroup NTuple
419 \brief Used to loop over a field's associated columns
420 */
421 // clang-format on
423 private:
424 /// The associated NTuple for this range.
426 /// The descriptor ids of the columns ordered by index id
427 std::vector<DescriptorId_t> fColumns = {};
428 public:
429 class RIterator {
430 private:
431 /// The enclosing range's NTuple.
433 /// The enclosing range's descriptor id list.
434 const std::vector<DescriptorId_t> &fColumns;
435 std::size_t fIndex = 0;
436 public:
437 using iterator_category = std::forward_iterator_tag;
440 using difference_type = std::ptrdiff_t;
443
444 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
445 : fNTuple(ntuple), fColumns(columns), fIndex(index) {}
446 iterator operator++() { ++fIndex; return *this; }
448 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
449 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
450 };
451
453 : fNTuple(ntuple)
454 {
455 for (unsigned int i = 0; true; ++i) {
456 auto columnId = ntuple.FindColumnId(field.GetId(), i);
457 if (columnId == kInvalidDescriptorId)
458 break;
459 fColumns.emplace_back(columnId);
460 }
461 }
464 };
465
466 // clang-format off
467 /**
468 \class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorIterable
469 \ingroup NTuple
470 \brief Used to loop over a field's child fields
471 */
472 // clang-format on
474 private:
475 /// The associated NTuple for this range.
477 /// The descriptor ids of the child fields. These may be sorted using
478 /// a comparison function.
479 std::vector<DescriptorId_t> fFieldChildren = {};
480 public:
481 class RIterator {
482 private:
483 /// The enclosing range's NTuple.
485 /// The enclosing range's descriptor id list.
486 const std::vector<DescriptorId_t>& fFieldChildren;
487 std::size_t fIndex = 0;
488 public:
489 using iterator_category = std::forward_iterator_tag;
492 using difference_type = std::ptrdiff_t;
495
496 RIterator(const RNTupleDescriptor& ntuple, const std::vector<DescriptorId_t>& fieldChildren,
497 std::size_t index) : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index) {}
498 iterator operator++() { ++fIndex; return *this; }
502 );
503 }
504 bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
505 bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
506 };
508 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds()) {}
509 /// Sort the range using an arbitrary comparison function.
511 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator)
512 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
513 {
514 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
515 }
517 return RIterator(fNTuple, fFieldChildren, 0);
518 }
521 }
522 };
523
524 // clang-format off
525 /**
526 \class ROOT::Experimental::RNTupleDescriptor::RClusterGroupDescriptorIterable
527 \ingroup NTuple
528 \brief Used to loop over all the cluster groups of an ntuple (in unspecified order)
529
530 Enumerate all cluster group IDs from the cluster group descriptor. No specific order can be assumed, use
531 FindNextClusterGroupId and FindPrevClusterGroupId to traverse cluster groups by entry number.
532 */
533 // clang-format on
535 private:
536 /// The associated NTuple for this range.
538
539 public:
540 class RIterator {
541 private:
542 /// The enclosing range's NTuple.
544 std::size_t fIndex = 0;
545
546 public:
547 using iterator_category = std::forward_iterator_tag;
550 using difference_type = std::ptrdiff_t;
553
554 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
556 {
557 ++fIndex;
558 return *this;
559 }
561 {
562 auto it = fNTuple.fClusterGroupDescriptors.begin();
563 std::advance(it, fIndex);
564 return it->second;
565 }
566 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
567 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
568 };
569
573 };
574
575 // clang-format off
576 /**
577 \class ROOT::Experimental::RNTupleDescriptor::RClusterDescriptorIterable
578 \ingroup NTuple
579 \brief Used to loop over all the clusters of an ntuple (in unspecified order)
580
581 Enumerate all cluster IDs from the cluster descriptor. No specific order can be assumed, use
582 FindNextClusterId and FindPrevClusterId to traverse clusters by entry number.
583 */
584 // clang-format on
586 private:
587 /// The associated NTuple for this range.
589 public:
590 class RIterator {
591 private:
592 /// The enclosing range's NTuple.
594 std::size_t fIndex = 0;
595 public:
596 using iterator_category = std::forward_iterator_tag;
599 using difference_type = std::ptrdiff_t;
602
603 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
604 iterator operator++() { ++fIndex; return *this; }
606 auto it = fNTuple.fClusterDescriptors.begin();
607 std::advance(it, fIndex);
608 return it->second;
609 }
610 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
611 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
612 };
613
617 };
618
619 RNTupleDescriptor() = default;
620 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
624
625 std::unique_ptr<RNTupleDescriptor> Clone() const;
626
627 bool operator ==(const RNTupleDescriptor &other) const;
628
629 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
630 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
631
633 return fFieldDescriptors.at(fieldId);
634 }
636 return fColumnDescriptors.at(columnId);
637 }
639 {
640 return fClusterGroupDescriptors.at(clusterGroupId);
641 }
643 return fClusterDescriptors.at(clusterId);
644 }
645
647 return RFieldDescriptorIterable(*this, fieldDesc);
648 }
650 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
651 {
652 return RFieldDescriptorIterable(*this, fieldDesc, comparator);
653 }
655 return GetFieldIterable(GetFieldDescriptor(fieldId));
656 }
658 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
659 {
660 return GetFieldIterable(GetFieldDescriptor(fieldId), comparator);
661 }
664 }
666 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
667 {
668 return GetFieldIterable(GetFieldZeroId(), comparator);
669 }
670
672 {
673 return RColumnDescriptorIterable(*this, fieldDesc);
674 }
676 {
677 return RColumnDescriptorIterable(*this, GetFieldDescriptor(fieldId));
678 }
679
681
683 {
684 return RClusterDescriptorIterable(*this);
685 }
686
687 std::string GetName() const { return fName; }
688 std::string GetDescription() const { return fDescription; }
689
690 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
691 std::size_t GetNColumns() const { return fColumnDescriptors.size(); }
692 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
693 std::size_t GetNClusters() const { return fClusterDescriptors.size(); }
694
695 /// We know the number of entries from adding the cluster summaries
698
699 /// Returns the logical parent of all top-level NTuple data fields.
702 DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const;
703 /// Searches for a top-level field
704 DescriptorId_t FindFieldId(std::string_view fieldName) const;
705 DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
709
710 /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
711 /// In case of invalid field ID, an empty string is returned.
712 std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
713
714 /// Methods to load and drop cluster details
717
718 std::uint64_t GetGeneration() const { return fGeneration; }
720
721 /// Re-create the C++ model from the stored meta-data
722 std::unique_ptr<RNTupleModel> GenerateModel() const;
723 void PrintInfo(std::ostream &output) const;
724};
725
726
727// clang-format off
728/**
729\class ROOT::Experimental::RColumnDescriptorBuilder
730\ingroup NTuple
731\brief A helper class for piece-wise construction of an RColumnDescriptor
732
733Dangling column descriptors can become actual descriptors when added to an
734RNTupleDescriptorBuilder instance and then linked to their fields.
735*/
736// clang-format on
738private:
740public:
741 /// Make an empty column descriptor builder.
743
745 fColumn.fColumnId = columnId;
746 return *this;
747 }
749 fColumn.fModel = model;
750 return *this;
751 }
753 fColumn.fFieldId = fieldId;
754 return *this;
755 }
758 return *this;
759 }
761 /// Attempt to make a column descriptor. This may fail if the column
762 /// was not given enough information to make a proper descriptor.
764};
765
766
767// clang-format off
768/**
769\class ROOT::Experimental::RFieldDescriptorBuilder
770\ingroup NTuple
771\brief A helper class for piece-wise construction of an RFieldDescriptor
772
773Dangling field descriptors describe a single field in isolation. They are
774missing the necessary relationship information (parent field, any child fields)
775required to describe a real NTuple field.
776
777Dangling field descriptors can only become actual descriptors when added to an
778RNTupleDescriptorBuilder instance and then linked to other fields.
779*/
780// clang-format on
782private:
784public:
785 /// Make an empty dangling field descriptor.
787 /// Make a new RFieldDescriptorBuilder based off an existing descriptor.
788 /// Relationship information is lost during the conversion to a
789 /// dangling descriptor:
790 /// * Parent id is reset to an invalid id.
791 /// * Field children ids are forgotten.
792 ///
793 /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
794 explicit RFieldDescriptorBuilder(const RFieldDescriptor& fieldDesc);
795
796 /// Make a new RFieldDescriptorBuilder based off a live NTuple field.
798
800 fField.fFieldId = fieldId;
801 return *this;
802 }
803 RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
804 {
805 fField.fFieldVersion = fieldVersion;
806 return *this;
807 }
808 RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
809 {
810 fField.fTypeVersion = typeVersion;
811 return *this;
812 }
815 return *this;
816 }
817 RFieldDescriptorBuilder& FieldName(const std::string& fieldName) {
818 fField.fFieldName = fieldName;
819 return *this;
820 }
821 RFieldDescriptorBuilder& FieldDescription(const std::string& fieldDescription) {
822 fField.fFieldDescription = fieldDescription;
823 return *this;
824 }
825 RFieldDescriptorBuilder& TypeName(const std::string& typeName) {
826 fField.fTypeName = typeName;
827 return *this;
828 }
829 RFieldDescriptorBuilder& NRepetitions(std::uint64_t nRepetitions) {
830 fField.fNRepetitions = nRepetitions;
831 return *this;
832 }
834 fField.fStructure = structure;
835 return *this;
836 }
838 /// Attempt to make a field descriptor. This may fail if the dangling field
839 /// was not given enough information to make a proper descriptor.
841};
842
843
844// clang-format off
845/**
846\class ROOT::Experimental::RClusterDescriptorBuilder
847\ingroup NTuple
848\brief A helper class for piece-wise construction of an RClusterDescriptor
849
850The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
851piecewise addition of page locations.
852*/
853// clang-format on
855private:
857
858public:
859 /// Make a cluster descriptor builder from summary data only: the cluster ID and its entry range.
860 RClusterDescriptorBuilder(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
861 : fCluster(clusterId, firstEntryIndex, nEntries)
862 {
863 }
864
865 RResult<void> CommitColumnRange(DescriptorId_t columnId, std::uint64_t firstElementIndex,
866 std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
867
868 /// Move out the full cluster descriptor including page locations
870};
871
872// clang-format off
873/**
874\class ROOT::Experimental::RClusterGroupDescriptorBuilder
875\ingroup NTuple
876\brief A helper class for piece-wise construction of an RClusterGroupDescriptor
877*/
878// clang-format on
880private:
882
883public:
885
887 {
888 fClusterGroup.fClusterGroupId = clusterGroupId;
889 return *this;
890 }
892 {
893 fClusterGroup.fPageListLocator = pageListLocator;
894 return *this;
895 }
896 RClusterGroupDescriptorBuilder &PageListLength(std::uint32_t pageListLength)
897 {
898 fClusterGroup.fPageListLength = pageListLength;
899 return *this;
900 }
901 void AddCluster(DescriptorId_t clusterId) { fClusterGroup.fClusterIds.emplace_back(clusterId); }
902
904
905 /// Used to prepare the cluster descriptor builders when loading the page locations for a certain cluster group
906 static std::vector<RClusterDescriptorBuilder>
907 GetClusterSummaries(const RNTupleDescriptor &ntplDesc, DescriptorId_t clusterGroupId);
908
910};
911
912// clang-format off
913/**
914\class ROOT::Experimental::RColumnGroupDescriptorBuilder
915\ingroup NTuple
916\brief A helper class for piece-wise construction of an RColumnGroupDescriptor
917*/
918// clang-format on
920private:
922
923public:
925
927 {
928 fColumnGroup.fColumnGroupId = columnGroupId;
929 return *this;
930 }
931 void AddColumn(DescriptorId_t columnId) { fColumnGroup.fColumnIds.insert(columnId); }
932
934};
935
936// clang-format off
937/**
938\class ROOT::Experimental::RNTupleDescriptorBuilder
939\ingroup NTuple
940\brief A helper class for piece-wise construction of an RNTupleDescriptor
941
942Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
943*/
944// clang-format on
946private:
948 std::uint32_t fHeaderCRC32 = 0;
949
951public:
952 /// Checks whether invariants hold:
953 /// * NTuple name is valid
954 /// * Fields have valid parent and child ids
956 const RNTupleDescriptor& GetDescriptor() const { return fDescriptor; }
958
959 void SetNTuple(const std::string_view name, const std::string_view description);
960 void SetHeaderCRC32(std::uint32_t crc32) { fHeaderCRC32 = crc32; }
961 std::uint32_t GetHeaderCRC32() const { return fHeaderCRC32; }
962
964 /// The real footer size also includes the page list envelopes
966
967 void AddField(const RFieldDescriptor& fieldDesc);
969
970 void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index);
972
973 RResult<void> AddClusterSummary(DescriptorId_t clusterId, std::uint64_t firstEntry, std::uint64_t nEntries);
975
976 /// Used during writing. For reading, cluster summaries are added in the builder and cluster details are added
977 /// on demand through the RNTupleDescriptor.
979
980 /// Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor
981 void Reset();
982};
983
984} // namespace Experimental
985} // namespace ROOT
986
987#endif // ROOT7_RNTupleDescriptor
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
char name[80]
Definition TGX11.cxx:110
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > CommitColumnRange(DescriptorId_t columnId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
Make an empty cluster descriptor builder.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
Meta-data for a set of ntuple clusters.
std::unordered_map< DescriptorId_t, RPageRange > fPageRanges
bool ContainsColumn(DescriptorId_t columnId) const
RClusterDescriptor(DescriptorId_t clusterId, std::uint64_t firstEntryIndex, std::uint64_t nEntries)
RClusterDescriptor(RClusterDescriptor &&other)=default
RClusterDescriptor(const RClusterDescriptor &other)=delete
const RPageRange & GetPageRange(DescriptorId_t columnId) const
NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets.
const RColumnRange & GetColumnRange(DescriptorId_t columnId) const
RClusterDescriptor & operator=(const RClusterDescriptor &other)=delete
std::unordered_set< DescriptorId_t > GetColumnIds() const
std::unordered_map< DescriptorId_t, RColumnRange > fColumnRanges
bool operator==(const RClusterDescriptor &other) const
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & ClusterGroupId(DescriptorId_t clusterGroupId)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder & PageListLength(std::uint32_t pageListLength)
static std::vector< RClusterDescriptorBuilder > GetClusterSummaries(const RNTupleDescriptor &ntplDesc, DescriptorId_t clusterGroupId)
Used to prepare the cluster descriptor builders when loading the page locations for a certain cluster...
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
Clusters are stored in cluster groups.
RClusterGroupDescriptor(const RClusterGroupDescriptor &other)=delete
RClusterGroupDescriptor & operator=(RClusterGroupDescriptor &&other)=default
const std::vector< DescriptorId_t > & GetClusterIds() const
RClusterGroupDescriptor & operator=(const RClusterGroupDescriptor &other)=delete
std::uint32_t fPageListLength
Uncompressed size of the page list.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
bool Contains(DescriptorId_t clusterId) const
bool operator==(const RClusterGroupDescriptor &other) const
RClusterGroupDescriptor(RClusterGroupDescriptor &&other)=default
A helper class for piece-wise construction of an RColumnDescriptor.
RColumnDescriptorBuilder & Model(const RColumnModel &model)
RColumnDescriptorBuilder()=default
Make an empty column descriptor builder.
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
RColumnDescriptorBuilder & FieldId(DescriptorId_t fieldId)
RColumnDescriptorBuilder & Index(std::uint32_t index)
RColumnDescriptorBuilder & ColumnId(DescriptorId_t columnId)
Meta-data stored for every column of an ntuple.
RColumnDescriptor(const RColumnDescriptor &other)=delete
RColumnDescriptor Clone() const
Get a copy of the descriptor.
RColumnDescriptor(RColumnDescriptor &&other)=default
DescriptorId_t fFieldId
Every column belongs to one and only one field.
RColumnDescriptor & operator=(const RColumnDescriptor &other)=delete
RColumnModel fModel
Contains the column type and whether it is sorted.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
bool operator==(const RColumnDescriptor &other) const
A helper class for piece-wise construction of an RColumnGroupDescriptor.
RColumnGroupDescriptorBuilder & ColumnGroupId(DescriptorId_t columnGroupId)
RResult< RColumnGroupDescriptor > MoveDescriptor()
Meta-data for a set of columns; non-trivial column groups are used for sharded clusters.
RColumnGroupDescriptor(const RColumnGroupDescriptor &other)=delete
RColumnGroupDescriptor & operator=(const RColumnGroupDescriptor &other)=delete
const std::unordered_set< DescriptorId_t > & GetColumnIds() const
RColumnGroupDescriptor & operator=(RColumnGroupDescriptor &&other)=default
bool operator==(const RColumnGroupDescriptor &other) const
std::unordered_set< DescriptorId_t > fColumnIds
bool Contains(DescriptorId_t columnId) const
RColumnGroupDescriptor(RColumnGroupDescriptor &&other)=default
Holds the static meta-data of a column in a tree.
A field translates read and write calls from/to underlying columns to/from tree values.
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder & FieldName(const std::string &fieldName)
RFieldDescriptorBuilder & NRepetitions(std::uint64_t nRepetitions)
static RFieldDescriptorBuilder FromField(const Detail::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live NTuple field.
RFieldDescriptorBuilder & Structure(const ENTupleStructure &structure)
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
RFieldDescriptorBuilder & TypeName(const std::string &typeName)
RFieldDescriptorBuilder & TypeVersion(std::uint32_t typeVersion)
RFieldDescriptorBuilder & ParentId(DescriptorId_t id)
RFieldDescriptorBuilder & FieldDescription(const std::string &fieldDescription)
RFieldDescriptorBuilder & FieldVersion(std::uint32_t fieldVersion)
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RFieldDescriptorBuilder & FieldId(DescriptorId_t fieldId)
Meta-data stored for every field of an ntuple.
std::vector< DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::unique_ptr< Detail::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc) const
In general, we create a field simply from the C++ type name.
std::string fFieldDescription
Free text set by the user.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
const std::vector< DescriptorId_t > & GetLinkIds() const
RFieldDescriptor(const RFieldDescriptor &other)=delete
DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
RFieldDescriptor Clone() const
Get a copy of the descriptor.
bool operator==(const RFieldDescriptor &other) const
ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
RFieldDescriptor & operator=(const RFieldDescriptor &other)=delete
RFieldDescriptor(RFieldDescriptor &&other)=default
std::string fTypeName
The C++ type that was used when writing the field.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
A helper class for piece-wise construction of an RNTupleDescriptor.
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
void AddColumn(DescriptorId_t columnId, DescriptorId_t fieldId, const RColumnModel &model, std::uint32_t index)
void AddToOnDiskFooterSize(std::uint64_t size)
The real footer size also includes the page list envelopes.
RResult< void > AddClusterSummary(DescriptorId_t clusterId, std::uint64_t firstEntry, std::uint64_t nEntries)
RResult< void > EnsureFieldExists(DescriptorId_t fieldId) const
RResult< void > AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId)
void SetNTuple(const std::string_view name, const std::string_view description)
const RNTupleDescriptor & GetDescriptor() const
void Reset()
Clears so-far stored clusters, fields, and columns and returns to a pristine ntuple descriptor.
void AddClusterGroup(RClusterGroupDescriptorBuilder &&clusterGroup)
RResult< void > AddClusterWithDetails(RClusterDescriptor &&clusterDesc)
Used during writing.
void AddField(const RFieldDescriptor &fieldDesc)
Used to loop over all the clusters of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
Used to loop over all the cluster groups of an ntuple (in unspecified order)
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &columns, std::size_t index)
const std::vector< DescriptorId_t > & fColumns
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
std::vector< DescriptorId_t > fColumns
The descriptor ids of the columns ordered by index id.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
const std::vector< DescriptorId_t > & fFieldChildren
The enclosing range's descriptor id list.
const RNTupleDescriptor & fNTuple
The enclosing range's NTuple.
RIterator(const RNTupleDescriptor &ntuple, const std::vector< DescriptorId_t > &fieldChildren, std::size_t index)
std::vector< DescriptorId_t > fFieldChildren
The descriptor ids of the child fields.
const RNTupleDescriptor & fNTuple
The associated NTuple for this range.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator)
Sort the range using an arbitrary comparison function.
RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
The on-storage meta-data of an ntuple.
std::unordered_map< DescriptorId_t, RClusterDescriptor > fClusterDescriptors
May contain only a subset of all the available clusters, e.g.
std::uint64_t fGeneration
Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for set of...
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster summaries are added.
std::unique_ptr< RNTupleModel > GenerateModel() const
Re-create the C++ model from the stored meta-data.
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
std::unordered_map< DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const
DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const
RResult< void > DropClusterDetails(DescriptorId_t clusterId)
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level NTuple data fields.
RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
std::unordered_map< DescriptorId_t, RColumnDescriptor > fColumnDescriptors
std::unique_ptr< RNTupleDescriptor > Clone() const
RNTupleDescriptor(RNTupleDescriptor &&other)=default
std::string fName
The ntuple name needs to be unique in a given storage location (file)
RFieldDescriptorIterable GetTopLevelFields() const
const RClusterDescriptor & GetClusterDescriptor(DescriptorId_t clusterId) const
RNTupleDescriptor(const RNTupleDescriptor &other)=delete
std::unordered_map< DescriptorId_t, RFieldDescriptor > fFieldDescriptors
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
RNTupleDescriptor & operator=(RNTupleDescriptor &&other)=default
NTupleSize_t GetNEntries() const
We know the number of entries from adding the cluster summaries.
RFieldDescriptorIterable GetTopLevelFields(const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const
bool operator==(const RNTupleDescriptor &other) const
std::string GetQualifiedFieldName(DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d. In case of invalid ...
DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const
const RColumnDescriptor & GetColumnDescriptor(DescriptorId_t columnId) const
RClusterDescriptorIterable GetClusterIterable() const
RResult< void > AddClusterDetails(RClusterDescriptor &&clusterDesc)
Methods to load and drop cluster details.
const RFieldDescriptor & GetFieldDescriptor(DescriptorId_t fieldId) const
DescriptorId_t FindColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const
NTupleSize_t GetNElements(DescriptorId_t columnId) const
const RClusterGroupDescriptor & GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
RNTupleDescriptor & operator=(const RNTupleDescriptor &other)=delete
std::string fDescription
Free text from the user.
RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
const RFieldDescriptor & GetFieldZero() const
void PrintInfo(std::ostream &output) const
RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId, const std::function< bool(DescriptorId_t, DescriptorId_t)> &comparator) const
DescriptorId_t FindClusterId(DescriptorId_t columnId, NTupleSize_t index) const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:207
const Int_t n
Definition legend1.C:16
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
ENTupleStructure
The fields in the ntuple model tree can carry different structural information about the type system.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
constexpr ClusterSize_t kInvalidClusterIndex(std::uint32_t(-1))
constexpr DescriptorId_t kInvalidDescriptorId
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
The window of element indexes of a particular column in a particular cluster.
std::int64_t fCompressionSettings
The usual format for ROOT compression settings (see Compression.h).
ClusterSize_t fNElements
A 32bit value for the number of column elements in the cluster.
RPageInfoExtended(const RPageInfo &pi, RClusterSize::ValueType i, NTupleSize_t n)
RClusterSize::ValueType fFirstInPage
Index (in cluster) of the first element in page.
NTupleSize_t fPageNo
Page number in the corresponding RPageRange.
We do not need to store the element size / uncompressed page size because we know to which column the...
RNTupleLocator fLocator
The meaning of fLocator depends on the storage backend.
ClusterSize_t fNElements
The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRang...
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(RClusterSize::ValueType idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
RPageRange(const RPageRange &other)=delete
RPageRange & operator=(const RPageRange &other)=delete
Wrap the 32bit integer in a struct in order to avoid template specialization clash with std::uint32_t...
Generic information about the physical location of data.
static void output()